Files changed (1)

front/src/utils/utils.ts +528 -509
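In short: the change imports denoiseBuffer from a new audio-denoiser dependency and adds a denoiseAudioBuffer wrapper after addSilence; every other line of the file is carried over verbatim.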
@@ -1,509 +1,528 @@
 // @ts-expect-error this package does not have typing
 import TextLineStream from 'textlinestream';
 import { Client } from '@gradio/client';
 import * as lamejs from '@breezystack/lamejs';
 
 // ponyfill for missing ReadableStream asyncIterator on Safari
 import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
 import { CONFIG } from '../config';
 import { uploadFiles } from '@huggingface/hub';
+import { denoiseBuffer } from 'audio-denoiser'; // Import the denoiseBuffer function
+
 
 export const isDev: boolean = import.meta.env.MODE === 'development';
 export const testToken: string = import.meta.env.VITE_TEST_TOKEN;
 export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);
 
 // return URL to the WAV file
 export const generateAudio = async (
   content: string,
   voice: string,
   speed: number = 1.1
 ): Promise<string> => {
   const maxRetries = 3;
   for (let i = 0; i < maxRetries; i++) {
     try {
       const client = await Client.connect(CONFIG.ttsSpaceId);
       const result = await client.predict('/tts', {
         text: content,
         voice,
         speed,
       });
 
       console.log(result.data);
       return (result.data as any)[0].url;
     } catch (e) {
       if (i === maxRetries - 1) {
         throw e; // last retry, throw error
       }
       console.error('Failed to generate audio, retrying...', e);
     }
     continue;
   }
   return ''; // should never reach here
 };
 
 export const pickRand = <T>(arr: T[]): T => {
   return arr[Math.floor(Math.random() * arr.length)];
 };
 
 // wrapper for SSE
 export async function* getSSEStreamAsync(fetchResponse: Response) {
   if (!fetchResponse.body) throw new Error('Response body is empty');
   const lines: ReadableStream<string> = fetchResponse.body
     .pipeThrough(new TextDecoderStream())
     .pipeThrough(new TextLineStream());
   // @ts-expect-error asyncIterator complains about type, but it should work
   for await (const line of asyncIterator(lines)) {
     //if (isDev) console.log({ line });
     if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
       const data = JSON.parse(line.slice(5));
       yield data;
     } else if (line.startsWith('error:')) {
       const data = JSON.parse(line.slice(6));
       throw new Error(data.message || 'Unknown error');
     }
   }
 }
 
 export const uploadFileToHub = async (
   buf: ArrayBuffer,
   filename: string,
   repoId: string,
   hfToken: string
 ) => {
   await uploadFiles({
     accessToken: hfToken,
     repo: repoId,
     files: [
       {
         path: filename,
         content: new Blob([buf], { type: 'audio/wav' }),
       },
     ],
   });
 };
 
 /**
  * Ok now, most of the functions below are written by ChatGPT using Reasoning mode.
  */
 
 ////////////////////////////////////////
 // Audio manipulation utils
 
 export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
   const threshold = 0.01; // Amplitude below which a sample is considered silent.
   const numChannels = audioBuffer.numberOfChannels;
   const totalSamples = audioBuffer.length;
 
   // Helper function to check if a sample at the given index is silent in all channels.
   const isSilent = (index: number): boolean => {
     for (let channel = 0; channel < numChannels; channel++) {
       const channelData = audioBuffer.getChannelData(channel);
       if (Math.abs(channelData[index]) > threshold) {
         return false;
       }
     }
     return true;
   };
 
   // Find the first non-silent sample.
   let startSample = 0;
   while (startSample < totalSamples && isSilent(startSample)) {
     startSample++;
   }
 
   // Find the last non-silent sample.
   let endSample = totalSamples - 1;
   while (endSample >= startSample && isSilent(endSample)) {
     endSample--;
   }
 
   // If no non-silent samples were found, return an empty AudioBuffer.
   if (startSample >= totalSamples || endSample < startSample) {
     return new AudioBuffer({
       length: 1,
       numberOfChannels: numChannels,
       sampleRate: audioBuffer.sampleRate,
     });
   }
 
   const newLength = endSample - startSample + 1;
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: audioBuffer.sampleRate,
   });
 
   // Copy the trimmed audio samples from the original buffer to the new buffer.
   for (let channel = 0; channel < numChannels; channel++) {
     const oldData = audioBuffer.getChannelData(channel);
     const newData = newBuffer.getChannelData(channel);
     for (let i = 0; i < newLength; i++) {
       newData[i] = oldData[startSample + i];
     }
   }
 
   return newBuffer;
 };
 
 export const joinAudio = (
   audio1: AudioBuffer,
   audio2: AudioBuffer,
   gapMilisecs: number,
   overlap: 'none' | 'cross-fade' = 'none'
 ): AudioBuffer => {
   const sampleRate = audio1.sampleRate;
   const numChannels = audio1.numberOfChannels;
 
   // Ensure both audio buffers are compatible.
   if (audio2.sampleRate !== sampleRate) {
     throw new Error('Audio buffers must have the same sample rate');
   }
   if (audio2.numberOfChannels !== numChannels) {
     throw new Error('Audio buffers must have the same number of channels');
   }
 
   const gapSeconds = gapMilisecs / 1000;
   let newLength: number;
 
   if (gapSeconds > 0) {
     // Pad with silence: gapSamples of silence in between.
     const gapSamples = Math.round(gapSeconds * sampleRate);
     newLength = audio1.length + gapSamples + audio2.length;
   } else if (gapSeconds === 0) {
     // Simply join one after the other.
     newLength = audio1.length + audio2.length;
   } else {
     // gapSeconds < 0 means we blend (overlap) the end of audio1 with the beginning of audio2.
     const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
     // Ensure we don't overlap more than available in either buffer.
     const effectiveOverlap = Math.min(
       overlapSamplesRequested,
       audio1.length,
       audio2.length
     );
     newLength = audio1.length + audio2.length - effectiveOverlap;
   }
 
   // Create a new AudioBuffer for the joined result.
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: sampleRate,
   });
 
   // Process each channel.
   for (let channel = 0; channel < numChannels; channel++) {
     const outputData = newBuffer.getChannelData(channel);
     const data1 = audio1.getChannelData(channel);
     const data2 = audio2.getChannelData(channel);
     let offset = 0;
 
     if (gapSeconds < 0) {
       // Blend the join section.
       const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
       const effectiveOverlap = Math.min(
         overlapSamplesRequested,
         audio1.length,
         audio2.length
       );
 
       // Copy audio1 data up to the start of the overlapping section.
       const nonOverlapLength = audio1.length - effectiveOverlap;
       outputData.set(data1.subarray(0, nonOverlapLength), offset);
       offset += nonOverlapLength;
 
       // Blend overlapping region.
       if (overlap === 'cross-fade') {
         for (let i = 0; i < effectiveOverlap; i++) {
           // Linear crossfade:
           const fadeOut = 1 - i / effectiveOverlap;
           const fadeIn = i / effectiveOverlap;
           outputData[offset + i] =
             data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
         }
       } else {
         for (let i = 0; i < effectiveOverlap; i++) {
           outputData[offset + i] = data1[nonOverlapLength + i] + data2[i];
         }
       }
       offset += effectiveOverlap;
 
       // Append remaining audio2 data.
       outputData.set(data2.subarray(effectiveOverlap), offset);
     } else if (gapSeconds === 0) {
       // Directly concatenate: copy audio1 then audio2.
       outputData.set(data1, offset);
       offset += audio1.length;
       outputData.set(data2, offset);
     } else {
       // gapSeconds > 0: insert silence between audio1 and audio2.
       const gapSamples = Math.round(gapSeconds * sampleRate);
       outputData.set(data1, offset);
       offset += audio1.length;
 
       // Silence: the buffer is initialized with zeros, so we simply move the offset.
       offset += gapSamples;
 
       outputData.set(data2, offset);
     }
   }
 
   return newBuffer;
 };
 
 export const addNoise = (
   audioBuffer: AudioBuffer,
   magnitude: number
 ): AudioBuffer => {
   const { numberOfChannels, sampleRate, length } = audioBuffer;
   const newBuffer = new AudioBuffer({
     length,
     numberOfChannels,
     sampleRate,
   });
 
   for (let channel = 0; channel < numberOfChannels; channel++) {
     const inputData = audioBuffer.getChannelData(channel);
     const outputData = newBuffer.getChannelData(channel);
 
     for (let i = 0; i < length; i++) {
       // Generate white noise in the range [-magnitude, +magnitude].
       const noise = (Math.random() * 2 - 1) * magnitude;
       outputData[i] = inputData[i] + noise;
     }
   }
 
   return newBuffer;
 };
 
 export const addSilence = (
   audioBuffer: AudioBuffer,
   toBeginning: boolean,
   durationMilisecs: number
 ): AudioBuffer => {
   // Convert duration from milliseconds to samples.
   const sampleRate = audioBuffer.sampleRate;
   const silenceSamples = Math.round((durationMilisecs / 1000) * sampleRate);
   const numChannels = audioBuffer.numberOfChannels;
   const originalLength = audioBuffer.length;
   const newLength = originalLength + silenceSamples;
 
   // Create a new AudioBuffer with extra space for the silence.
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: sampleRate,
   });
 
   // Process each channel: copy original audio into the correct position.
   for (let channel = 0; channel < numChannels; channel++) {
     const originalData = audioBuffer.getChannelData(channel);
     const newData = newBuffer.getChannelData(channel);
 
     if (toBeginning) {
       // Leave the first `silenceSamples` as zeros, then copy the original data.
       newData.set(originalData, silenceSamples);
     } else {
       // Copy the original data first; the remaining samples are already zeros.
       newData.set(originalData, 0);
     }
   }
 
   return newBuffer;
 };
 
+export const denoiseAudioBuffer = async (audioBuffer: AudioBuffer): Promise<AudioBuffer> => {
+  try {
+    console.log("Denoising audio...");
+    const denoisedBuffer = await denoiseBuffer(audioBuffer);
+    if (!denoisedBuffer) {
+      console.warn("Denoising returned null. Returning original buffer.");
+      return audioBuffer;
+    }
+
+    return denoisedBuffer;
+  } catch (error) {
+    console.error("Error during denoising:", error);
+    // Return the original buffer if denoising fails
+    return audioBuffer;
+  }
+};
+
 ////////////////////////////////////////
 // Audio formatting utils
 
 export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => {
   const response = await fetch(url);
   const arrayBuffer = await response.arrayBuffer();
   // @ts-expect-error this is fine
   const AudioContext = window.AudioContext || window.webkitAudioContext;
   if (!AudioContext) {
     throw new Error('AudioContext is not supported on this browser');
   }
   const audioCtx = new AudioContext();
   let audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
   // force mono
   if (audioBuffer.numberOfChannels > 1) {
     const monoBuffer = new AudioContext().createBuffer(
       1,
       audioBuffer.length,
       audioBuffer.sampleRate
     );
     const monoData = monoBuffer.getChannelData(0);
     for (let i = 0; i < audioBuffer.length; i++) {
       let sum = 0;
       for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
         sum += audioBuffer.getChannelData(channel)[i];
       }
       monoData[i] = sum / audioBuffer.numberOfChannels;
     }
     audioBuffer = monoBuffer;
   }
   return audioBuffer;
 };
 
 export function audioBufferToWav(
   buffer: AudioBuffer,
   options: { float32?: boolean } = {}
 ): ArrayBuffer {
   const numChannels = buffer.numberOfChannels;
   const sampleRate = buffer.sampleRate;
   const format = options.float32 ? 3 : 1; // 3 = IEEE float, 1 = PCM
   const bitDepth = options.float32 ? 32 : 16;
 
   const numSamples = buffer.length;
   const headerLength = 44;
   const bytesPerSample = bitDepth / 8;
   const dataLength = numSamples * numChannels * bytesPerSample;
   const bufferLength = headerLength + dataLength;
 
   const arrayBuffer = new ArrayBuffer(bufferLength);
   const view = new DataView(arrayBuffer);
   let offset = 0;
 
   function writeString(str: string) {
     for (let i = 0; i < str.length; i++) {
       view.setUint8(offset, str.charCodeAt(i));
       offset++;
     }
   }
 
   // Write WAV header
   writeString('RIFF');
   view.setUint32(offset, 36 + dataLength, true);
   offset += 4;
   writeString('WAVE');
   writeString('fmt ');
   view.setUint32(offset, 16, true);
   offset += 4;
   view.setUint16(offset, format, true);
   offset += 2;
   view.setUint16(offset, numChannels, true);
   offset += 2;
   view.setUint32(offset, sampleRate, true);
   offset += 4;
   view.setUint32(offset, sampleRate * numChannels * bytesPerSample, true);
   offset += 4;
   view.setUint16(offset, numChannels * bytesPerSample, true);
   offset += 2;
   view.setUint16(offset, bitDepth, true);
   offset += 2;
   writeString('data');
   view.setUint32(offset, dataLength, true);
   offset += 4;
 
   // Write PCM samples: interleave channels
   const channels: Float32Array[] = [];
   for (let i = 0; i < numChannels; i++) {
     channels.push(buffer.getChannelData(i));
   }
 
   for (let i = 0; i < numSamples; i++) {
     for (let channel = 0; channel < numChannels; channel++) {
       let sample = channels[channel][i];
       // Clamp the sample to [-1, 1]
       sample = Math.max(-1, Math.min(1, sample));
       if (options.float32) {
         view.setFloat32(offset, sample, true);
         offset += 4;
       } else {
         // Convert to 16-bit PCM sample
         const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
         view.setInt16(offset, intSample, true);
         offset += 2;
       }
     }
   }
 
   return arrayBuffer;
 }
 
 export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
   // Using 16-bit PCM for compatibility.
   const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
   return new Blob([wavArrayBuffer], { type: 'audio/wav' });
 };
 
 export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer {
   const numChannels = buffer.numberOfChannels;
   const sampleRate = buffer.sampleRate;
   const bitRate = 128; // kbps - adjust as desired
 
   // Initialize MP3 encoder.
   // Note: If more than 2 channels are present, only the first 2 channels will be used.
   const mp3encoder = new lamejs.Mp3Encoder(
     numChannels >= 2 ? 2 : 1,
     sampleRate,
     bitRate
   );
 
   const samples = buffer.length;
   const chunkSize = 1152; // Frame size for MP3 encoding
 
   // Prepare channel data.
   const channels: Float32Array[] = [];
   for (let ch = 0; ch < numChannels; ch++) {
     channels.push(buffer.getChannelData(ch));
   }
 
   const mp3Data: Uint8Array[] = [];
 
   // For mono audio, encode directly.
   if (numChannels === 1) {
     for (let i = 0; i < samples; i += chunkSize) {
       const sampleChunk = channels[0].subarray(i, i + chunkSize);
       const int16Buffer = floatTo16BitPCM(sampleChunk);
       const mp3buf = mp3encoder.encodeBuffer(int16Buffer);
       if (mp3buf.length > 0) {
         mp3Data.push(new Uint8Array(mp3buf));
       }
     }
   } else {
     // For stereo (or more channels, use first two channels).
     const left = channels[0];
     const right = channels[1];
     for (let i = 0; i < samples; i += chunkSize) {
       const leftChunk = left.subarray(i, i + chunkSize);
       const rightChunk = right.subarray(i, i + chunkSize);
       const leftInt16 = floatTo16BitPCM(leftChunk);
       const rightInt16 = floatTo16BitPCM(rightChunk);
       const mp3buf = mp3encoder.encodeBuffer(leftInt16, rightInt16);
       if (mp3buf.length > 0) {
         mp3Data.push(new Uint8Array(mp3buf));
       }
     }
   }
 
   // Flush the encoder to get any remaining MP3 data.
   const endBuf = mp3encoder.flush();
   if (endBuf.length > 0) {
     mp3Data.push(new Uint8Array(endBuf));
   }
 
   // Concatenate all MP3 chunks into a single ArrayBuffer.
   const totalLength = mp3Data.reduce((acc, curr) => acc + curr.length, 0);
   const result = new Uint8Array(totalLength);
   let offset = 0;
   for (const chunk of mp3Data) {
     result.set(chunk, offset);
     offset += chunk.length;
   }
 
   return result.buffer;
 }
 
 /**
  * Helper function that converts a Float32Array of PCM samples (range -1..1)
  * into an Int16Array (range -32768..32767).
  */
 function floatTo16BitPCM(input: Float32Array): Int16Array {
   const output = new Int16Array(input.length);
   for (let i = 0; i < input.length; i++) {
     const s = Math.max(-1, Math.min(1, input[i]));
     output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
   }
   return output;
 }
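For context, here is a minimal sketch of how the new helper could slot into the pipeline this file already exposes. Every function imported below exists in utils.ts; the cleanClip wrapper, its name, and the idea of chaining these particular steps are illustrative assumptions, not part of the diff.

import {
  loadWavAndDecode,
  denoiseAudioBuffer,
  trimSilence,
  blobFromAudioBuffer,
} from './utils/utils';

// Hypothetical caller: decode a generated clip, denoise it, trim the
// leading/trailing silence, and pack the result as a 16-bit PCM WAV blob.
// Note that denoiseAudioBuffer resolves to the original buffer if denoising fails.
async function cleanClip(url: string): Promise<Blob> {
  const decoded = await loadWavAndDecode(url);
  const denoised = await denoiseAudioBuffer(decoded);
  const trimmed = trimSilence(denoised);
  return blobFromAudioBuffer(trimmed);
}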