Files changed (1)

front/src/utils/utils.ts +528 -509
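In short: the change imports denoiseBuffer from a new audio-denoiser dependency and adds a denoiseAudioBuffer wrapper after addSilence; every other line of the file is carried over verbatim.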
@@ -1,509 +1,528 @@
 // @ts-expect-error this package does not have typing
 import TextLineStream from 'textlinestream';
 import { Client } from '@gradio/client';
 import * as lamejs from '@breezystack/lamejs';
 
 // ponyfill for missing ReadableStream asyncIterator on Safari
 import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
 import { CONFIG } from '../config';
 import { uploadFiles } from '@huggingface/hub';
+import { denoiseBuffer } from 'audio-denoiser'; // Import the denoiseBuffer function
+
 
 export const isDev: boolean = import.meta.env.MODE === 'development';
 export const testToken: string = import.meta.env.VITE_TEST_TOKEN;
 export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);
 
 // return URL to the WAV file
 export const generateAudio = async (
   content: string,
   voice: string,
   speed: number = 1.1
 ): Promise<string> => {
   const maxRetries = 3;
   for (let i = 0; i < maxRetries; i++) {
     try {
       const client = await Client.connect(CONFIG.ttsSpaceId);
       const result = await client.predict('/tts', {
         text: content,
         voice,
         speed,
       });
 
       console.log(result.data);
       return (result.data as any)[0].url;
     } catch (e) {
       if (i === maxRetries - 1) {
         throw e; // last retry, throw error
       }
       console.error('Failed to generate audio, retrying...', e);
     }
     continue;
   }
   return ''; // should never reach here
 };
 
 export const pickRand = <T>(arr: T[]): T => {
   return arr[Math.floor(Math.random() * arr.length)];
 };
 
 // wrapper for SSE
 export async function* getSSEStreamAsync(fetchResponse: Response) {
   if (!fetchResponse.body) throw new Error('Response body is empty');
   const lines: ReadableStream<string> = fetchResponse.body
     .pipeThrough(new TextDecoderStream())
     .pipeThrough(new TextLineStream());
   // @ts-expect-error asyncIterator complains about type, but it should work
   for await (const line of asyncIterator(lines)) {
     //if (isDev) console.log({ line });
     if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
       const data = JSON.parse(line.slice(5));
       yield data;
     } else if (line.startsWith('error:')) {
       const data = JSON.parse(line.slice(6));
       throw new Error(data.message || 'Unknown error');
     }
   }
 }
 
 export const uploadFileToHub = async (
   buf: ArrayBuffer,
   filename: string,
   repoId: string,
   hfToken: string
 ) => {
   await uploadFiles({
     accessToken: hfToken,
     repo: repoId,
     files: [
       {
         path: filename,
         content: new Blob([buf], { type: 'audio/wav' }),
       },
     ],
   });
 };
 
 /**
  * Ok now, most of the functions below are written by ChatGPT using Reasoning mode.
  */
 
 ////////////////////////////////////////
 // Audio manipulation utils
 
 export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
   const threshold = 0.01; // Amplitude below which a sample is considered silent.
   const numChannels = audioBuffer.numberOfChannels;
   const totalSamples = audioBuffer.length;
 
   // Helper function to check if a sample at the given index is silent in all channels.
   const isSilent = (index: number): boolean => {
     for (let channel = 0; channel < numChannels; channel++) {
       const channelData = audioBuffer.getChannelData(channel);
       if (Math.abs(channelData[index]) > threshold) {
         return false;
       }
     }
     return true;
   };
 
   // Find the first non-silent sample.
   let startSample = 0;
   while (startSample < totalSamples && isSilent(startSample)) {
     startSample++;
   }
 
   // Find the last non-silent sample.
   let endSample = totalSamples - 1;
   while (endSample >= startSample && isSilent(endSample)) {
     endSample--;
   }
 
   // If no non-silent samples were found, return an empty AudioBuffer.
   if (startSample >= totalSamples || endSample < startSample) {
     return new AudioBuffer({
       length: 1,
       numberOfChannels: numChannels,
       sampleRate: audioBuffer.sampleRate,
     });
   }
 
   const newLength = endSample - startSample + 1;
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: audioBuffer.sampleRate,
   });
 
   // Copy the trimmed audio samples from the original buffer to the new buffer.
   for (let channel = 0; channel < numChannels; channel++) {
     const oldData = audioBuffer.getChannelData(channel);
     const newData = newBuffer.getChannelData(channel);
     for (let i = 0; i < newLength; i++) {
       newData[i] = oldData[startSample + i];
     }
   }
 
   return newBuffer;
 };
 
 export const joinAudio = (
   audio1: AudioBuffer,
   audio2: AudioBuffer,
   gapMilisecs: number,
   overlap: 'none' | 'cross-fade' = 'none'
 ): AudioBuffer => {
   const sampleRate = audio1.sampleRate;
   const numChannels = audio1.numberOfChannels;
 
   // Ensure both audio buffers are compatible.
   if (audio2.sampleRate !== sampleRate) {
     throw new Error('Audio buffers must have the same sample rate');
   }
   if (audio2.numberOfChannels !== numChannels) {
     throw new Error('Audio buffers must have the same number of channels');
   }
 
   const gapSeconds = gapMilisecs / 1000;
   let newLength: number;
 
   if (gapSeconds > 0) {
     // Pad with silence: gapSamples of silence in between.
     const gapSamples = Math.round(gapSeconds * sampleRate);
     newLength = audio1.length + gapSamples + audio2.length;
   } else if (gapSeconds === 0) {
     // Simply join one after the other.
     newLength = audio1.length + audio2.length;
   } else {
     // gapSeconds < 0 means we blend (overlap) the end of audio1 with the beginning of audio2.
     const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
     // Ensure we don't overlap more than available in either buffer.
     const effectiveOverlap = Math.min(
       overlapSamplesRequested,
       audio1.length,
       audio2.length
     );
     newLength = audio1.length + audio2.length - effectiveOverlap;
   }
 
   // Create a new AudioBuffer for the joined result.
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: sampleRate,
   });
 
   // Process each channel.
   for (let channel = 0; channel < numChannels; channel++) {
     const outputData = newBuffer.getChannelData(channel);
     const data1 = audio1.getChannelData(channel);
     const data2 = audio2.getChannelData(channel);
     let offset = 0;
 
     if (gapSeconds < 0) {
       // Blend the join section.
       const overlapSamplesRequested = Math.round(-gapSeconds * sampleRate);
       const effectiveOverlap = Math.min(
         overlapSamplesRequested,
         audio1.length,
         audio2.length
       );
 
       // Copy audio1 data up to the start of the overlapping section.
       const nonOverlapLength = audio1.length - effectiveOverlap;
       outputData.set(data1.subarray(0, nonOverlapLength), offset);
       offset += nonOverlapLength;
 
       // Blend overlapping region.
       if (overlap === 'cross-fade') {
         for (let i = 0; i < effectiveOverlap; i++) {
           // Linear crossfade:
           const fadeOut = 1 - i / effectiveOverlap;
           const fadeIn = i / effectiveOverlap;
           outputData[offset + i] =
             data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
         }
       } else {
         for (let i = 0; i < effectiveOverlap; i++) {
           outputData[offset + i] = data1[nonOverlapLength + i] + data2[i];
         }
       }
       offset += effectiveOverlap;
 
       // Append remaining audio2 data.
       outputData.set(data2.subarray(effectiveOverlap), offset);
     } else if (gapSeconds === 0) {
       // Directly concatenate: copy audio1 then audio2.
       outputData.set(data1, offset);
       offset += audio1.length;
       outputData.set(data2, offset);
     } else {
       // gapSeconds > 0: insert silence between audio1 and audio2.
       const gapSamples = Math.round(gapSeconds * sampleRate);
       outputData.set(data1, offset);
       offset += audio1.length;
 
       // Silence: the buffer is initialized with zeros, so we simply move the offset.
       offset += gapSamples;
 
       outputData.set(data2, offset);
     }
   }
 
   return newBuffer;
 };
 
 export const addNoise = (
   audioBuffer: AudioBuffer,
   magnitude: number
 ): AudioBuffer => {
   const { numberOfChannels, sampleRate, length } = audioBuffer;
   const newBuffer = new AudioBuffer({
     length,
     numberOfChannels,
     sampleRate,
   });
 
   for (let channel = 0; channel < numberOfChannels; channel++) {
     const inputData = audioBuffer.getChannelData(channel);
     const outputData = newBuffer.getChannelData(channel);
 
     for (let i = 0; i < length; i++) {
       // Generate white noise in the range [-magnitude, +magnitude].
       const noise = (Math.random() * 2 - 1) * magnitude;
       outputData[i] = inputData[i] + noise;
     }
   }
 
   return newBuffer;
 };
 
 export const addSilence = (
   audioBuffer: AudioBuffer,
   toBeginning: boolean,
   durationMilisecs: number
 ): AudioBuffer => {
   // Convert duration from milliseconds to samples.
   const sampleRate = audioBuffer.sampleRate;
   const silenceSamples = Math.round((durationMilisecs / 1000) * sampleRate);
   const numChannels = audioBuffer.numberOfChannels;
   const originalLength = audioBuffer.length;
   const newLength = originalLength + silenceSamples;
 
   // Create a new AudioBuffer with extra space for the silence.
   const newBuffer = new AudioBuffer({
     length: newLength,
     numberOfChannels: numChannels,
     sampleRate: sampleRate,
   });
 
   // Process each channel: copy original audio into the correct position.
   for (let channel = 0; channel < numChannels; channel++) {
     const originalData = audioBuffer.getChannelData(channel);
     const newData = newBuffer.getChannelData(channel);
 
     if (toBeginning) {
       // Leave the first `silenceSamples` as zeros, then copy the original data.
       newData.set(originalData, silenceSamples);
     } else {
       // Copy the original data first; the remaining samples are already zeros.
       newData.set(originalData, 0);
     }
   }
 
   return newBuffer;
 };
 
+export const denoiseAudioBuffer = async (audioBuffer: AudioBuffer): Promise<AudioBuffer> => {
+  try {
+    console.log("Denoising audio...");
+    const denoisedBuffer = await denoiseBuffer(audioBuffer);
+    if (!denoisedBuffer) {
+      console.warn("Denoising returned null. Returning original buffer.");
+      return audioBuffer;
+    }
+
+    return denoisedBuffer;
+  } catch (error) {
+    console.error("Error during denoising:", error);
+    // Return the original buffer if denoising fails
+    return audioBuffer;
+  }
+};
+
 ////////////////////////////////////////
 // Audio formatting utils
 
 export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => {
   const response = await fetch(url);
   const arrayBuffer = await response.arrayBuffer();
   // @ts-expect-error this is fine
   const AudioContext = window.AudioContext || window.webkitAudioContext;
   if (!AudioContext) {
     throw new Error('AudioContext is not supported on this browser');
   }
   const audioCtx = new AudioContext();
   let audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
   // force mono
   if (audioBuffer.numberOfChannels > 1) {
     const monoBuffer = new AudioContext().createBuffer(
       1,
       audioBuffer.length,
       audioBuffer.sampleRate
     );
     const monoData = monoBuffer.getChannelData(0);
     for (let i = 0; i < audioBuffer.length; i++) {
       let sum = 0;
       for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
         sum += audioBuffer.getChannelData(channel)[i];
       }
       monoData[i] = sum / audioBuffer.numberOfChannels;
     }
     audioBuffer = monoBuffer;
   }
   return audioBuffer;
 };
 
 export function audioBufferToWav(
   buffer: AudioBuffer,
   options: { float32?: boolean } = {}
 ): ArrayBuffer {
   const numChannels = buffer.numberOfChannels;
   const sampleRate = buffer.sampleRate;
   const format = options.float32 ? 3 : 1; // 3 = IEEE float, 1 = PCM
   const bitDepth = options.float32 ? 32 : 16;
 
   const numSamples = buffer.length;
   const headerLength = 44;
   const bytesPerSample = bitDepth / 8;
   const dataLength = numSamples * numChannels * bytesPerSample;
   const bufferLength = headerLength + dataLength;
 
   const arrayBuffer = new ArrayBuffer(bufferLength);
   const view = new DataView(arrayBuffer);
   let offset = 0;
 
   function writeString(str: string) {
     for (let i = 0; i < str.length; i++) {
       view.setUint8(offset, str.charCodeAt(i));
       offset++;
     }
   }
 
   // Write WAV header
   writeString('RIFF');
   view.setUint32(offset, 36 + dataLength, true);
   offset += 4;
   writeString('WAVE');
   writeString('fmt ');
   view.setUint32(offset, 16, true);
   offset += 4;
   view.setUint16(offset, format, true);
   offset += 2;
   view.setUint16(offset, numChannels, true);
   offset += 2;
   view.setUint32(offset, sampleRate, true);
   offset += 4;
   view.setUint32(offset, sampleRate * numChannels * bytesPerSample, true);
   offset += 4;
   view.setUint16(offset, numChannels * bytesPerSample, true);
   offset += 2;
   view.setUint16(offset, bitDepth, true);
   offset += 2;
   writeString('data');
   view.setUint32(offset, dataLength, true);
   offset += 4;
 
   // Write PCM samples: interleave channels
   const channels: Float32Array[] = [];
   for (let i = 0; i < numChannels; i++) {
     channels.push(buffer.getChannelData(i));
   }
 
   for (let i = 0; i < numSamples; i++) {
     for (let channel = 0; channel < numChannels; channel++) {
       let sample = channels[channel][i];
       // Clamp the sample to [-1, 1]
       sample = Math.max(-1, Math.min(1, sample));
       if (options.float32) {
         view.setFloat32(offset, sample, true);
         offset += 4;
       } else {
         // Convert to 16-bit PCM sample
         const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
         view.setInt16(offset, intSample, true);
         offset += 2;
       }
     }
   }
 
   return arrayBuffer;
 }
 
 export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
   // Using 16-bit PCM for compatibility.
   const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
   return new Blob([wavArrayBuffer], { type: 'audio/wav' });
 };
 
 export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer {
   const numChannels = buffer.numberOfChannels;
   const sampleRate = buffer.sampleRate;
   const bitRate = 128; // kbps - adjust as desired
 
   // Initialize MP3 encoder.
   // Note: If more than 2 channels are present, only the first 2 channels will be used.
   const mp3encoder = new lamejs.Mp3Encoder(
     numChannels >= 2 ? 2 : 1,
     sampleRate,
     bitRate
   );
 
   const samples = buffer.length;
   const chunkSize = 1152; // Frame size for MP3 encoding
 
   // Prepare channel data.
   const channels: Float32Array[] = [];
   for (let ch = 0; ch < numChannels; ch++) {
     channels.push(buffer.getChannelData(ch));
   }
 
   const mp3Data: Uint8Array[] = [];
 
   // For mono audio, encode directly.
   if (numChannels === 1) {
     for (let i = 0; i < samples; i += chunkSize) {
       const sampleChunk = channels[0].subarray(i, i + chunkSize);
       const int16Buffer = floatTo16BitPCM(sampleChunk);
       const mp3buf = mp3encoder.encodeBuffer(int16Buffer);
       if (mp3buf.length > 0) {
         mp3Data.push(new Uint8Array(mp3buf));
       }
     }
   } else {
     // For stereo (or more channels, use first two channels).
     const left = channels[0];
     const right = channels[1];
     for (let i = 0; i < samples; i += chunkSize) {
       const leftChunk = left.subarray(i, i + chunkSize);
       const rightChunk = right.subarray(i, i + chunkSize);
       const leftInt16 = floatTo16BitPCM(leftChunk);
       const rightInt16 = floatTo16BitPCM(rightChunk);
       const mp3buf = mp3encoder.encodeBuffer(leftInt16, rightInt16);
       if (mp3buf.length > 0) {
         mp3Data.push(new Uint8Array(mp3buf));
       }
     }
   }
 
   // Flush the encoder to get any remaining MP3 data.
   const endBuf = mp3encoder.flush();
   if (endBuf.length > 0) {
     mp3Data.push(new Uint8Array(endBuf));
   }
 
   // Concatenate all MP3 chunks into a single ArrayBuffer.
   const totalLength = mp3Data.reduce((acc, curr) => acc + curr.length, 0);
   const result = new Uint8Array(totalLength);
   let offset = 0;
   for (const chunk of mp3Data) {
     result.set(chunk, offset);
     offset += chunk.length;
   }
 
   return result.buffer;
 }
 
 /**
  * Helper function that converts a Float32Array of PCM samples (range -1..1)
  * into an Int16Array (range -32768..32767).
  */
 function floatTo16BitPCM(input: Float32Array): Int16Array {
   const output = new Int16Array(input.length);
   for (let i = 0; i < input.length; i++) {
     const s = Math.max(-1, Math.min(1, input[i]));
     output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
   }
   return output;
 }
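For context, here is a minimal sketch of how the new helper could slot into the pipeline this file already exposes. Every function imported below exists in utils.ts; the cleanClip wrapper, its name, and the idea of chaining these particular steps are illustrative assumptions, not part of the diff.

import {
  loadWavAndDecode,
  denoiseAudioBuffer,
  trimSilence,
  blobFromAudioBuffer,
} from './utils/utils';

// Hypothetical caller: decode a generated clip, denoise it, trim the
// leading/trailing silence, and pack the result as a 16-bit PCM WAV blob.
// Note that denoiseAudioBuffer resolves to the original buffer if denoising fails.
async function cleanClip(url: string): Promise<Blob> {
  const decoded = await loadWavAndDecode(url);
  const denoised = await denoiseAudioBuffer(decoded);
  const trimmed = trimSilence(denoised);
  return blobFromAudioBuffer(trimmed);
}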