{
  "@context": "https://schema.org",
  "@type": "article",
  "@id": "https://anchorfact.org/kb/audio-source-separation",
  "headline": "Audio Source Separation: Demixing Speech, Music, and Environmental Sounds with Deep Learning",
  "description": "Audio source separation -- the \"cocktail party problem\" -- isolates individual sound sources from a mixture: extracting vocals from a song, separating overlapping speakers in a meeting, or isolating a target voice in a noisy environment. Deep learning has achieved human-level separation quality, enabling applications from music production to hearing aid enhancement.",
  "dateCreated": "2026-05-24T02:49:13.581Z",
  "dateModified": "2026-05-24",
  "author": {
    "@type": "Organization",
    "name": "AnchorFact"
  },
  "publisher": {
    "@type": "Organization",
    "name": "AnchorFact",
    "url": "https://anchorfact.org"
  },
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "anchorfact:confidence": "high",
  "anchorfact:generationMethod": "ai_assisted",
  "citation": [
    {
      "@type": "CreativeWork",
      "name": "Music Source Separation in the Waveform Domain (Demucs)",
      "sameAs": "https://github.com/facebookresearch/demucs"
    },
    {
      "@type": "CreativeWork",
      "name": "Conv-TasNet: Surpassing Ideal Time-Frequency Masking for Speech Separation",
      "sameAs": "https://ieeexplore.ieee.org/document/8707065"
    }
  ]
}