Problem description
I'm writing a piano simulator, and I continuously send buffers to the WASAPI API. I'm trying to do this in AUDCLNT_SHAREMODE_EXCLUSIVE mode, but I still haven't understood how to handle it.
With the code below, I instantiate a separate thread for each call to PlayBuf().
The problem is that after the first thread has started, if I try to start a second one I get AUDCLNT_E_DEVICE_IN_USE.
This is certainly my fault, since I haven't yet understood how WASAPI is meant to be used in EXCLUSIVE mode.
Thanks
void PlayBuf(short *fileBytes, int fileSize)
{
    HRESULT hr;
    IMMDeviceEnumerator *deviceEnumerator = NULL;
    IMMDevice* audioDevice;
    IAudioClient2* audioClient;
    WAVEFORMATEX wfx = {};
    IAudioRenderClient* audioRenderClient;
    UINT32 bufferSizeInFrames;
    UINT32 bufferPadding;
    int16_t* buffer;

    CoInitialize(NULL);
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (LPVOID *)(&deviceEnumerator));
    assert(hr == S_OK);
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();
    hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
    assert(hr == S_OK);
    audioDevice->Release();

    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;

    const int64_t REFTIMES_PER_SEC = 10000000;
    REFERENCE_TIME requestedSoundBufferDuration = REFTIMES_PER_SEC * DurataSuono;
    DWORD initStreamFlags = (AUDCLNT_STREAMFLAGS_RATEADJUST);
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, initStreamFlags, requestedSoundBufferDuration, 0, &wfx, NULL);
    assert(hr == S_OK);
    hr = audioClient->GetService(__uuidof(IAudioRenderClient), (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);
    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);

    audioClient->Reset();
    hr = audioClient->Start();
    assert(hr == S_OK);
    hr = audioRenderClient->GetBuffer(fileSize, (BYTE**)(&buffer));
    assert(hr == S_OK);
    hr = audioRenderClient->ReleaseBuffer(fileSize, 0);
    assert(hr == S_OK);
    Sleep(2000);
    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();
}
Solution
I spent an hour putting together a basic example for you. It's in C#, using my own audio I/O library XT-Audio (so, plug intended), but doing this in C++ against raw wasapi would have taken me half a day. In any case, I believe it's really close to what you're looking for. As you can see below, this application has the world's greatest GUI:
As soon as you press start, the app begins translating keyboard input into audio. You can hold down the c, d, e, f and g keyboard keys to generate notes. It handles multiple overlapping notes (chords), too. I chose the wasapi shared-mode backend because it supports floating-point audio, but this would work just as well in exclusive mode if you translate the audio to a 16-bit integer format.
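That float-to-16-bit conversion is mechanical; here is a minimal sketch (the function name is mine, not part of the library):

// Sketch: clamp a float sample in [-1, 1] and scale it to 16-bit PCM,
// which is what a 16-bit exclusive-mode stream expects.
#include <cstdint>

static int16_t float_to_int16(float sample)
{
    if (sample > 1.0f) sample = 1.0f;    // clamp to avoid wrap-around
    if (sample < -1.0f) sample = -1.0f;
    return (int16_t)(sample * 32767.0f); // scale to the 16-bit range
}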
The difference between using this library and raw wasapi is that the audio thread is managed by the library, and the application periodically gets its audio callback function invoked to synthesize audio data. However, this translates easily back to native wasapi in C++: just call IAudioRenderClient::GetBuffer/ReleaseBuffer in a loop in a background thread, and do your processing in between those calls.
In any case, here's the key part: this application uses only 2 threads, one for the UI (managed by winforms) and one for audio (managed by the audio library), and yet it is capable of playing multiple notes simultaneously, which I believe is at the heart of your question.
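To make that concrete, a rough sketch of such a native-WASAPI audio thread is below. It assumes an already initialized, event-driven, exclusive-mode client, and MixActiveNotes is a hypothetical function standing in for whatever sums the currently held notes:

#include <atomic>
#include <cstdint>
#include <Windows.h>
#include <Audioclient.h>

// Assumed to be set up elsewhere (exclusive mode, event driven):
extern IAudioClient* audioClient;
extern IAudioRenderClient* audioRenderClient;
extern HANDLE hEvent;                  // registered via SetEventHandle
extern std::atomic<bool> running;
// Hypothetical mixer: sums all currently active notes into dst.
extern void MixActiveNotes(int16_t* dst, UINT32 frames);

DWORD WINAPI AudioThread(void*)
{
    UINT32 frames = 0;
    audioClient->GetBufferSize(&frames);
    audioClient->Start();
    while (running.load())
    {
        WaitForSingleObject(hEvent, INFINITE); // one wakeup per device period
        BYTE* data = nullptr;
        if (FAILED(audioRenderClient->GetBuffer(frames, &data))) break;
        MixActiveNotes(reinterpret_cast<int16_t*>(data), frames);
        audioRenderClient->ReleaseBuffer(frames, 0);
    }
    audioClient->Stop();
    return 0;
}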
I uploaded the full Visual Studio solution and binaries here: WasapiSynthSample. But for completeness, I'll post the interesting parts of the code below.
using System;
using System.Threading;
using System.Windows.Forms;
using Xt;

namespace WasapiSynthSample
{
    public partial class Program : Form
    {
        // sampling rate
        const int Rate = 48000;
        // stereo
        const int Channels = 2;
        // default format for wasapi shared mode
        const XtSample Sample = XtSample.Float32;
        // C,D,E,F,G
        static readonly float[] NoteFrequencies = { 523.25f, 587.33f, 659.25f, 698.46f, 783.99f };

        [STAThread]
        static void Main()
        {
            // initialize audio library
            using (var platform = XtAudio.Init(null, IntPtr.Zero, null))
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                Application.ThreadException += OnApplicationThreadException;
                AppDomain.CurrentDomain.UnhandledException += OnCurrentDomainUnhandledException;
                Application.Run(new Program(platform));
            }
        }

        // pop a messagebox on any error
        static void OnApplicationThreadException(object sender, ThreadExceptionEventArgs e)
            => OnError(e.Exception);
        static void OnCurrentDomainUnhandledException(object sender, UnhandledExceptionEventArgs e)
            => OnError((Exception)e.ExceptionObject);
        static void OnError(Exception e)
        {
            var text = e.ToString();
            if (e is XtException xte) text = XtAudio.GetErrorInfo(xte.GetError()).ToString();
            MessageBox.Show(text);
        }

        XtStream _stream;
        readonly XtPlatform _platform;
        // note phases
        readonly float[] _phases = new float[5];
        // tracks key down/up
        readonly bool[] _notesActive = new bool[5];

        public Program(XtPlatform platform)
        {
            _platform = platform;
            InitializeComponent();
        }

        // activate note
        protected override void OnKeyDown(KeyEventArgs e)
        {
            base.OnKeyDown(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = true;
            if (e.KeyCode == Keys.D) _notesActive[1] = true;
            if (e.KeyCode == Keys.E) _notesActive[2] = true;
            if (e.KeyCode == Keys.F) _notesActive[3] = true;
            if (e.KeyCode == Keys.G) _notesActive[4] = true;
        }

        // deactivate note
        protected override void OnKeyUp(KeyEventArgs e)
        {
            base.OnKeyUp(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = false;
            if (e.KeyCode == Keys.D) _notesActive[1] = false;
            if (e.KeyCode == Keys.E) _notesActive[2] = false;
            if (e.KeyCode == Keys.F) _notesActive[3] = false;
            if (e.KeyCode == Keys.G) _notesActive[4] = false;
        }

        // stop stream
        void OnStop(object sender, EventArgs e)
        {
            _stream?.Stop();
            _stream?.Dispose();
            _stream = null;
            _start.Enabled = true;
            _stop.Enabled = false;
        }

        // start stream
        void OnStart(object sender, EventArgs e)
        {
            var service = _platform.GetService(XtSystem.WASAPI);
            var id = service.GetDefaultDeviceId(true);
            using (var device = service.OpenDevice(id))
            {
                var mix = new XtMix(Rate, Sample);
                var channels = new XtChannels(0, 0, Channels, 0);
                var format = new XtFormat(in mix, in channels);
                var buffer = device.GetBufferSize(in format).current;
                var streamParams = new XtStreamParams(true, OnBuffer, null, null);
                var deviceParams = new XtDeviceStreamParams(in streamParams, in format, buffer);
                _stream = device.OpenStream(in deviceParams, null);
                _stream.Start();
                _start.Enabled = false;
                _stop.Enabled = true;
            }
        }

        // this gets called on the audio thread by the audio library
        // but could just as well be your c++ code managing its own threads
        unsafe int OnBuffer(XtStream stream, in XtBuffer buffer, object user)
        {
            // process audio buffer of N frames
            for (int f = 0; f < buffer.frames; f++)
            {
                // compose current sample of all currently active notes
                float sample = 0.0f;
                for (int n = 0; n < NoteFrequencies.Length; n++)
                {
                    if (_notesActive[n])
                    {
                        _phases[n] += NoteFrequencies[n] / Rate;
                        if (_phases[n] >= 1.0f) _phases[n] = -1.0f;
                        float noteSample = (float)Math.Sin(2.0 * _phases[n] * Math.PI);
                        sample += noteSample / NoteFrequencies.Length;
                    }
                }
                // write current sample to output buffer
                for (int c = 0; c < Channels; c++)
                    ((float*)buffer.output)[f * Channels + c] = sample;
            }
            return 0;
        }
    }
}
I added FillBufferWasapi() to the thread code to keep the code cleaner. I don't have much experience with real-time applications, but I can't see the error:
int wavPlaybackSample = 0;
int k = 0;
while (flags != AUDCLNT_BUFFERFLAGS_SILENT)
{
    DWORD retval = WaitForSingleObject(hEvent, 2000);
    for (int ii = 0; ii < 255; ii++)
    {
        if (MyKeyDown[ii] == 1)
        {
            hr = audioRenderClient->GetBuffer(bufferSizeInFrames, (BYTE**)(&buffer));
            assert(hr == S_OK);
            for (UINT32 frameIndex = 0 + k; frameIndex < bufferSizeInFrames + k; ++frameIndex)
            {
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // left
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // right
            }
            k += bufferSizeInFrames;
            hr = audioRenderClient->ReleaseBuffer(bufferSizeInFrames, flags);
            assert(hr == S_OK);
            if (k >= MyBufferLength / 4)
            {
                k = 0;
                wavPlaybackSample = 0;
            }
        }
    }
}
Every time I press a key, I set the corresponding flag to 1 so that the buffers containing the samples are summed.
The difference between my version and your synth version is that mine uses 88 preloaded buffers containing the sounds of a real piano (wav).
int16_t* buffer;
int MyKeyDown[255];
int MyKeyCode[255];
short *fileBytes = new short[MyBufferLength];

void __fastcall TMyThread::Execute()
{
    HRESULT hr;
    int16_t* buffer;
    HANDLE hEvent = NULL;
    REFERENCE_TIME hnsRequestedDuration = 0;
    DWORD flags = 0;
    CoInitialize(NULL);
    //CoInitializeEx(NULL, COINIT_MULTITHREADED);
    IMMDeviceEnumerator *deviceEnumerator;
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (LPVOID *)(&deviceEnumerator));
    assert(hr == S_OK);
    IMMDevice* audioDevice;
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();
    IAudioClient2* audioClient;
    hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
    assert(hr == S_OK);
    WAVEFORMATEX wfx = {};
    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
    hr = audioClient->GetDevicePeriod(NULL, &hnsRequestedDuration);
    assert(hr == S_OK);
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, hnsRequestedDuration, hnsRequestedDuration, &wfx, NULL);
    // If the requested buffer size is not aligned...
    UINT32 nFrames = 0;
    if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED)
    {
        // Get the next aligned frame.
        hr = audioClient->GetBufferSize(&nFrames);
        assert(hr == S_OK);
        hnsRequestedDuration = (REFERENCE_TIME)
            ((10000.0 * 1000 / wfx.nSamplesPerSec * nFrames) + 0.5);
        // Release the old client and create a new one.
        audioClient->Release();
        hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
        assert(hr == S_OK);
        // Open the stream and associate it with an audio session.
        hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, hnsRequestedDuration, hnsRequestedDuration, &wfx, NULL);
        assert(hr == S_OK);
    }
    audioDevice->Release();
    hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (hEvent == NULL)
    {
        hr = E_FAIL;
        ShowMessage("CreateEvent fail!!!");
    }
    hr = audioClient->SetEventHandle(hEvent);
    assert(hr == S_OK);
    IAudioRenderClient *audioRenderClient;
    hr = audioClient->GetService(__uuidof(IAudioRenderClient), (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);
    UINT32 bufferSizeInFrames;
    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);
    // from here play buffer
    hr = audioClient->Start();
    assert(hr == S_OK);
    int wavPlaybackSample = 0;
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT)
    {
        DWORD retval = WaitForSingleObject(hEvent, 2000);
        UINT32 bufferPadding;
        hr = audioClient->GetCurrentPadding(&bufferPadding);
        assert(hr == S_OK);
        UINT32 soundBufferLatency = bufferSizeInFrames / 1;
        UINT32 numFramesToWrite = soundBufferLatency - bufferPadding;
        FillBufferWasapi();
        hr = audioRenderClient->GetBuffer(numFramesToWrite, (BYTE**)(&buffer));
        assert(hr == S_OK);
        for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
        {
            *buffer++ = fileBytes[wavPlaybackSample]; // left
            *buffer++ = fileBytes[wavPlaybackSample]; // right
            ++wavPlaybackSample;
            //wavPlaybackSample %= fileSize;
        }
        hr = audioRenderClient->ReleaseBuffer(numFramesToWrite, flags);
        assert(hr == S_OK);
        //Sleep((DWORD)(hnsRequestedDuration / 10000000));
    }
    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();
    CoUninitialize();
}
//---------------------------------------------------------------------------
void FillBufferWasapi()
{
    for (int ii = 0; ii < 255; ii++)
    {
        if (MyKeyDown[ii] == 1)
        {
            for (int i = 0; i < MyBufferLength; i++)
                fileBytes[i] += bfn[MyKeyCode[ii]][i];
        }
    }
}
//---------------------------------------------------------------------------
void __fastcall TForm1::AppMessage(MSG &Msg, bool &Handled)
{
    MyKeyCode['Z'] = 3; // C1
    MyKeyCode['X'] = 5; // D1
    MyKeyCode['C'] = 7; // E1
    switch (Msg.message)
    {
        case WM_KEYDOWN:
            if (MyKeyDown[Msg.wParam] == 0)
            {
                MyKeyDown[Msg.wParam] = 1;
            }
            break;
        case WM_KEYUP:
            if (MyKeyDown[Msg.wParam] == 1)
            {
                MyKeyDown[Msg.wParam] = 0;
            }
            break;
    }
}
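Two things stand out in FillBufferWasapi() as posted: fileBytes is never cleared, so every call keeps accumulating samples on top of the previous mix, and MyKeyDown is written by the UI thread while the audio thread reads it without any synchronization. A variant that at least starts each mix from silence might look like this (same names as above):

#include <cstring>

void FillBufferWasapi()
{
    // start from silence so released notes stop contributing
    memset(fileBytes, 0, MyBufferLength * sizeof(short));
    for (int ii = 0; ii < 255; ii++)
    {
        if (MyKeyDown[ii] == 1)
        {
            for (int i = 0; i < MyBufferLength; i++)
                fileBytes[i] += bfn[MyKeyCode[ii]][i];
        }
    }
}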
This should get you 99% of the way there; it's a sample player in plain C++ on top of wasapi.
Compiling and linking (an example command line is sketched after this list):
- needs a C++17 (or later) conforming compiler
- install the boost libraries, used for the lockfree queue
- probably needs the MS C++ compiler (it uses conio.h)
- link against avrt.lib for realtime audio threading (it uses AvSetMmThreadPriority)
- if you want it: full vs2019 project
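For reference, a plausible command line from a Developer Command Prompt (the file name player.cpp and the boost path are placeholders, not part of the original project):

cl /std:c++17 /EHsc /I C:\boost\include player.cpp /link avrt.lib ole32.lib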
Running:
- you need 5 .wav files in 44100 16-bit stereo format, named c4.wav through g4.wav
- see SamplePack
What it does:
- a console app runs a getchar() loop; c, d, e, f and g trigger note-ons, q quits
- because it's a console app there are no note-off messages; every keypress triggers playback of the full sample
- note-ons get tagged with a timestamp and posted to a shared queue (this is the boost lockfree thing, capped at size 64)
- so, you can crash it by pressing more than 64 keys within a 3-millisecond interval (the minimum wasapi exclusive-mode latency)
- the audio thread picks these messages up and translates them into an "active notes" list that is local to the audio thread; active notes are capped by the maximum polyphony (64)
- so, you can also crash it by pressing more than 64 keys within [the length of the shortest sample] seconds
- each active note gets mixed into the current wasapi buffer until it reaches the end of its .wav sample

Here's the code:
#include <atomic>
#include <vector>
#include <cstdio>
#include <cstdint>
#include <cassert>
#include <climits>
#include <fstream>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <boost/lockfree/queue.hpp>

#include <conio.h>
#include <atlbase.h>
#include <Windows.h>
#include <avrt.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
// for wasapi event callback
static HANDLE event_handle;

// sample data
static const size_t sample_count = 5;
static int16_t* note_samples[sample_count];
static size_t note_frame_counts[sample_count];
static std::vector<char> note_samples_raw[sample_count];
static char const* note_files[sample_count] = {
    "c4.wav", "d4.wav", "e4.wav", "f4.wav", "g4.wav"
};

// user input / audio thread communication
static std::atomic_bool stop_finished;
static std::atomic_bool stop_initiated;
// scale mix volume
static const double mix_scale_amp = 0.4;
// debug stuff
static int32_t prev_note_active_count = 0;
static int32_t prev_note_audible_count = 0;
// timing stuff
static const int64_t millis_per_second = 1000;
static const int64_t reftimes_per_milli = 10000;

// audio format = 44.1khz 16bit stereo
static const int32_t sample_size = 2;
static const int32_t channel_count = 2;
static const int32_t sample_rate = 44100;
static const int32_t frame_size = sample_size * channel_count;
// exclusive mode event driven must use 128-byte aligned buffers
static const int32_t alignment_requirement_bytes = 128;

// note down notification + timestamp
static const size_t note_queue_size = 64;
struct note_down_msg
{
    int32_t note; // 0..4 = c..g
    uint64_t time_stamp_qpc;
};
static boost::lockfree::queue<note_down_msg>
note_msg_queue(note_queue_size);

// current playing notes
static const size_t max_polyphony = 64;
struct active_note
{
    // slot in use?
    bool in_use;
    // note + timestamp
    note_down_msg msg;
    // position relative to stream pos when it should start
    uint64_t trigger_pos_frames;
    // how many of it has played already
    size_t frames_rendered;
    active_note() = default;
};
static active_note
active_notes[max_polyphony];

// shared by user input / audio thread
struct audio_thread_data
{
    IAudioClock* clock;
    IAudioClient* client;
    IAudioRenderClient* render;
};

// bail out on any error
#define CHECK_COM(expr) do { \
    HRESULT hr = expr; \
    if(SUCCEEDED(hr)) break; \
    std::cout << #expr << ": " << hr << "\n"; \
    std::terminate(); \
} while(0)
static WAVEFORMATEXTENSIBLE
make_audio_format()
{
    // translate format specification to WAVEFORMATEXTENSIBLE
    WAVEFORMATEXTENSIBLE result = { 0 };
    result.dwChannelMask = 0;
    result.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
    result.Samples.wValidBitsPerSample = sample_size * 8;
    result.Format.nChannels = channel_count;
    result.Format.nSamplesPerSec = sample_rate;
    result.Format.wBitsPerSample = sample_size * 8;
    result.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    result.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE);
    result.Format.nBlockAlign = channel_count * sample_size;
    result.Format.nAvgBytesPerSec = channel_count * sample_size * sample_rate;
    return result;
}
static void
load_note_samples()
{
    for (size_t i = 0; i < sample_count; i++)
    {
        // load piano samples to bytes
        auto path = std::filesystem::current_path() / note_files[i];
        std::ifstream input(path, std::ios::binary);
        assert(input);
        input.seekg(0, input.end);
        size_t length = input.tellg();
        input.seekg(0, input.beg);
        note_samples_raw[i].resize(length);
        input.read(note_samples_raw[i].data(), length);
        assert(input);
        input.close();
        // compute frame count and set actual audio data
        // 44 bytes skipped for .WAV file header
        note_frame_counts[i] = (length - 44) / (sample_size * channel_count);
        note_samples[i] = reinterpret_cast<int16_t*>(note_samples_raw[i].data() + 44);
    }
}
// this runs audio processing
static DWORD WINAPI
run_audio_thread(void* param)
{
    int16_t* audio;
    BYTE* audio_mem;
    bool slot_found;
    UINT32 buffer_frames;
    HANDLE task;
    BOOL success;
    DWORD wait_result;
    DWORD task_index = 0;
    UINT64 clock_pos;
    UINT64 clock_freq;
    UINT64 clock_qpc_pos;
    LARGE_INTEGER qpc_freq;
    audio_thread_data* data = static_cast<audio_thread_data*>(param);

    // init thread
    CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
    task = AvSetMmThreadCharacteristicsW(L"Pro Audio", &task_index);
    assert(task != nullptr);

    // wasapi buffer frame count & clock info
    CHECK_COM(data->client->GetBufferSize(&buffer_frames));
    CHECK_COM(data->clock->GetFrequency(&clock_freq));
    success = QueryPerformanceFrequency(&qpc_freq);
    assert(success);

    // audio loop
    data->client->Start();
    while (!stop_initiated.load())
    {
        wait_result = WaitForSingleObject(event_handle, INFINITE);
        assert(wait_result == WAIT_OBJECT_0);
        // retrieve and clear buffer for this round
        CHECK_COM(data->render->GetBuffer(buffer_frames, &audio_mem));
        audio = reinterpret_cast<int16_t*>(audio_mem);
        memset(audio, 0, buffer_frames * static_cast<uint64_t>(frame_size));
        // get timing stuff
        CHECK_COM(data->clock->GetPosition(&clock_pos, &clock_qpc_pos));
        uint64_t stream_offset_hns = clock_pos * reftimes_per_milli * millis_per_second / clock_freq;
        uint64_t stream_offset_frames = stream_offset_hns * sample_rate / (reftimes_per_milli * millis_per_second);
        // process each frame
        for (size_t f = 0; f < buffer_frames; f++)
        {
            // pop user input, find empty slot in active notes buffer
            // for better performance this can also be outside the frame
            // loop, at start of each buffer round, in that case add 1 additional buffer latency
            note_down_msg msg;
            while (note_msg_queue.pop(msg))
            {
                slot_found = false;
                for (size_t i = 0; i < max_polyphony; i++)
                    if (!active_notes[i].in_use)
                    {
                        slot_found = true;
                        active_notes[i].msg = msg;
                        active_notes[i].in_use = true;
                        active_notes[i].frames_rendered = 0;
                        int64_t clock_note_diff_qpc = clock_qpc_pos - static_cast<int64_t>(active_notes[i].msg.time_stamp_qpc);
                        int64_t clock_note_diff_hns = clock_note_diff_qpc * reftimes_per_milli * millis_per_second / qpc_freq.QuadPart;
                        int64_t clock_note_diff_frames = clock_note_diff_hns * sample_rate / (reftimes_per_milli * millis_per_second);
                        int64_t note_clock_diff_frames = -static_cast<int64_t>(clock_note_diff_frames);
                        // allow 1 buffer latency otherwise notes would have to start in the past
                        active_notes[i].trigger_pos_frames = stream_offset_frames + note_clock_diff_frames + buffer_frames;
                        assert(active_notes[i].trigger_pos_frames <= stream_offset_frames + buffer_frames * 3);
                        assert(active_notes[i].trigger_pos_frames >= stream_offset_frames + f);
                        break;
                    }
                if (!slot_found)
                    assert(!"Max polyphony reached.");
            }
            // debugging stuff
            int32_t note_active_count = 0;
            int32_t note_audible_count = 0;
            // compose frame from all samples active up to max_polyphony
            double current_samples[channel_count] = { 0 };
            for (size_t i = 0; i < max_polyphony; i++)
            {
                // slot not in use
                if (!active_notes[i].in_use) continue;
                note_active_count++;
                // not my turn yet
                // note this very briefly wastes a slot for a sample which starts halfway in the current buffer
                if (active_notes[i].trigger_pos_frames > stream_offset_frames + f) continue;
                if (active_notes[i].frames_rendered == note_frame_counts[active_notes[i].msg.note])
                {
                    // reached sample end
                    active_notes[i].in_use = false;
                    active_notes[i].frames_rendered = 0;
                    continue;
                }
                // note is active + audible
                note_audible_count++;
                size_t frame_index = active_notes[i].frames_rendered++;
                for (size_t c = 0; c < channel_count; c++)
                {
                    assert(active_notes[i].msg.note < sample_count);
                    assert(frame_index < note_frame_counts[active_notes[i].msg.note]);
                    current_samples[c] += static_cast<double>(note_samples[active_notes[i].msg.note][frame_index * channel_count + c] * mix_scale_amp) / SHRT_MAX;
                }
            }
            // normally never do io on the audio thread, just debugging
            if (prev_note_active_count != note_active_count || prev_note_audible_count != note_audible_count)
                ; //std::cout << "\nactive: " << note_active_count << " audible: " << note_audible_count << "\n";
            prev_note_active_count = note_active_count;
            prev_note_audible_count = note_audible_count;
            // convert to int16 and write to wasapi
            for (size_t c = 0; c < channel_count; c++)
                audio[f * channel_count + c] = static_cast<int16_t>(current_samples[c] * SHRT_MAX);
        }
        CHECK_COM(data->render->ReleaseBuffer(buffer_frames, 0));
    }
    data->client->Stop();

    // cleanup
    success = AvRevertMmThreadCharacteristics(task);
    assert(success);
    CoUninitialize();
    stop_finished.store(true);
    return 0;
}
// this runs user input
static void
run_user_input_thread()
{
    int32_t chr;
    int32_t note;
    BOOL success;
    UINT32 buffer_frames;
    REFERENCE_TIME engine;
    REFERENCE_TIME period;
    LARGE_INTEGER qpc_count;
    CComPtr<IMMDevice> device;
    CComPtr<IAudioClock> clock;
    CComPtr<IAudioClient> client;
    CComPtr<IAudioRenderClient> render;
    CComPtr<IMMDeviceEnumerator> enumerator;
    WAVEFORMATEXTENSIBLE format = make_audio_format();

    // get default render endpoint
    CHECK_COM(CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), reinterpret_cast<void**>(&enumerator)));
    CHECK_COM(enumerator->GetDefaultAudioEndpoint(eRender, eMultimedia, &device));
    CHECK_COM(device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, reinterpret_cast<void**>(&client)));

    // open exclusive mode event driven stream
    CHECK_COM(client->GetDevicePeriod(&engine, &period));
    buffer_frames = static_cast<uint32_t>(period / reftimes_per_milli * sample_rate / millis_per_second);
    while ((buffer_frames * frame_size) % alignment_requirement_bytes != 0) buffer_frames++;
    period = buffer_frames * millis_per_second * reftimes_per_milli / sample_rate;
    CHECK_COM(client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, period, period, reinterpret_cast<WAVEFORMATEX*>(&format), nullptr));
    event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr);
    assert(event_handle != nullptr);
    CHECK_COM(client->SetEventHandle(event_handle));
    CHECK_COM(client->GetService(__uuidof(IAudioClock), reinterpret_cast<void**>(&clock)));
    CHECK_COM(client->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&render)));

    // start audio thread
    audio_thread_data data = { 0 };
    data.clock = clock;
    data.client = client;
    data.render = render;
    CreateThread(nullptr, 0, run_audio_thread, &data, 0, nullptr);

    // process user input
    // cdefg = notes, q = quit
    while ((chr = _getch()) != 'q')
    {
        if (chr == 'c') note = 0;
        else if (chr == 'd') note = 1;
        else if (chr == 'e') note = 2;
        else if (chr == 'f') note = 3;
        else if (chr == 'g') note = 4;
        else continue;
        success = QueryPerformanceCounter(&qpc_count);
        note_down_msg msg;
        msg.note = note;
        msg.time_stamp_qpc = qpc_count.QuadPart;
        assert(success);
        note_msg_queue.push(msg);
        _putch(chr);
    }

    // cleanup
    stop_initiated.store(true);
    while (!stop_finished.load());
    success = CloseHandle(event_handle);
    assert(success);
}
int
main(int argc, char** argv)
{
    // wraps COM init/cleanup
    CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
    load_note_samples();
    run_user_input_thread();
    CoUninitialize();
    return 0;
}