73 lines
2.3 KiB
Python
73 lines
2.3 KiB
Python
|
import warnings
|
||
|
from sys import platform
|
||
|
from typing import Optional
|
||
|
|
||
|
import torch
|
||
|
import torchaudio
|
||
|
|
||
|
# Lookup table from a waveform's tensor dtype to the sample-format string
# that ``play_audio`` passes as ``format=`` when adding the audio stream.
# A dtype that is absent from this table is rejected by ``play_audio``.
dict_format = dict(
    [
        (torch.uint8, "u8"),
        (torch.int16, "s16"),
        (torch.int32, "s32"),
        (torch.int64, "s64"),
        (torch.float32, "flt"),
        (torch.float64, "dbl"),
    ]
)
|
||
|
|
||
|
|
||
|
def play_audio(
    waveform: torch.Tensor,
    sample_rate: Optional[float],
    device: Optional[str] = None,
) -> None:
    """Plays audio through specified or available output device.

    .. warning::
       This function is currently only supported on MacOS, and requires
       libavdevice (FFmpeg) with ``audiotoolbox`` output device.

    .. note::
       This function can play up to two audio channels. When ``waveform``
       has more, a warning is issued and only the first two are played.

    Args:
        waveform: Tensor containing the audio to play.
            Expected shape: `(time, num_channels)`.
        sample_rate: Sample rate of the audio to play.
        device: Output device to use. If None, the default device is used.

    Raises:
        ValueError: If the current OS is not MacOS, if ``device`` is not among
            the available output devices, if the dtype of ``waveform`` is not
            a key of ``dict_format``, or if ``waveform`` is not 2D.
    """
    if platform != "darwin":
        raise ValueError(f"This function only supports MacOS, but current OS is {platform}")
    device = device or "audiotoolbox"
    # Destination string given to StreamWriter together with the device format.
    path = "-"

    available_devices = list(torchaudio.utils.ffmpeg_utils.get_output_devices().keys())
    if device not in available_devices:
        raise ValueError(f"Device {device} is not available. Available devices are: {available_devices}")

    if waveform.dtype not in dict_format:
        # Render the keys as a plain list; a raw ``dict_keys([...])`` repr is noisy.
        raise ValueError(
            f"Unsupported type {waveform.dtype}. The list of supported types is: {list(dict_format)}"
        )
    sample_format = dict_format[waveform.dtype]

    if waveform.ndim != 2:
        raise ValueError(f"Expected 2D tensor with shape `(time, num_channels)`, got {waveform.ndim}D tensor instead")

    max_channels = 2
    num_frames, num_channels = waveform.size()
    if num_channels > max_channels:
        warnings.warn(
            f"Expected up to 2 channels, got {num_channels} channels instead. "
            "Only the first 2 channels will be played.",
            stacklevel=2,
        )
        # Actually drop the extra channels so behaviour matches the warning.
        # The column slice is a strided view, hence the explicit contiguous copy
        # before chunks of it are handed to the writer.
        waveform = waveform[:, :max_channels].contiguous()
        num_channels = max_channels

    # Write to speaker device
    # NOTE(review): ``sample_rate`` is annotated Optional but is forwarded
    # unchanged; confirm whether ``None`` is really acceptable here.
    writer = torchaudio.io.StreamWriter(dst=path, format=device)
    writer.add_audio_stream(sample_rate, num_channels, format=sample_format)

    # Feed the device in fixed-size blocks of frames; the final block may be shorter.
    block_size = 256
    with writer.open():
        for start in range(0, num_frames, block_size):
            writer.write_audio_chunk(0, waveform[start : start + block_size, :])