"""3D-MARCo Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
**3D-MARCo: database of 3D sound recordings of musical performances and room impulse responses**
*Created By:*
| Hyunkook Lee, Dale Johnson, Bogdan Bacila.
| Centre for Audio and Psychoacoustic Engineering, University of Huddersfield.
Version 1.0.1
*Description:*
3D-MARCo is an open-access database of 3D sound recordings of musical performances and room impulse responses.
The recordings were made in the St. Paul's concert hall in Huddersfield, UK.
A total of 71 microphone capsules were used simultaneously.
The main microphone arrays included in the database comprise PCMA-3D, OCT-3D, 2L-Cube, Decca Cubioid, First-order Ambisonics (FOA), Higher-order Ambisonics (HOA) and Hamasaki Square with height.
In addition, ORTF, side/height, Voice of God and floor channels as well as a dummy head and spot microphones are included.
The sound sources recorded are string quartet, piano trio, piano solo, organ, a cappella group, various single sources and room impulse responses of a virtual ensemble with 13 source positions captured by all of the microphones.
3D-MARCo would be useful for spatial audio research, recording education, critical ear training, etc.
*Audio Files Included:*
* For each musical performance sound source (Acappella, Organ, Piano Solo 1, Piano Solo 2, Quartet, Trio), there are 65 wav files that correspond to:
* 64 individual capsules (24-bit / 96kHz resolution)
* one 32-channel EigenMike file in A-format (24-bit / 48kHz resolution).
* The piano recordings contain two more channels (left and right) that correspond to spot microphones placed just outside the piano pointing toward the hammers.
* The quartet recordings contain four more channels corresponding to spot microphones placed above the instruments (violin 1, violin 2, cello, viola) pointing toward the F hole.
* The trio recordings contain four more channels corresponding to spot microphones, two placed above the string instruments (violin, cello) pointing toward the F hole, and two placed just outside the piano pointing toward the hammers.
* The single sources were recorded at 7 different azimuth angles. For each angle there are also 65 wav files.
* The impulse responses were recorded at 13 different azimuth angles. For each angle there are 66 wav files. The extra one is the EigenMike 4th-order B-format ambisonics (ACN SN3D; 24-bit / 48kHz resolution).
*Annotations Included:*
* No event labels associated with this dataset
* No predefined training, validation, or testing splits.
* Angular orientation for "impulse responses" and "single sources" (follows the ITU-R convention, where positive angles are on the left-hand side and negative angles are on the right-hand side, e.g. +30° for Front Left and -30° for Front Right).
*Please Acknowledge 3D-MARCo in Academic Research:*
If you use this dataset please cite its original publication:
.. code-block:: latex
Lee H, Johnson D. An open-access database of 3D microphone array recordings. In Audio Engineering Society Convention 147, 2019 Oct 8. Audio Engineering Society.
*License:*
* CC-BY NC 3.0 license (free to share and adapt the material, but not permitted to use it for commercial purposes)
"""
import os
from typing import BinaryIO, Optional, TextIO, Tuple
import librosa
import numpy as np
import csv
import jams
import json
import glob
import numbers
from itertools import cycle
from soundata import download_utils, jams_utils, core, annotations, io
# BibTeX entry for the publication that introduced 3D-MARCo.
# Passed to core.Dataset as its `bibtex` argument by Dataset.__init__.
BIBTEX = """
@inproceedings{lee2019open,
title={An open-access database of 3D microphone array recordings},
author={Lee, Hyunkook and Johnson, Dale},
booktitle={Audio Engineering Society Convention 147},
year={2019},
organization={Audio Engineering Society}
}
"""
# Downloadable archives of the dataset, one zip per recorded source group.
# Each value records the archive's local filename, its Zenodo download URL
# and a checksum for the file.
# NOTE(review): the checksums are 32 hex digits, presumably MD5 -- confirm
# against download_utils.RemoteFileMetadata.
# Passed to core.Dataset as its `remotes` argument by Dataset.__init__.
REMOTES = {
# Room impulse responses.
"ImpulseResponses": download_utils.RemoteFileMetadata(
filename="03 3D-MARCo Impulse Responses.zip",
url="https://zenodo.org/record/3477602/files/03%203D-MARCo%20Impulse%20Responses.zip?download=1",
checksum="d328425ee2d1e847e225d78b676cd81e",
),
# Musical performance recordings.
"Quartet": download_utils.RemoteFileMetadata(
filename="04 3D-MARCo Samples_Quartet.zip",
url="https://zenodo.org/record/3477602/files/04%203D-MARCo%20Samples_Quartet.zip?download=1",
checksum="cce3442ae5a11ea869412c2e6a4cadcd",
),
"Trio": download_utils.RemoteFileMetadata(
filename="05 3D-MARCo Samples_Trio.zip",
url="https://zenodo.org/record/3477602/files/05%203D-MARCo%20Samples_Trio.zip?download=1",
checksum="48262496ecb6a32843e4b69393eeeec1",
),
"Organ": download_utils.RemoteFileMetadata(
filename="06 3D-MARCo Samples_Organ.zip",
url="https://zenodo.org/record/3477602/files/06%203D-MARCo%20Samples_Organ.zip?download=1",
checksum="cd015829e0a2bfc0aac239adc2b86321",
),
"PianoSolo1": download_utils.RemoteFileMetadata(
filename="07 3D-MARCo Samples_Piano solo 1.zip",
url="https://zenodo.org/record/3477602/files/07%203D-MARCo%20Samples_Piano%20solo%201.zip?download=1",
checksum="4a27da19a0bc857967e47b0044abf128",
),
"PianoSolo2": download_utils.RemoteFileMetadata(
filename="08 3D-MARCo Samples_Piano solo 2.zip",
url="https://zenodo.org/record/3477602/files/08%203D-MARCo%20Samples_Piano%20solo%202.zip?download=1",
checksum="7372b3a1273bcf10ade09472c3a92eed",
),
"Acappella": download_utils.RemoteFileMetadata(
filename="09 3D-MARCo Samples_Acappella.zip",
url="https://zenodo.org/record/3477602/files/09%203D-MARCo%20Samples_Acappella.zip?download=1",
checksum="9ce5a1e973fa04c084495f509f855225",
),
# Single-source recordings.
"SingleSources": download_utils.RemoteFileMetadata(
filename="10 3D-MARCo Samples_Single sources.zip",
url="https://zenodo.org/record/3477602/files/10%203D-MARCo%20Samples_Single%20sources.zip?download=1",
checksum="389e774c829a0729047bd8802021b239",
),
}
# Short license notice for the dataset.
# Passed to core.Dataset as its `license_info` argument by Dataset.__init__.
LICENSE_INFO = """
CC-BY NC 3.0 license
"""
class Clip(core.Clip):
    """3D-MARCo Clip class

    Args:
        clip_id (str): id of the clip
        data_home (str): forwarded unchanged to ``core.Clip``
        dataset_name (str): forwarded unchanged to ``core.Clip``
        index (dict): forwarded unchanged to ``core.Clip``
        metadata: forwarded unchanged to ``core.Clip``

    Attributes:
        audio_path (str): path to the audio file
        clip_id (str): clip id
        microphone_info (list): list of strings with all relevant microphone metadata
        source_angle (str or None): angle of the source being recorded
            (e.g. ``"+30deg"``), or None when the clip has no angle
        source_label (str): label of the source being recorded

    """

    def __init__(self, clip_id, data_home, dataset_name, index, metadata):
        super().__init__(clip_id, data_home, dataset_name, index, metadata)

        self.audio_path = self.get_path("audio")

        # Every field is read with .get(), so a key missing from the clip's
        # metadata yields None instead of raising.
        self.source_label = self._clip_metadata.get("source_label")
        self.source_angle = self._clip_metadata.get("source_angle")
        self.microphone_info = self._clip_metadata.get("microphone_info")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The clip's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the clip's data in jams format

        Returns:
            jams.JAMS: the clip's data in jams format

        """
        # No tags exist for this dataset; only the parsed clip metadata is attached.
        return jams_utils.jams_converter(
            audio_path=self.audio_path, tags=None, metadata=self._clip_metadata
        )
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO, sr=48000) -> Tuple[np.ndarray, float]:
    """Load a 3D-MARCo audio file

    Args:
        fhandle (str or file-like): file-like object or path to audio file
        sr (int or None): sample rate for loaded audio, 48000 by default,
            which re-samples all files except the EigenMike ones, resulting
            in constant sampling rate between all clips in the dataset.
            ``None`` is passed straight through to ``librosa.load``, which
            then keeps each file's native sample rate.

    Returns:
        * np.ndarray - the audio signal (loaded with ``mono=False``, so
          channels are not mixed down)
        * float - The sample rate of the audio file

    """
    audio, sr = librosa.load(fhandle, sr=sr, mono=False)
    return audio, sr
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """The 3D-MARCo dataset"""

    def __init__(self, data_home=None):
        super().__init__(
            data_home,
            name="marco",
            clip_class=Clip,
            bibtex=BIBTEX,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    # No docstring here on purpose: copy_docs takes it from the module-level
    # load_audio, which this method simply delegates to.
    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @core.cached_property
    def _metadata(self):
        """Build per-clip metadata by parsing the clip ids of the bundled index.

        The dataset ships no metadata file, so everything is recovered from
        the ``"<folder>/<filename>"`` clip ids listed under ``"clips"`` in
        ``indexes/marco_index.json`` (located next to this module).

        Returns:
            dict: maps each clip id to a dict with keys ``"source_label"``
            (the folder part), ``"source_angle"`` (leading token up to and
            including ``"deg"``, or None) and ``"microphone_info"`` (the
            remaining filename tokens).

        """
        index_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "indexes/marco_index.json"
        )
        with open(index_path, encoding="utf-8") as fhandle:
            marco_index = json.load(fhandle)

        metadata_index = {}
        for clip_id in marco_index["clips"]:
            source_label, filename = clip_id.split("/")

            # Remove arbitrary clip numbering used by dataset authors: drop
            # empty tokens and any token that begins with "0".
            # NOTE(review): this would also drop a token such as "0deg" --
            # confirm that no clip id encodes the zero angle that way.
            tokens = [
                token
                for token in filename.split("_")
                if token and not token.startswith("0")
            ]

            # Guard against a filename whose tokens were all filtered out.
            leading = tokens[0] if tokens else ""
            if "deg" in leading:
                # Keep everything up to and including "deg", e.g. "+30deg".
                source_angle = "".join(leading.partition("deg")[:2])
            else:
                source_angle = None

            metadata_index[clip_id] = {
                "source_label": source_label,
                "source_angle": source_angle,
                "microphone_info": tokens[1:],
            }

        return metadata_index