"""DCASE-BIOACOUSTIC Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
**DCASE-BIOACOUSTIC**
*Development set:*
The development set for task 5 of DCASE 2022 "Few-shot Bioacoustic Event Detection" consists of 192 audio files acquired from different bioacoustic sources. The dataset is split into training and validation sets.
Multi-class annotations are provided for the training set with positive (POS), negative (NEG) and unknown (UNK) values for each class. UNK indicates uncertainty about a class.
Single-class (class of interest) annotations are provided for the validation set, with events marked as positive (POS) or unknown (UNK) provided for the class of interest.
This version (3) fixes issues with annotations from HB set. Development_Set_Annotations.zip has the same structure but contains only the .csv files.
*Annotation structure*
Each line of the annotation csv represents an event in the audio file. The column descriptions are as follows:
TRAINING SET: Audiofilename, Starttime, Endtime, CLASS_1, CLASS_2, ...CLASS_N
VALIDATION SET: Audiofilename, Starttime, Endtime, Q
*Classes*
DCASE2022_task5_training_set_classes.csv and DCASE2022_task5_validation_set_classes.csv provide a table with class code correspondence to class name for all classes in the Development set.
DCASE2022_task5_training_set_classes.csv: dataset, class_code, class_name
DCASE2022_task5_validation_set_classes.csv: dataset, recording, class_code, class_name
*Evaluation set*
The evaluation set for task 5 of DCASE 2022 "Few-shot Bioacoustic Event Detection" consists of 46 audio files acquired from different bioacoustic sources.
The first 5 annotations are provided for each file, with events marked as positive (POS) for the class of interest.
This dataset is to be used for evaluation purposes during the task and the rest of the annotations will be released after the end of the DCASE 2022 challenge (July 1st).
Evaluation_Set_5shots.zip has the same structure but contains only the .wav files.
Evaluation_Set_5shots_annotations_only.zip has the same structure but contains only the .csv files
The subfolders denote different recording sources and there may or may not be overlap between classes of interest from different wav files.
Annotation structure
Each line of the annotation csv represents an event in the audio file. The column descriptions are as follows:
[ Audiofilename, Starttime, Endtime, Q ]
*Open Access:*
This dataset is available under a Creative Commons Attribution 4.0 International (CC BY 4.0) license.
*Contact info:*
Please send any feedback or questions to:
Ines Nolasco - i.dealmeidanolasco@qmul.ac.uk
"""
import os
from typing import BinaryIO, Optional, TextIO, Tuple
import librosa
import numpy as np
import csv
import jams
import glob
import json
from soundata import download_utils
from soundata import jams_utils
from soundata import core
from soundata import annotations
from soundata import io
# Citation shown to users by soundata's cite helper.
# NOTE(review): fixed mojibake "VidaƱa-Vila" -> "Vidaña-Vila" (Ester Vidaña-Vila).
BIBTEX = """
@dataset{nolasco_ines_2022_6482837,
author = {Nolasco, Ines and
Singh, Shubhr and
Strandburg-Peshkin, Ariana and
Gill, Lisa and
Pamula, Hanna and
Morford, Joe and
Emmerson, Michael and
Jensen, Frants and
Whitehead, Helen and
Kiskin, Ivan and
Vidaña-Vila, Ester and
Lostanlen, Vincent and
Morfi, Veronica and
Stowell, Dan},
title = {{DCASE 2022 Task 5: Few-shot Bioacoustic Event
Detection Development Set}},
month = mar,
year = 2022,
publisher = {Zenodo},
doi = {10.5281/zenodo.6482837},
url = {https://doi.org/10.5281/zenodo.6482837}
}
"""

# Remote artifacts to download: development audio+annotations, the two
# class-code lookup tables, and the 5-shot evaluation set. Checksums are md5.
REMOTES = {
    "dev": download_utils.RemoteFileMetadata(
        filename="Development_Set.zip",
        url="https://zenodo.org/record/6482837/files/Development_Set.zip?download=1",
        checksum="cf4d3540c6c78ac2b3df2026c4f1f7ea",
    ),
    "train-classes": download_utils.RemoteFileMetadata(
        filename="DCASE2022_task5_Training_set_classes.csv",
        url="https://zenodo.org/record/6482837/files/DCASE2022_task5_Training_set_classes.csv?download=1",
        checksum="abce1818ba10436971bad0b6a3464aa6",
    ),
    "validation-classes": download_utils.RemoteFileMetadata(
        filename="DCASE2022_task5_Validation_set_classes.csv",
        url="https://zenodo.org/record/6482837/files/DCASE2022_task5_Validation_set_classes.csv?download=1",
        checksum="0c05ff0c9e1662ff8958c4c812abffdb",
    ),
    "eval": download_utils.RemoteFileMetadata(
        filename="Evaluation_set_5shots.zip",
        url="https://zenodo.org/record/6517414/files/Evaluation_set_5shots.zip?download=1",
        checksum="5212c0e133874bba1ee25c81ced0de99",
    ),
}

LICENSE_INFO = "Creative Commons Attribution 4.0 International"
class Clip(core.Clip):
    """DCASE bioacoustic Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
        audio (np.ndarray, float): audio signal and sample rate
        audio_path (str): path to the audio file
        csv_path (str): path to the csv annotation file
        clip_id (str): clip id
        split (str): subset the clip belongs to (for experiments): train, validate, or test

    Cached Properties:
        events_classes (list): list of classes annotated for the file
        events (soundata.annotations.Events): sound events with start time, end time, labels (list for all classes) and confidence
        POSevents (soundata.annotations.Events): sound events for the positive class with start time, end time, label and confidence

    """

    def __init__(self, clip_id, data_home, dataset_name, index, metadata):
        super().__init__(clip_id, data_home, dataset_name, index, metadata)
        # Paths are resolved from the dataset index; one audio file and one
        # annotation csv per clip.
        self.audio_path = self.get_path("audio")
        self.csv_path = self.get_path("csv")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The clip's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    @property
    def split(self):
        """The data split the clip belongs to (e.g. train)

        Returns:
            * str - split

        """
        return self._clip_metadata.get("split")

    @property
    def subdataset(self):
        """The (sub)dataset (recording source) the clip belongs to

        Returns:
            * str - subdataset

        """
        return self._clip_metadata.get("subdataset")

    @core.cached_property
    def events_classes(self) -> Optional[list]:
        """The class codes annotated for this clip

        Returns:
            * list - list of the annotated event classes

        """
        return load_events_classes(self.csv_path)

    @core.cached_property
    def events(self) -> Optional[annotations.Events]:
        """The audio events (all class columns, whatever their value)

        Returns:
            * annotations.Events - audio event object

        """
        return load_events(self.csv_path)

    @core.cached_property
    def POSevents(self) -> Optional[annotations.Events]:
        """The audio events restricted to POS (positive) annotations

        Returns:
            * annotations.Events - audio event object

        """
        return load_POSevents(self.csv_path)

    def to_jams(self):
        """Get the clip's data in jams format

        Returns:
            jams.JAMS: the clip's data in jams format

        """
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            events=self.events,
            metadata={
                "split": self._clip_metadata.get("split"),
                "subdataset": self._clip_metadata.get("subdataset"),
            },
        )
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO, sr=None) -> Tuple[np.ndarray, float]:
    """Load a DCASE bioacoustic audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file
        sr (int or None): sample rate for loaded audio, None by default, which
            uses the file's original sample rate without resampling.

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # Downmix to mono; sr=None keeps the file's native sample rate.
    audio, sr = librosa.load(fhandle, sr=sr, mono=True)
    return audio, sr
@io.coerce_to_string_io
def load_events(fhandle: TextIO) -> annotations.Events:
    """Load a DCASE bioacoustic sound events annotation file.

    Every class column of each row contributes to the event label, regardless
    of its POS/NEG/UNK value; use :func:`load_POSevents` for positive-only
    labels.

    Args:
        fhandle (str or file-like): File-like object or path to the sound events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        Events: sound events annotation data

    """
    times = []
    labels = []
    confidence = []
    reader = csv.reader(fhandle, delimiter=",")
    headers = next(reader)
    # Columns after [Audiofilename, Starttime, Endtime] are per-class values.
    class_ids = headers[3:]
    for line in reader:
        times.append([float(line[1]), float(line[2])])
        # All class columns present in the row are joined into one label.
        labels.append(",".join(class_ids[i] for i in range(len(line) - 3)))
        # No per-event confidence in the csv; report full confidence.
        confidence.append(1.0)
    events_data = annotations.Events(
        intervals=np.array(times),
        intervals_unit="seconds",
        labels=labels,
        labels_unit="open",
        confidence=np.array(confidence),
    )
    return events_data
@io.coerce_to_string_io
def load_POSevents(fhandle: TextIO) -> annotations.Events:
    """Load a DCASE bioacoustic sound events annotation file, keeping only POS labels.

    Args:
        fhandle (str or file-like): File-like object or path to the sound events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        Events: sound events annotation data

    """
    times = []
    labels = []
    confidence = []
    reader = csv.reader(fhandle, delimiter=",")
    headers = next(reader)
    # Columns after [Audiofilename, Starttime, Endtime] are per-class values.
    class_ids = headers[3:]
    for line in reader:
        times.append([float(line[1]), float(line[2])])
        # Keep only the classes explicitly marked positive for this event.
        positives = [class_ids[i] for i, value in enumerate(line[3:]) if value == "POS"]
        labels.append(",".join(positives))
        # No per-event confidence in the csv; report full confidence.
        confidence.append(1.0)
    events_data = annotations.Events(
        intervals=np.array(times),
        intervals_unit="seconds",
        labels=labels,
        labels_unit="open",
        confidence=np.array(confidence),
    )
    return events_data
@io.coerce_to_string_io
def load_events_classes(fhandle: TextIO) -> list:
    """Load the class codes annotated in a DCASE bioacoustic sound events annotation file.

    Args:
        fhandle (str or file-like): File-like object or path to the sound events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        list: class codes, taken from the csv header columns after
        [Audiofilename, Starttime, Endtime]

    """
    reader = csv.reader(fhandle, delimiter=",")
    headers = next(reader)
    class_ids = headers[3:]
    return class_ids
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """The DCASE bioacoustic dataset"""

    def __init__(self, data_home=None):
        super().__init__(
            data_home,
            name="dcase_bioacoustic",
            clip_class=Clip,
            bibtex=BIBTEX,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @staticmethod
    def _split_from_prefix(path_prefix):
        """Infer the experimental split from the csv path portion preceding the clip id."""
        if "Training" in path_prefix:
            return "train"
        if "Validation" in path_prefix:
            return "validation"
        return "evaluation"

    @core.cached_property
    def _metadata(self):
        # Per-clip metadata is derived from where each annotation csv lives in
        # the folder tree: .../<split dir>/<subdataset>/<clip_id>.csv
        metadata_index = {}
        for clip_id, v in self._index["clips"].items():
            prefix = os.path.normpath(v["csv"][0]).split(clip_id)[0]
            metadata_index[clip_id] = {
                # prefix ends with a separator, so [-2] is the parent directory
                # name, i.e. the recording source (subdataset).
                "subdataset": prefix.split(os.path.sep)[-2],
                "split": self._split_from_prefix(prefix),
            }

        # Class-code lookup tables shipped alongside the audio archives.
        metadata_paths = {
            "train": os.path.join(
                self.data_home, "DCASE2022_task5_Training_set_classes.csv"
            ),
            "validation": os.path.join(
                self.data_home, "DCASE2022_task5_Validation_set_classes.csv"
            ),
        }
        metadata_index["class_codes"] = {}
        metadata_index["subdatasets"] = {}
        for split, metadata_path in metadata_paths.items():
            metadata_path = os.path.normpath(metadata_path)
            if not os.path.exists(metadata_path):
                raise FileNotFoundError("Metadata not found. Did you run .download()?")

            with open(metadata_path, "r") as fhandle:
                reader = csv.reader(fhandle, delimiter=",")
                headers = next(reader)
                # Locate columns by name: the two csvs share these columns but
                # differ in overall layout (validation adds a "recording" column).
                class_code_id = headers.index("class_code")
                class_name_id = headers.index("class_name")
                dataset_id = headers.index("dataset")

                for line in reader:
                    code = line[class_code_id]
                    subdataset = line[dataset_id]
                    metadata_index["class_codes"][code] = {
                        "subdataset": subdataset,
                        "class_name": line[class_name_id],
                        "split": split,
                    }
                    # Group class codes by their subdataset.
                    metadata_index["subdatasets"].setdefault(subdataset, []).append(
                        code
                    )

        return metadata_index