Source code for soundata.datasets.dcase_bioacoustic

"""DCASE-BIOACOUSTIC Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    **DCASE-BIOACOUSTIC**

    *Development set:*

    The development set for task 5 of DCASE 2022 "Few-shot Bioacoustic Event Detection" consists of 192 audio files acquired from different bioacoustic sources. The dataset is split into training and validation sets. 

    Multi-class annotations are provided for the training set with positive (POS), negative (NEG) and unknown (UNK) values for each class. UNK indicates uncertainty about a class.

    Single-class (class of interest) annotations are provided for the validation set, with events marked as positive (POS) or unknown (UNK) provided for the class of interest.

    This version (3) fixes issues with annotations from HB set. Development_Set_Annotations.zip has the same structure but contains only the .csv files.

    
    *Annotation structure*

    Each line of the annotation csv represents an event in the audio file. The column descriptions are as follows:

    TRAINING SET: Audiofilename, Starttime, Endtime, CLASS_1, CLASS_2, ...CLASS_N

    VALIDATION SET: Audiofilename, Starttime, Endtime, Q

    

    *Classes*

    DCASE2022_task5_training_set_classes.csv and DCASE2022_task5_validation_set_classes.csv provide a table with class code correspondence to class name for all classes in the Development set.

    DCASE2022_task5_training_set_classes.csv: dataset, class_code, class_name

    DCASE2022_task5_validation_set_classes.csv: dataset, recording, class_code, class_name

    

    *Evaluation set*

    The evaluation set for task 5 of DCASE 2022 "Few-shot Bioacoustic Event Detection" consists of 46 audio files acquired from different bioacoustic sources. 

    The first 5 annotations are provided for each file, with events marked as positive (POS) for the class of interest. 

    This dataset is to be used for evaluation purposes during the task and the rest of the annotations will be released after the end of the DCASE 2022 challenge (July 1st).    

    Evaluation_Set_5shots.zip has the same structure but contains only the .wav files.

    Evaluation_Set_5shots_annotations_only.zip has the same structure but contains only the .csv files

    The subfolders denote different recording sources and there may or may not be overlap between classes of interest from different wav files.

    *Annotation structure*

    Each line of the annotation csv represents an event in the audio file. The column descriptions are as follows:
    [ Audiofilename, Starttime, Endtime, Q ]


    *Open Access:*

    This dataset is available under a Creative Commons Attribution 4.0 International (CC BY 4.0) license.
    

    *Contact info:*

    Please send any feedback or questions to:

    Ines Nolasco -  i.dealmeidanolasco@qmul.ac.uk
"""

import os
from typing import BinaryIO, Optional, TextIO, Tuple

import librosa
import numpy as np
import csv
import jams
import glob
import json

from soundata import download_utils
from soundata import jams_utils
from soundata import core
from soundata import annotations
from soundata import io


# BibTeX entry for the Zenodo record of the development set (DOI 10.5281/zenodo.6482837).
# The author name "Vidaña-Vila" is spelled with a literal "ñ"; it had previously been
# stored as a mis-encoded character.
BIBTEX = """
@dataset{nolasco_ines_2022_6482837,
  author       = {Nolasco, Ines and
                  Singh, Shubhr and
                  Strandburg-Peshkin, Ariana and
                  Gill, Lisa and
                  Pamula, Hanna and
                  Morford, Joe and
                  Emmerson, Michael and
                  Jensen, Frants and
                  Whitehead, Helen and
                  Kiskin, Ivan and
                  Vidaña-Vila, Ester and
                  Lostanlen, Vincent and
                  Morfi, Veronica and
                  Stowell, Dan},
  title        = {{DCASE 2022 Task 5: Few-shot Bioacoustic Event 
                   Detection Development Set}},
  month        = mar,
  year         = 2022,
  publisher    = {Zenodo},
  doi          = {10.5281/zenodo.6482837},
  url          = {https://doi.org/10.5281/zenodo.6482837}
}
"""
# Remote resources that make up the dataset, keyed by a short partial-download name.
# Each entry records the file name to save as, the Zenodo download URL and the
# expected checksum of the downloaded file.
REMOTES = {
    # Development set archive (Zenodo record 6482837).
    "dev": download_utils.RemoteFileMetadata(
        filename="Development_Set.zip",
        url="https://zenodo.org/record/6482837/files/Development_Set.zip?download=1",
        checksum="cf4d3540c6c78ac2b3df2026c4f1f7ea",
    ),
    # Class-code table for the training part of the development set.
    "train-classes": download_utils.RemoteFileMetadata(
        filename="DCASE2022_task5_Training_set_classes.csv",
        url="https://zenodo.org/record/6482837/files/DCASE2022_task5_Training_set_classes.csv?download=1",
        checksum="abce1818ba10436971bad0b6a3464aa6",
    ),
    # Class-code table for the validation part of the development set.
    "validation-classes": download_utils.RemoteFileMetadata(
        filename="DCASE2022_task5_Validation_set_classes.csv",
        url="https://zenodo.org/record/6482837/files/DCASE2022_task5_Validation_set_classes.csv?download=1",
        checksum="0c05ff0c9e1662ff8958c4c812abffdb",
    ),
    # Evaluation set archive (separate Zenodo record 6517414).
    "eval": download_utils.RemoteFileMetadata(
        filename="Evaluation_set_5shots.zip",
        url="https://zenodo.org/record/6517414/files/Evaluation_set_5shots.zip?download=1",
        checksum="5212c0e133874bba1ee25c81ced0de99",
    ),
}

# Human-readable name of the license the dataset is released under (CC BY 4.0).
LICENSE_INFO = "Creative Commons Attribution 4.0 International"


class Clip(core.Clip):
    """DCASE bioacoustic Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
        audio (np.ndarray, float): the clip's audio signal and its sample rate
        audio_path (str): path to the audio file
        csv_path (str): path to the csv annotation file
        clip_id (str): clip id
        split (str): subset the clip belongs to, as stored in the clip metadata
        subdataset (str): (sub)dataset the clip belongs to, as stored in the
            clip metadata

    Cached properties:
        events_classes (list): class ids annotated for the file (the header
            columns of the csv after the first three)
        events (soundata.annotations.Events): sound events with start time,
            end time, labels (list for all classes) and confidence
        POSevents (soundata.annotations.Events): sound events with start time,
            end time, label restricted to the classes marked POS, and confidence

    """

    def __init__(self, clip_id, data_home, dataset_name, index, metadata):
        super().__init__(clip_id, data_home, dataset_name, index, metadata)

        # Resolve the per-clip file locations once, at construction time.
        self.audio_path = self.get_path("audio")
        self.csv_path = self.get_path("csv")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The clip's audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    @property
    def split(self):
        """The data split the clip belongs to (e.g. train)

        Returns:
            * str - split, or None when the metadata has no "split" entry

        """
        return self._clip_metadata.get("split")

    @property
    def subdataset(self):
        """The (sub)dataset the clip belongs to

        Returns:
            * str - subdataset, or None when the metadata has no "subdataset" entry

        """
        return self._clip_metadata.get("subdataset")

    @core.cached_property
    def events_classes(self) -> Optional[list]:
        """The classes annotated in the clip's csv file

        Returns:
            * list - class ids read from the csv header

        """
        return load_events_classes(self.csv_path)

    @core.cached_property
    def events(self) -> Optional[annotations.Events]:
        """The audio events

        Returns:
            * annotations.Events - audio event object

        """
        return load_events(self.csv_path)

    @core.cached_property
    def POSevents(self) -> Optional[annotations.Events]:
        """The audio events, labelled only with their POS (positive) classes

        Returns:
            * annotations.Events - audio event object

        """
        return load_POSevents(self.csv_path)

    def to_jams(self):
        """Get the clip's data in jams format

        Returns:
            jams.JAMS: the clip's data in jams format

        """
        # The properties read the same metadata entries the JAMS file records.
        clip_metadata = {
            "split": self.split,
            "subdataset": self.subdataset,
        }
        return jams_utils.jams_converter(
            audio_path=self.audio_path,
            events=self.events,
            metadata=clip_metadata,
        )
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO, sr=None) -> Tuple[np.ndarray, float]:
    """Load a DCASE bioacoustic audio file.

    Args:
        fhandle (str or file-like): File-like object or path to audio file
        sr (int or None): sample rate for loaded audio, None by default,
            which uses the file's original sample rate without resampling.

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # mono=True asks librosa for a single-channel signal.
    signal, sample_rate = librosa.load(fhandle, sr=sr, mono=True)
    return signal, sample_rate
@io.coerce_to_string_io
def load_events(fhandle: TextIO) -> annotations.Events:
    """Load a DCASE bioacoustic sound events annotation file

    Every row after the header becomes one event. Columns 1 and 2 hold the
    start and end time; the event label is the comma-joined list of class ids
    (header columns from index 3 on), one per value column present in the
    row, whatever that value is. Confidence is always 1.0.

    Args:
        fhandle (str or file-like): File-like object or path to the sound
            events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        Events: sound events annotation data

    """
    rows = csv.reader(fhandle, delimiter=",")
    # The first three header columns are file name, start and end time.
    class_ids = next(rows)[3:]

    intervals = []
    labels = []
    for row in rows:
        intervals.append([float(row[1]), float(row[2])])
        # One class id per value column in this row; the value itself is ignored.
        labels.append(",".join(class_ids[idx] for idx, _ in enumerate(row[3:])))

    return annotations.Events(
        intervals=np.array(intervals),
        intervals_unit="seconds",
        labels=labels,
        labels_unit="open",
        confidence=np.ones(len(labels)),
    )
@io.coerce_to_string_io
def load_POSevents(fhandle: TextIO) -> annotations.Events:
    """Load a DCASE bioacoustic sound events annotation file, just for POS labels

    Every row after the header becomes one event. Columns 1 and 2 hold the
    start and end time; the event label is the comma-joined list of class ids
    whose value in that row is exactly "POS". A row without any POS value
    still produces an event, with an empty label. Confidence is always 1.0.

    Args:
        fhandle (str or file-like): File-like object or path to the sound
            events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        Events: sound events annotation data

    """
    rows = csv.reader(fhandle, delimiter=",")
    # The first three header columns are file name, start and end time.
    class_ids = next(rows)[3:]

    intervals = []
    labels = []
    for row in rows:
        intervals.append([float(row[1]), float(row[2])])
        # Keep only the classes this event is a positive example of.
        positive_ids = (
            class_ids[idx] for idx, value in enumerate(row[3:]) if value == "POS"
        )
        labels.append(",".join(positive_ids))

    return annotations.Events(
        intervals=np.array(intervals),
        intervals_unit="seconds",
        labels=labels,
        labels_unit="open",
        confidence=np.ones(len(labels)),
    )
@io.coerce_to_string_io
def load_events_classes(fhandle: TextIO) -> list:
    """Load the class ids of a DCASE bioacoustic sound events annotation file

    Only the header row is read; the class ids are its columns from index 3
    on (the first three columns are file name, start time and end time).

    Args:
        fhandle (str or file-like): File-like object or path to the sound
            events annotation file

    Raises:
        IOError: if csv_path doesn't exist

    Returns:
        class_ids: list of events classes

    """
    header = next(csv.reader(fhandle, delimiter=","))
    return header[3:]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """The DCASE bioacoustic dataset"""

    def __init__(self, data_home=None):
        super().__init__(
            data_home,
            name="dcase_bioacoustic",
            clip_class=Clip,
            bibtex=BIBTEX,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        return load_audio(*args, **kwargs)

    @core.cached_property
    def _metadata(self):
        """Build the metadata index for the dataset.

        The result maps every clip id to its "subdataset" and "split", both
        derived from the location of the clip's csv file, and additionally
        holds two tables read from the class csv files in ``data_home``:
        "class_codes" (class code -> subdataset, class name, split) and
        "subdatasets" (subdataset -> list of class codes).

        Raises:
            FileNotFoundError: if a class csv file is missing from data_home

        """
        metadata_index = {}
        for clip_id, clip_entry in self._index["clips"].items():
            # Everything in the csv path that precedes the clip id, i.e. its folder.
            parent_dir = os.path.normpath(clip_entry["csv"][0]).split(clip_id)[0]

            if "Training" in parent_dir:
                clip_split = "train"
            elif "Validation" in parent_dir:
                clip_split = "validation"
            else:
                clip_split = "evaluation"

            metadata_index[clip_id] = {
                # parent_dir ends with a separator, so the folder name is second to last.
                "subdataset": parent_dir.split(os.path.sep)[-2],
                "split": clip_split,
            }

        class_files = {
            "train": os.path.join(
                self.data_home, "DCASE2022_task5_Training_set_classes.csv"
            ),
            "validation": os.path.join(
                self.data_home, "DCASE2022_task5_Validation_set_classes.csv"
            ),
        }

        class_codes = {}
        subdatasets = {}
        metadata_index["class_codes"] = class_codes
        metadata_index["subdatasets"] = subdatasets

        for split, classes_path in class_files.items():
            classes_path = os.path.normpath(classes_path)
            if not os.path.exists(classes_path):
                raise FileNotFoundError("Metadata not found. Did you run .download()?")

            with open(classes_path, "r") as fhandle:
                rows = csv.reader(fhandle, delimiter=",")
                # Columns are located by name because the two files differ in layout.
                header = next(rows)
                code_col = header.index("class_code")
                name_col = header.index("class_name")
                dataset_col = header.index("dataset")

                for row in rows:
                    code = row[code_col]
                    dataset = row[dataset_col]
                    class_codes[code] = {
                        "subdataset": dataset,
                        "class_name": row[name_col],
                        "split": split,
                    }
                    subdatasets.setdefault(dataset, []).append(code)

        return metadata_index