Source code for soundata.datasets.dcase23_task2

"""DCASE23_Task2 Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    *Created By*  
        | Noboru Harada, Daisuke Niizumi, Yasunori Ohishi, Daiki Takeuchi, and Masahiro Yasuda (Hitachi, Ltd. and NTT Corporation).

    *Version*  
        1.0 

    *Description*  
        The DCASE 2023 Task 2 "First-Shot Unsupervised Anomalous Sound Detection for Machine Condition Monitoring" dataset provides the operating sounds of seven real/toy machines: ToyCar, ToyTrain, Fan, Gearbox, Bearing, Slide rail, and Valve. Each recording is a single-channel, 10-second audio that includes both a machine's operating sound and environmental noise. The dataset contains training clips containing normal sounds in the source and target domain and test clips of both normal and anomalous sounds. 

    *Audio Files Included*  
        10,000 ten-second audio recordings for each machine type in WAV format. The `raw` directory contains recordings as WAV files, with the source/target domain and attributes provided in the file name.

    *Meta-data Files Included*  
        Attribute csv files accompany the audio files for easy access to attributes that cause domain shifts. Each file lists the file names, domain shift parameters, and the value or type of these parameters.

    *Please Acknowledge DCASE 2023 Task 2 in Academic Research*  
        When the DCASE 2023 Task 2 dataset is used for academic research, we would highly appreciate it if scientific publications of works partly based on this dataset cite the following publications:

        .. code-block:: latex

            Noboru Harada, Daisuke Niizumi, Yasunori Ohishi, Daiki Takeuchi, and Masahiro Yasuda. "First-shot anomaly detection for machine condition monitoring: A domain generalization baseline", arXiv e-prints: 2303.00455, 2023.
            Kota Dohi, Tomoya Nishida, Harsh Purohit, Ryo Tanabe, Takashi Endo, Masaaki Yamamoto, Yuki Nikaido, and Yohei Kawaguchi. "MIMII DG: sound dataset for malfunctioning industrial machine investigation and inspection for domain generalization task", Proceedings of the 7th Detection and Classification of Acoustic Scenes and Events 2022 Workshop (DCASE2022), 31-35. Nancy, France, November 2022.
            Noboru Harada, Daisuke Niizumi, Daiki Takeuchi, Yasunori Ohishi, Masahiro Yasuda, and Shoichiro Saito. "ToyADMOS2: another dataset of miniature-machine operating sounds for anomalous sound detection under domain shift conditions", Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021), 1–5. Barcelona, Spain, November 2021.

    *Conditions of Use*  
        The DCASE 2023 Task 2 dataset was created jointly by Hitachi, Ltd. and NTT Corporation. It is available under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license.

    *Feedback*  
        For any issues or feedback regarding the dataset, please reach out to:

        | * Kota Dohi: kota.dohi.gr@hitachi.com
        | * Keisuke Imoto: keisuke.imoto@ieee.org
        | * Noboru Harada: noboru@ieee.org
        | * Daisuke Niizumi: daisuke.niizumi.dt@hco.ntt.co.jp
        | * Yohei Kawaguchi: yohei.kawaguchi.xk@hitachi.com
"""

import os
from typing import BinaryIO, Optional, TextIO, Tuple

import librosa
import numpy as np
import csv

from soundata import download_utils
from soundata import jams_utils
from soundata import core
from soundata import annotations
from soundata import io


# BibTeX entries for the three publications the module docstring asks users to
# cite. Page ranges use BibTeX's conventional "--" separator so the string stays
# pure ASCII and both ranges follow the same convention.
BIBTEX = """
@article{harada2023firstshot,
  title={First-shot anomaly detection for machine condition monitoring: A domain generalization baseline},
  author={Harada, Noboru and Niizumi, Daisuke and Ohishi, Yasunori and Takeuchi, Daiki and Yasuda, Masahiro},
  journal={arXiv e-prints},
  volume={2303.00455},
  year={2023},
}

@inproceedings{dohi2022mimii,
  title={MIMII DG: sound dataset for malfunctioning industrial machine investigation and inspection for domain generalization task},
  author={Dohi, Kota and Nishida, Tomoya and Purohit, Harsh and Tanabe, Ryo and Endo, Takashi and Yamamoto, Masaaki and Nikaido, Yuki and Kawaguchi, Yohei},
  booktitle={Proceedings of the 7th Detection and Classification of Acoustic Scenes and Events 2022 Workshop (DCASE2022)},
  pages={31--35},
  year={2022},
  address={Nancy, France},
  month={November},
}

@inproceedings{harada2021toyadmos2,
  title={ToyADMOS2: another dataset of miniature-machine operating sounds for anomalous sound detection under domain shift conditions},
  author={Harada, Noboru and Niizumi, Daisuke and Takeuchi, Daiki and Ohishi, Yasunori and Yasuda, Masahiro and Saito, Shoichiro},
  booktitle={Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)},
  pages={1--5},
  year={2021},
  address={Barcelona, Spain},
  month={November},
}

"""
# Downloadable archives, keyed by partition name. The entries come from three
# Zenodo records, and each archive's `destination_dir` is the ID of the record
# it is downloaded from:
#   * "dev_*"       -> record 7882613
#   * "add_train_*" -> record 7830345
#   * "eval_*"      -> record 7860847
# NOTE(review): the checksums are 32 hex characters, presumably MD5 digests —
# confirm against download_utils.
REMOTES = {
    # --- Zenodo record 7882613: "dev_*" archives ---
    "dev_bearing": download_utils.RemoteFileMetadata(
        filename="dev_bearing.zip",
        url="https://zenodo.org/records/7882613/files/dev_bearing.zip?download=1",
        checksum="8a813bc8d8f156b5395bfccdfac7673c",
        destination_dir="7882613",
    ),
    "dev_fan": download_utils.RemoteFileMetadata(
        filename="dev_fan.zip",
        url="https://zenodo.org/records/7882613/files/dev_fan.zip?download=1",
        checksum="9348591e96fb0ad499a1e33b082562fc",
        destination_dir="7882613",
    ),
    "dev_gearbox": download_utils.RemoteFileMetadata(
        filename="dev_gearbox.zip",
        url="https://zenodo.org/records/7882613/files/dev_gearbox.zip?download=1",
        checksum="b6e55f6a31faa0fc8569ec0afdd53ccf",
        destination_dir="7882613",
    ),
    "dev_slider": download_utils.RemoteFileMetadata(
        filename="dev_slider.zip",
        url="https://zenodo.org/records/7882613/files/dev_slider.zip?download=1",
        checksum="b3f8dee36b4718c36d659a4fd1c4afe0",
        destination_dir="7882613",
    ),
    "dev_ToyCar": download_utils.RemoteFileMetadata(
        filename="dev_ToyCar.zip",
        url="https://zenodo.org/records/7882613/files/dev_ToyCar.zip?download=1",
        checksum="4e3bf15f4101ed4ed4f1fecde2e2b2a3",
        destination_dir="7882613",
    ),
    "dev_ToyTrain": download_utils.RemoteFileMetadata(
        filename="dev_ToyTrain.zip",
        url="https://zenodo.org/records/7882613/files/dev_ToyTrain.zip?download=1",
        checksum="6b02a6c65eebb3b8b1ae59a6b25bb897",
        destination_dir="7882613",
    ),
    "dev_valve": download_utils.RemoteFileMetadata(
        filename="dev_valve.zip",
        url="https://zenodo.org/records/7882613/files/dev_valve.zip?download=1",
        checksum="b2051a2022eadb53cd97581120811cae",
        destination_dir="7882613",
    ),
    # --- Zenodo record 7830345: "add_train_*" archives (eval_data_*_train.zip) ---
    "add_train_bandsaw": download_utils.RemoteFileMetadata(
        filename="eval_data_bandsaw_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_bandsaw_train.zip?download=1",
        checksum="9274dfe63de028743823f1123f8b4b47",
        destination_dir="7830345",
    ),
    "add_train_grinder": download_utils.RemoteFileMetadata(
        filename="eval_data_grinder_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_grinder_train.zip?download=1",
        checksum="17569c1f9df23621a0dbabc430684a35",
        destination_dir="7830345",
    ),
    "add_train_shaker": download_utils.RemoteFileMetadata(
        filename="eval_data_shaker_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_shaker_train.zip?download=1",
        checksum="35f821b5645b731fb5a1750e33b95fc3",
        destination_dir="7830345",
    ),
    "add_train_ToyDrone": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyDrone_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_ToyDrone_train.zip?download=1",
        checksum="7fea367d1384a1521ae24f72203238de",
        destination_dir="7830345",
    ),
    "add_train_ToyNscale": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyNscale_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_ToyNscale_train.zip?download=1",
        checksum="9332822f3e47afd984c01f2ecb5ca3af",
        destination_dir="7830345",
    ),
    "add_train_ToyTank": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyTank_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_ToyTank_train.zip?download=1",
        checksum="b1fd3ab7de7561290df2d477de1c9d33",
        destination_dir="7830345",
    ),
    "add_train_Vacuum": download_utils.RemoteFileMetadata(
        filename="eval_data_Vacuum_train.zip",
        url="https://zenodo.org/records/7830345/files/eval_data_Vacuum_train.zip?download=1",
        checksum="1c8de33d9a8c7850a1f7aaddb97d87be",
        destination_dir="7830345",
    ),
    # --- Zenodo record 7860847: "eval_*" archives (eval_data_*_test.zip) ---
    "eval_bandsaw": download_utils.RemoteFileMetadata(
        filename="eval_data_bandsaw_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_bandsaw_test.zip?download=1",
        checksum="2a8e8f39f6584ab366a8f4da52d4d7a6",
        destination_dir="7860847",
    ),
    "eval_grinder": download_utils.RemoteFileMetadata(
        filename="eval_data_grinder_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_grinder_test.zip?download=1",
        checksum="631b3e1608b6077772829a6e68c82c77",
        destination_dir="7860847",
    ),
    "eval_shaker": download_utils.RemoteFileMetadata(
        filename="eval_data_shaker_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_shaker_test.zip?download=1",
        checksum="ba98c98caa96051ec80e24e44b8fca56",
        destination_dir="7860847",
    ),
    "eval_ToyDrone": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyDrone_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_ToyDrone_test.zip?download=1",
        checksum="fdae7b8d1f4cadb2bea88bc93e2367db",
        destination_dir="7860847",
    ),
    "eval_ToyNscale": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyNscale_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_ToyNscale_test.zip?download=1",
        checksum="62f5f5043d8fb3a305b1c2e1025872de",
        destination_dir="7860847",
    ),
    "eval_ToyTank": download_utils.RemoteFileMetadata(
        filename="eval_data_ToyTank_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_ToyTank_test.zip?download=1",
        checksum="f5639bf58c47169c622751f19c6fc321",
        destination_dir="7860847",
    ),
    "eval_Vacuum": download_utils.RemoteFileMetadata(
        filename="eval_data_Vacuum_test.zip",
        url="https://zenodo.org/records/7860847/files/eval_data_Vacuum_test.zip?download=1",
        checksum="a32524fd8c45b574a560685b38acc4e1",
        destination_dir="7860847",
    ),
}


# License string passed to core.Dataset through `license_info=`.
# NOTE(review): the module docstring's "Conditions of Use" section names
# CC BY-NC-SA 4.0 (with ShareAlike), whereas this string has no ShareAlike
# clause. Confirm the actual license on the dataset's Zenodo records and make
# the two agree.
LICENSE_INFO = "Creative Commons Attribution Non Commercial 4.0 International"


class Clip(core.Clip):
    """A single clip of the DCASE23_Task2 dataset

    Args:
        clip_id (str): ID of the clip

    Attributes:
        audio (np.ndarray, float): audio signal of the clip and its sample rate
        audio_path (str): path to the audio file
        file_name (str): name of the clip file, usable for cross-referencing
        d1p (str): first domain shift parameter, i.e. the attribute causing the domain shift
        d1v (str): value or type associated with the first domain shift parameter
    """

    def __init__(self, clip_id, data_home, dataset_name, index, metadata):
        super().__init__(clip_id, data_home, dataset_name, index, metadata)

        # Resolve the audio path once, when the clip is created.
        audio_location = self.get_path("audio")
        self.audio_path = audio_location

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """Audio of the clip, loaded from ``audio_path`` on every access

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        loaded = load_audio(self.audio_path)
        return loaded

    @property
    def file_name(self):
        """File name of the clip, as stored in the clip metadata.

        It can be used to look the clip up in the attribute CSV files.

        Returns:
            * str - name of the clip file

        """
        clip_info = self._clip_metadata
        return clip_info.get("file_name")

    @property
    def d1p(self):
        """First domain shift parameter (d1p) of the clip.

        Returns:
            * str - first domain shift parameter of the clip

        """
        clip_info = self._clip_metadata
        return clip_info.get("d1p")

    @property
    def d1v(self):
        """First domain shift value (d1v) of the clip.

        Returns:
            * str - first domain shift value of the clip

        """
        clip_info = self._clip_metadata
        return clip_info.get("d1v")

    def to_jams(self):
        """Convert the clip's data to jams format

        Returns:
            jams.JAMS: the clip's data in jams format

        """
        # Only the audio path and the clip metadata are handed to the converter.
        converter_args = {
            "audio_path": self.audio_path,
            "metadata": self._clip_metadata,
        }
        return jams_utils.jams_converter(**converter_args)
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO, sr=44100) -> Tuple[np.ndarray, float]:
    """Read a DCASE23_Task2 audio file as a mono signal.

    Args:
        fhandle (str or file-like): path to the audio file, or a file-like object
        sr (int or None): target sample rate, 44100 Hz by default.
            The audio is resampled on load when this differs from the file's
            own sample rate. Pass None to keep the original sample rate
            (which varies from file to file).

    Returns:
        * np.ndarray - the mono audio signal
        * float - The sample rate of the audio file

    """
    # mono=True asks librosa for a single-channel signal.
    signal, sample_rate = librosa.load(fhandle, sr=sr, mono=True)
    return signal, sample_rate
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The DCASE23_Task2 dataset
    """

    def __init__(self, data_home=None):
        # Register this loader with the module-level clip class, citation,
        # remote archives and license string.
        super().__init__(
            data_home,
            name="dcase23_task2",
            clip_class=Clip,
            bibtex=BIBTEX,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    @core.copy_docs(load_audio)
    def load_audio(self, *args, **kwargs):
        # Forwards all arguments unchanged to the module-level load_audio.
        return load_audio(*args, **kwargs)

    @staticmethod
    def _parse_attribute_csv(csv_path):
        """Parse one attribute CSV file into clip metadata entries.

        The first row is treated as a header and skipped. Completely empty
        rows (e.g. a trailing blank line) are ignored.

        Args:
            csv_path (str): path to the attribute CSV file

        Returns:
            dict: one entry per data row, keyed by the row's file name with
            leading directories and ``.wav`` removed; each value is a dict
            with the keys ``file_name``, ``d1p`` and ``d1v`` taken from the
            first three columns

        """
        entries = {}
        # newline="" is what the csv module expects from files it reads.
        with open(csv_path, "r", newline="") as f:
            reader = csv.reader(f, delimiter=",")
            next(reader, None)  # skip the header; an empty file yields no rows
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to index.
                    continue
                key = row[0].split("/")[-1].replace(".wav", "")
                entries[key] = {
                    "file_name": row[0],
                    "d1p": row[1],
                    "d1v": row[2],
                }
        return entries

    @core.cached_property
    def _metadata(self):
        """Build the clip metadata index from the attribute CSV files.

        Reads ``<data_home>/<record>/<machine>/attributes_00.csv`` for every
        machine of the development data (record ``7882613``) and of the
        additional training data (record ``7830345``), in that order. If the
        same key appears more than once, the entry read last wins.

        Returns:
            dict: clip key -> dict with ``file_name``, ``d1p`` and ``d1v``

        Raises:
            FileNotFoundError: if an attribute file of either partition is missing

        """
        machines_dev = [
            "fan",
            "gearbox",
            "bearing",
            "slider",
            "ToyCar",
            "ToyTrain",
            "valve",
        ]
        machines_add_train = [
            "Vacuum",
            "ToyTank",
            "ToyNscale",
            "ToyDrone",
            "bandsaw",
            "grinder",
            "shaker",
        ]

        # (record directory, label used in the error message, machine types)
        partitions = (
            ("7882613", "Development", machines_dev),
            ("7830345", "Additional training", machines_add_train),
        )

        metadata_index = {}
        for record_dir, label, machines in partitions:
            for machine in machines:
                metadata_path = os.path.join(
                    self.data_home, record_dir, machine, "attributes_00.csv"
                )

                # Both partitions get the same explicit check, so a missing
                # download is reported with the same actionable message.
                if not os.path.exists(metadata_path):
                    raise FileNotFoundError(
                        f"{label} metadata for {machine} not found. Did you run .download()?"
                    )

                metadata_index.update(self._parse_attribute_csv(metadata_path))

        return metadata_index