Skip to content

dataset

Classes

Dataset

Dataset(path: os.PathLike | None = None, **kwargs)

Dataset serves as a base class to download and provide unified access to datasets.

Parameters:

  • path (PathLike | None, default: None ) –

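    Base path of the dataset. When None, the SK_DATASET_PATH environment variable is used instead; a ValueError is raised if neither is set. Defaults to None.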

Example:

import numpy as np
import sleepkit as sk

# NOTE: as written, this snippet also needs `contextlib`, `numpy.typing as npt`
# and `typing.Generator` in scope. `PatientData` and `PatientGenerator` are
# likewise referenced without an import (presumably sleepkit types; confirm).
class MyDataset(sk.Dataset):
    """Toy dataset serving random data to illustrate the overrides a subclass supplies."""

    def __init__(self, *args, **kwargs):
        # Forward all arguments unchanged to the base-class constructor.
        super().__init__(*args, **kwargs)

    @property
    def name(self) -> str:
        # Same string that is used as the registration key at the bottom.
        return 'my-dataset'

    @property
    def sampling_rate(self) -> int:
        # Fixed value for this example (unit not stated here; presumably Hz).
        return 100

    def get_train_patient_ids(self) -> npt.NDArray:
        # IDs 0..79 form the training split.
        return np.arange(80)

    def get_test_patient_ids(self) -> npt.NDArray:
        # IDs 80..99 form the test split.
        return np.arange(80, 100)

    @contextlib.contextmanager
    def patient_data(self, patient_id: int) -> Generator[PatientData, None, None]:
        # `patient_id` is ignored: every call fabricates fresh random data.
        data = np.random.randn(1000)
        # Ten rows of two random integers drawn from [0, 1000).
        segs = np.random.randint(0, 1000, (10, 2))
        yield {"data": data, "segmentations": segs}

    def signal_generator(
        self,
        patient_generator: PatientGenerator,
        frame_size: int,
        samples_per_patient: int = 1,
        target_rate: int | None = None,
    ) -> Generator[npt.NDArray, None, None]:
        # `frame_size` and `target_rate` are accepted but unused in this example;
        # the full "data" array is yielded `samples_per_patient` times per patient.
        for patient in patient_generator:
            for _ in range(samples_per_patient):
                with self.patient_data(patient) as pt:
                    yield pt["data"]

    def download(self, num_workers: int | None = None, force: bool = False):
        # Nothing to fetch: the example data is generated on the fly.
        pass

# Register dataset with the factory under the name "my-dataset"
sk.DatasetFactory.register("my-dataset", MyDataset)
Source code in sleepkit/datasets/dataset.py
def __init__(self, path: os.PathLike | None = None, **kwargs) -> None:
    """Dataset serves as a base class to download and provide unified access to datasets.

    Args:
        path (os.PathLike|None, optional): Path to dataset base path. Defaults to None.

    Example:

    ```python
    import numpy as np
    import sleepkit as sk

    class MyDataset(sk.Dataset):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        @property
        def name(self) -> str:
            return 'my-dataset'

        @property
        def sampling_rate(self) -> int:
            return 100

        def get_train_patient_ids(self) -> npt.NDArray:
            return np.arange(80)

        def get_test_patient_ids(self) -> npt.NDArray:
            return np.arange(80, 100)

        @contextlib.contextmanager
        def patient_data(self, patient_id: int) -> Generator[PatientData, None, None]:
            data = np.random.randn(1000)
            segs = np.random.randint(0, 1000, (10, 2))
            yield {"data": data, "segmentations": segs}

        def signal_generator(
            self,
            patient_generator: PatientGenerator,
            frame_size: int,
            samples_per_patient: int = 1,
            target_rate: int | None = None,
        ) -> Generator[npt.NDArray, None, None]:
            for patient in patient_generator:
                for _ in range(samples_per_patient):
                    with self.patient_data(patient) as pt:
                        yield pt["data"]

        def download(self, num_workers: int | None = None, force: bool = False):
            pass

    # Register dataset
    sk.DatasetFactory.register("my-dataset", MyDataset)
    ```

    """

    if path is None:
        path = os.environ.get("SK_DATASET_PATH", None)
    if path is None:
        raise ValueError("Root dataset path is not set")
    self.path = Path(path)

Attributes

subject_ids property
subject_ids: list[str]

Get dataset subject IDs

Returns:

  • list[str]

    list[str]: Subject IDs

train_subject_ids property
train_subject_ids: list[str]

Get train subject ids

test_subject_ids property
test_subject_ids: list[str]

Get test subject ids

Functions

uniform_subject_generator
uniform_subject_generator(
    subject_ids: list[str] | None = None, repeat: bool = True, shuffle: bool = True
) -> SubjectGenerator

Yield data for each subject in the array.

Parameters:

  • subject_ids (list[str] | None, default: None ) –

    Array of subject ids

  • repeat (bool, default: True ) –

    Whether to repeat generator. Defaults to True.

  • shuffle (bool, default: True ) –

    Whether to shuffle subject ids. Defaults to True.

Returns:

  • SubjectGenerator ( SubjectGenerator ) –

    Subject generator

Yields:

  • SubjectGenerator

    Iterator[SubjectGenerator]

Source code in sleepkit/datasets/dataset.py
def uniform_subject_generator(
    self,
    subject_ids: list[str] | None = None,
    repeat: bool = True,
    shuffle: bool = True,
) -> SubjectGenerator:
    """Generate data for each subject in the given list of subject ids.

    This implementation is only a placeholder: it raises unconditionally and
    never produces a generator.

    Args:
        subject_ids (list[str]|None, optional): Subject ids to iterate over. Defaults to None.
        repeat (bool, optional): Whether to repeat the generator. Defaults to True.
        shuffle (bool, optional): Whether to shuffle subject ids. Defaults to True.

    Returns:
        SubjectGenerator: Subject generator

    Raises:
        NotImplementedError: Always, in this implementation.
    """
    raise NotImplementedError
download
download(num_workers: int | None = None, force: bool = False)

Download dataset

This will download preprocessed HDF5 files from S3.

Parameters:

  • num_workers (int | None, default: None ) –
    Number of parallel workers. Defaults to None.
  • force (bool, default: False ) –

    Force redownload. Defaults to False.

Source code in sleepkit/datasets/dataset.py
def download(self, num_workers: int | None = None, force: bool = False):
    """Download dataset

    This will download preprocessed HDF5 files from S3.

    Args:
        num_workers (int | None, optional): # parallel workers. Defaults to None.
        force (bool, optional): Force redownload. Defaults to False.
    """
    raise NotImplementedError()