HKDataloader(
ds: HKDataset,
frame_size: int = 1000,
sampling_rate: int = 100,
label_map: dict[int, int] | None = None,
label_type: str | None = None,
**kwargs
)
HKDataloader is used to create a task specific dataloader for a dataset.
This class should be subclassed for a specific task and dataset. If multiple datasets are needed for a given task,
multiple dataloaders can be created. To simplify the process, the dataloaders can be placed in an ItemFactory.
Parameters:
-
ds
(HKDataset)
–
Dataset
-
frame_size
(int, default:
1000
)
–
Frame size. Defaults to 1000.
-
sampling_rate
(int, default:
100
)
–
Sampling rate. Defaults to 100.
-
label_map
(dict[int, int], default:
None
)
–
Label map. Defaults to None.
-
label_type
(str, default:
None
)
–
Label type. Defaults to None.
Example:
from typing import Generator
import numpy as np
import numpy.typing as npt
import heartkit as hk
class MyDataloader(hk.HKDataloader):
    """Example dataloader that yields random single-lead frames for each patient."""

    def __init__(self, ds: hk.HKDataset, **kwargs):
        super().__init__(ds=ds, **kwargs)

    # Must be named `patient_data_generator`: that is the hook the base
    # class `data_generator` calls for every patient id.
    def patient_data_generator(
        self,
        patient_id: int,
        samples_per_patient: int,
    ) -> Generator[npt.NDArray, None, None]:
        # Implement patient generator
        # The dataset is reached through `self.ds`, which the base __init__ stores.
        with self.ds.patient_data(patient_id) as pt:
            # Read the recording once instead of once per generated frame
            data = pt["data"][:]
            for _ in range(samples_per_patient):
                # Grab random frame and lead
                lead = np.random.randint(0, data.shape[0])
                start = np.random.randint(0, data.shape[1] - self.frame_size)
                frame = data[lead, start : start + self.frame_size]
                yield frame

    def data_generator(
        self,
        patient_ids: list[int],
        samples_per_patient: int | list[int],
        shuffle: bool = False,
    ) -> Generator[npt.NDArray, None, None]:
        # NOTE: `helia` must be imported by the module that defines this class
        for pt_id in helia.utils.uniform_id_generator(patient_ids, shuffle=shuffle):
            # Implement data generator
            yield from self.patient_data_generator(pt_id, samples_per_patient)
        # END FOR
Source code in heartkit/datasets/dataloader.py
def __init__(
    self,
    ds: HKDataset,
    frame_size: int = 1000,
    sampling_rate: int = 100,
    label_map: dict[int, int] | None = None,
    label_type: str | None = None,
    **kwargs,
):
    """HKDataloader is used to create a task-specific dataloader for a dataset.

    This class should be subclassed for a specific task and dataset. If multiple datasets are needed for a
    given task, multiple dataloaders can be created. To simplify the process, the dataloaders can be placed
    in an ItemFactory.

    Args:
        ds (HKDataset): Dataset
        frame_size (int, optional): Frame size. Defaults to 1000.
        sampling_rate (int, optional): Sampling rate. Defaults to 100.
        label_map (dict[int, int], optional): Label map. Defaults to None.
        label_type (str, optional): Label type. Defaults to None.
        **kwargs: Accepted so subclasses can forward extra options; not stored by this constructor.

    Example:

    ```python
    from typing import Generator

    import numpy as np
    import numpy.typing as npt

    import heartkit as hk


    class MyDataloader(hk.HKDataloader):
        def __init__(self, ds: hk.HKDataset, **kwargs):
            super().__init__(ds=ds, **kwargs)

        # Must be named `patient_data_generator`: that is the hook the
        # base class `data_generator` calls for every patient id.
        def patient_data_generator(
            self,
            patient_id: int,
            samples_per_patient: int,
        ) -> Generator[npt.NDArray, None, None]:
            # Implement patient generator
            with self.ds.patient_data(patient_id) as pt:
                data = pt["data"][:]
                for _ in range(samples_per_patient):
                    # Grab random frame and lead
                    lead = np.random.randint(0, data.shape[0])
                    start = np.random.randint(0, data.shape[1] - self.frame_size)
                    frame = data[lead, start : start + self.frame_size]
                    yield frame

        def data_generator(
            self,
            patient_ids: list[int],
            samples_per_patient: int | list[int],
            shuffle: bool = False,
        ) -> Generator[npt.NDArray, None, None]:
            # NOTE: `helia` must be imported by the module that defines this class
            for pt_id in helia.utils.uniform_id_generator(patient_ids, shuffle=shuffle):
                # Implement data generator
                yield from self.patient_data_generator(pt_id, samples_per_patient)
            # END FOR
    ```
    """
    self.ds = ds
    self.frame_size = frame_size
    self.sampling_rate = sampling_rate
    self.label_map = label_map
    self.label_type = label_type
Functions
split_train_val_patients
split_train_val_patients(
train_patients: list[int] | float | None = None, val_patients: list[int] | float | None = None
) -> tuple[list[int], list[int]]
Split patients into training and validation sets. Unless val_patients is given as an explicit list of patient ids,
the split is delegated to the dataset's split_train_test_patients method.
Parameters:
-
train_patients
(list[int] | float | None, default:
None
)
–
Training patients. Defaults to None.
-
val_patients
(list[int] | float | None, default:
None
)
–
Validation patients. Defaults to None.
Returns:
-
tuple[list[int], list[int]]
–
Training and validation patient ids
Source code in heartkit/datasets/dataloader.py
def split_train_val_patients(
    self,
    train_patients: list[int] | float | None = None,
    val_patients: list[int] | float | None = None,
) -> tuple[list[int], list[int]]:
    """Split patients into training and validation sets.

    The candidate pool is the dataset's training patient ids after filtering with
    ``self.label_map`` / ``self.label_type``. Unless ``val_patients`` is an explicit
    list of ids, the final split is delegated to the dataset's
    ``split_train_test_patients``.

    Args:
        train_patients (list[int] | float | None, optional): Either an explicit list of
            training patient ids (used as-is, replacing the filtered pool), or a number
            selecting a leading subset of the pool: a value greater than 1 is treated as
            a patient count, otherwise as a fraction of the pool. Defaults to None.
        val_patients (list[int] | float | None, optional): Either an explicit list of
            validation patient ids (removed from the training ids and returned directly),
            or a number forwarded as ``test_size`` to the dataset split; values >= 1 are
            cast to int first. Defaults to None.

    Returns:
        tuple[list[int], list[int]]: Training and validation patient ids
    """
    # Get train patients
    train_patient_ids = self.ds.get_train_patient_ids()
    train_patient_ids = self.ds.filter_patients_for_labels(
        patient_ids=train_patient_ids,
        label_map=self.label_map,
        label_type=self.label_type,
    )

    # Use subset of training patients
    if isinstance(train_patients, Iterable):
        train_patient_ids = train_patients
    elif train_patients is not None:
        # `elif` is required: an explicit id list must not reach the numeric
        # comparison below, which would raise TypeError for a list.
        num_pts = int(train_patients) if train_patients > 1 else int(train_patients * len(train_patient_ids))
        train_patient_ids = train_patient_ids[:num_pts]
        logger.debug(f"Using {len(train_patient_ids)} training patients")
    # END IF

    # Use subset of validation patients
    if isinstance(val_patients, Iterable):
        val_patient_ids = val_patients
        # Keep the two sets disjoint by dropping validation ids from training
        train_patient_ids = np.setdiff1d(train_patient_ids, val_patient_ids).tolist()
        return train_patient_ids, val_patient_ids

    if val_patients is not None and val_patients >= 1:
        val_patients = int(val_patients)

    train_patient_ids, val_patient_ids = self.ds.split_train_test_patients(
        patient_ids=train_patient_ids,
        test_size=val_patients,
        label_map=self.label_map,
        label_type=self.label_type,
    )

    return train_patient_ids, val_patient_ids
test_patient_ids
test_patient_ids(test_patients: float | None = None) -> list[int]
Get test patient ids
Parameters:
-
test_patients
(float | None, default:
None
)
–
Test patients. Defaults to None.
Returns:
-
list[int]
–
list[int]: Test patient ids
Source code in heartkit/datasets/dataloader.py
| def test_patient_ids(
self,
test_patients: float | None = None,
) -> list[int]:
"""Get test patient ids
Args:
test_patients (float | None, optional): Test patients. Defaults to None.
Returns:
list[int]: Test patient ids
"""
test_patient_ids = self.ds.get_test_patient_ids()
test_patient_ids = self.ds.filter_patients_for_labels(
patient_ids=test_patient_ids,
label_map=self.label_map,
label_type=self.label_type,
)
if test_patients is not None:
num_pts = int(test_patients) if test_patients > 1 else int(test_patients * len(test_patient_ids))
test_patient_ids = test_patient_ids[:num_pts]
return test_patient_ids
|
patient_data_generator
patient_data_generator(
patient_id: int, samples_per_patient: list[int]
) -> Generator[tuple[npt.NDArray, ...], None, None]
Generate data for given patient id
Parameters:
-
patient_id
(int)
–
Patient ID
-
samples_per_patient
(list[int])
–
Samples per patient
Returns:
-
Generator[tuple[npt.NDArray, ...], None, None]
–
Data generator
Note
This method should be implemented in the subclass
Source code in heartkit/datasets/dataloader.py
def patient_data_generator(
    self,
    patient_id: int,
    samples_per_patient: int | list[int],
) -> Generator[tuple[npt.NDArray, ...], None, None]:
    """Generate data for given patient id

    Args:
        patient_id (int): Patient ID
        samples_per_patient (int | list[int]): Samples per patient. Received unchanged
            from ``data_generator``, which accepts either form.

    Returns:
        Generator[tuple[npt.NDArray, ...], None, None]: Data generator

    Raises:
        NotImplementedError: Always, in this base implementation.

    !!! note
        This method should be implemented in the subclass
    """
    raise NotImplementedError(f"{type(self).__name__} must implement patient_data_generator")
data_generator
data_generator(
patient_ids: list[int], samples_per_patient: int | list[int], shuffle: bool = False
) -> Generator[tuple[npt.NDArray, ...], None, None]
Generate data for given patient ids
Parameters:
-
patient_ids
(list[int])
–
Patient IDs
-
samples_per_patient
(int | list[int])
–
Samples per patient
-
shuffle
(bool, default:
False
)
–
Shuffle data. Defaults to False.
Returns:
-
Generator[tuple[npt.NDArray, ...], None, None]
–
Data generator
Source code in heartkit/datasets/dataloader.py
def data_generator(
    self,
    patient_ids: list[int],
    samples_per_patient: int | list[int],
    shuffle: bool = False,
) -> Generator[tuple[npt.NDArray, ...], None, None]:
    """Yield data for the given patient ids, one patient at a time.

    Patient ids are drawn from ``helia.utils.uniform_id_generator``; every item
    produced by ``patient_data_generator`` for the drawn patient is passed through
    unchanged.

    Args:
        patient_ids (list[int]): Patient IDs
        samples_per_patient (int | list[int]): Samples per patient
        shuffle (bool, optional): Shuffle data. Defaults to False.

    Returns:
        Generator[tuple[npt.NDArray, ...], None, None]: Data generator
    """
    id_stream = helia.utils.uniform_id_generator(patient_ids, shuffle=shuffle)
    for current_id in id_stream:
        yield from self.patient_data_generator(current_id, samples_per_patient)
create_dataloader
create_dataloader(
patient_ids: list[int], samples_per_patient: int | list[int], shuffle: bool = False
) -> tf.data.Dataset
Create tf.data.Dataset from internal data generator
Parameters:
-
patient_ids
(list[int])
–
Patient IDs
-
samples_per_patient
(int | list[int])
–
Samples per patient
-
shuffle
(bool, default:
False
)
–
Shuffle data. Defaults to False.
Returns:
-
tf.data.Dataset
–
Dataset
Source code in heartkit/datasets/dataloader.py
def create_dataloader(
    self, patient_ids: list[int], samples_per_patient: int | list[int], shuffle: bool = False
) -> tf.data.Dataset:
    """Wrap the internal data generator in a ``tf.data.Dataset``.

    The generator arguments are bound up front, and the output signature is derived
    from that bound generator via ``helia.utils.get_output_signature_from_gen``.

    Args:
        patient_ids (list[int]): Patient IDs
        samples_per_patient (int | list[int]): Samples per patient
        shuffle (bool, optional): Shuffle data. Defaults to False.

    Returns:
        tf.data.Dataset: Dataset
    """
    # Bind arguments so the generator can be invoked with no parameters
    bound_generator = functools.partial(
        self.data_generator,
        patient_ids=patient_ids,
        samples_per_patient=samples_per_patient,
        shuffle=shuffle,
    )

    # Compute output signature from generator
    output_signature = helia.utils.get_output_signature_from_gen(bound_generator)

    return tf.data.Dataset.from_generator(
        bound_generator,
        output_signature=output_signature,
    )