from datasets import Dataset
from torch.utils.data import DataLoader

from utils import CustomDataset
# Build the "menu" of 2D slices cut from the 3D volumes.
pretrain_dataset = CustomDataset(
    root_dir="./training_data",
    # Number of slices in one direction of the cube.
    num_slices=300,
    # Like the menu at Cheesecake Factory: the slice menu includes everything.
    # When cheesecake_factory_mode=False, pass limit=<int> (e.g. limit=50)
    # to cap the size of the menu.
    cheesecake_factory_mode=True,
    # Prefix that the data names start with.
    data_prefix="seismicCubes",
    # Prefix that the label names start with; left blank when pretraining.
    label_prefix="",
    pretraining=True,
)
# Generator used to walk through all 2D slices created in the CustomDataset.
def gen(dataset=None):
    """Yield every item of a map-style dataset in index order.

    Args:
        dataset: Any object that supports ``len()`` and integer indexing.
            Defaults to the module-level ``pretrain_dataset`` so the
            zero-argument call made by ``Dataset.from_generator(gen)``
            keeps working.

    Yields:
        ``dataset[idx]`` for each ``idx`` from ``0`` to ``len(dataset) - 1``.
    """
    if dataset is None:
        dataset = pretrain_dataset
    # Index explicitly instead of iterating the object: a class that only
    # defines __len__/__getitem__ stops plain iteration solely via IndexError,
    # whereas range(len(...)) is bounded by the reported length.
    for idx in range(len(dataset)):
        yield dataset[idx]
# Import the torch dataset into a Hugging Face dataset by draining gen().
# NOTE(review): from_generator expects every yielded example to be a dict
# with one consistent schema -- confirm CustomDataset.__getitem__ returns that.
hf_dataset = Dataset.from_generator(gen)

# Batch the Hugging Face dataset for training. Without a format set, a
# Hugging Face dataset hands back plain Python objects, so the loader is fed
# a torch-formatted view; hf_dataset itself is left untouched.
# NOTE(review): num_workers > 0 starts worker processes; on platforms that
# spawn rather than fork this typically needs an
# `if __name__ == "__main__":` guard -- confirm for the target platform.
dataloader = DataLoader(
    hf_dataset.with_format("torch"),
    batch_size=16,
    num_workers=4,
)
# Observed output when running the script above:
#
# Error after generating 600 examples
# Generating train split: 600 examples [00:00, 895.05 examples/s]
# builder.py 1786 _prepare_split_single
#     raise DatasetGenerationError("An error occurred while generating the dataset") from e
# datasets.exceptions.DatasetGenerationError:
# An error occurred while generating the dataset