Skip to content

Commit

Permalink
implement Dataset.from_documents
Browse files Browse the repository at this point in the history
  • Loading branch information
ArneBinder committed Jul 15, 2024
1 parent 38b9580 commit e134555
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/pie_datasets/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,23 @@ def from_hf_dataset(
)
return document_dataset

@classmethod
def from_documents(
    cls,
    documents: List[Document],
    document_converters: Optional[DocumentConvertersType] = None,
    **dataset_kwargs,
) -> "Dataset":
    """Create a dataset from an in-memory list of documents.

    Every document is serialized with its ``asdict()`` method, the resulting
    dicts are handed to ``datasets.Dataset.from_list``, and the Hugging Face
    dataset is then wrapped via ``cls.from_hf_dataset``.

    Args:
        documents: Non-empty list of documents. The type of the *first*
            document is used as the document type of the resulting dataset.
        document_converters: Passed through unchanged to
            ``cls.from_hf_dataset``.
        **dataset_kwargs: Extra keyword arguments forwarded to
            ``datasets.Dataset.from_list``.

    Returns:
        Whatever ``cls.from_hf_dataset`` returns for the created dataset.

    Raises:
        ValueError: If ``documents`` is empty (there is no first document to
            take the document type from).
    """
    # NOTE(review): Codecov flagged the body of this method as not covered by
    # tests at the time it was committed.
    if not documents:
        raise ValueError("No documents to create dataset from")

    # NOTE(review): only the first document is inspected; presumably all
    # documents share this type — confirm against callers.
    document_type = type(documents[0])
    data = [doc.asdict() for doc in documents]
    hf_dataset = datasets.Dataset.from_list(mapping=data, **dataset_kwargs)
    return cls.from_hf_dataset(
        hf_dataset, document_type=document_type, document_converters=document_converters
    )

Check warning on line 310 in src/pie_datasets/core/dataset.py

View check run for this annotation

Codecov / codecov/patch

src/pie_datasets/core/dataset.py#L310

Added line #L310 was not covered by tests

def apply_hf_func(self, func, **kwargs) -> "Dataset":
return Dataset.from_hf_dataset(
func(self, **kwargs),
Expand Down Expand Up @@ -470,6 +487,15 @@ def from_hf_dataset(
)
return dataset

@classmethod
def from_documents(
    cls,
    documents: List[Document],
    document_converters: Optional[DocumentConvertersType] = None,
    **dataset_kwargs,
) -> "Dataset":
    """Reject construction from a list of documents.

    This variant keeps the same signature as the other ``from_documents``
    shown in this file, but it is not implemented here and always raises;
    none of the arguments are inspected.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("from_documents is not implemented for IterableDataset")

def __iter__(self):
for example in iter(super().__iter__()):
yield self.document_type.fromdict(example)
Expand Down

0 comments on commit e134555

Please sign in to comment.