Skip to content

Commit

Permalink
Added drugprot2example() method, enabling conversion from DrugprotDoc…
Browse files Browse the repository at this point in the history
…ument back to Example format
  • Loading branch information
kai-car committed Aug 9, 2024
1 parent d1df6df commit 0d6b675
Showing 1 changed file with 39 additions and 7 deletions.
46 changes: 39 additions & 7 deletions dataset_builders/pie/drugprot/drugprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class DrugprotBigbioDocument(TextBasedDocument):


def example2drugprot(example: Dict[str, Any]) -> DrugprotDocument:
metadata = {"entity_ids": []}
metadata = {"entity_ids": [], "relation_ids": []}
id2labeled_span: Dict[str, LabeledSpan] = {}

document = DrugprotDocument(
Expand All @@ -58,6 +58,7 @@ def example2drugprot(example: Dict[str, Any]) -> DrugprotDocument:
label=relation["type"],
)
)
document.metadata["relation_ids"].append(relation["id"])
return document


Expand All @@ -80,7 +81,7 @@ def example2drugprot_bigbio(example: Dict[str, Any]) -> DrugprotBigbioDocument:
label=passage["type"],
)
)
# We sort labels and relation to always have an deterministic order for testing purposes.
# We sort labels and relations to always have a deterministic order for testing purposes.
for span in example["entities"]:
labeled_span = LabeledSpan(
start=span["offsets"][0][0],
Expand All @@ -101,6 +102,40 @@ def example2drugprot_bigbio(example: Dict[str, Any]) -> DrugprotBigbioDocument:
return document


def drugprot2example(doc: DrugprotDocument) -> Dict[str, Any]:
    """Convert a ``DrugprotDocument`` back into a plain example dictionary.

    Ids are taken positionally from the document metadata: the i-th entry of
    ``doc.metadata["entity_ids"]`` belongs to the i-th entity and the i-th
    entry of ``doc.metadata["relation_ids"]`` to the i-th relation. The id
    lists are only read when there is at least one annotation of that kind.

    Args:
        doc: Document providing ``id``, ``title``, ``abstract``, ``text``,
            the ``entities`` and ``relations`` layers and the id lists in
            ``metadata``.

    Returns:
        A dictionary with the keys ``document_id``, ``title``, ``abstract``,
        ``text``, ``entities`` and ``relations``.

    Raises:
        ValueError: If the head or tail of a relation is not one of
            ``doc.entities``.
        KeyError: If a required id list is missing from ``doc.metadata``.
        IndexError: If an id list is shorter than its annotation layer.
    """
    metadata = doc.metadata
    text = doc.text

    entities = []
    # Span -> id lookup built once, so resolving relation arguments does not
    # rescan the entity layer for every relation.
    # NOTE(review): requires entity annotations to be hashable - confirm this
    # holds for the annotation type used in the ``entities`` layer.
    span2id: Dict[Any, Any] = {}
    for i, entity in enumerate(doc.entities):
        entity_id = metadata["entity_ids"][i]
        entities.append(
            {
                "id": entity_id,
                "type": entity.label,
                "text": text[entity.start : entity.end],
                "offset": [entity.start, entity.end],
            }
        )
        # Keep the id of the first equal span, like ``list.index`` would.
        span2id.setdefault(entity, entity_id)

    relations = []
    for i, relation in enumerate(doc.relations):
        relation_id = metadata["relation_ids"][i]
        try:
            arg1_id = span2id[relation.head]
            arg2_id = span2id[relation.tail]
        except KeyError as err:
            # Same exception type that ``list.index`` raises for a missing item.
            raise ValueError(
                f"relation argument {err.args[0]!r} is not an entity of the document"
            ) from err
        relations.append(
            {
                "id": relation_id,
                "arg1_id": arg1_id,
                "arg2_id": arg2_id,
                "type": relation.label,
            }
        )

    return {
        "document_id": doc.id,
        "title": doc.title,
        "abstract": doc.abstract,
        "text": text,
        "entities": entities,
        "relations": relations,
    }


def drugprot_bigbio2example(doc: DrugprotBigbioDocument) -> Dict[str, Any]:
    """Return the example dictionary for a ``DrugprotBigbioDocument``.

    The document is not inspected; every call yields a new, empty dictionary.

    NOTE(review): this looks like a not-yet-implemented back-conversion -
    confirm whether an empty example is the intended result.
    """
    empty_example: Dict[str, Any] = dict()
    return empty_example

Expand Down Expand Up @@ -152,18 +187,15 @@ def document_converters(self):
else:
raise ValueError(f"Unknown dataset name: {self.config.name}")

def _generate_document(
self,
example: Dict[str, Any],
) -> Union[DrugprotDocument, DrugprotBigbioDocument]:
def _generate_document(self, example: Dict[str, Any], **kwargs) -> Union[DrugprotDocument, DrugprotBigbioDocument]:
    """Build the document for ``example`` according to ``self.config.name``.

    ``"drugprot_source"`` is handled by ``example2drugprot`` and
    ``"drugprot_bigbio_kb"`` by ``example2drugprot_bigbio``. Additional
    keyword arguments are accepted but not used here.

    Raises:
        ValueError: If ``self.config.name`` is neither of the two names above.
    """
    config_name = self.config.name
    if config_name == "drugprot_source":
        return example2drugprot(example)
    if config_name == "drugprot_bigbio_kb":
        return example2drugprot_bigbio(example)
    # Neither known configuration matched.
    raise ValueError(f"Unknown dataset config name: {config_name}")

def _generate_example(self, document: Document, **kwargs) -> Dict[str, Any]:
def _generate_example(self, document: Union[DrugprotDocument, DrugprotBigbioDocument], **kwargs) -> Dict[str, Any]:
if isinstance(document, DrugprotBigbioDocument):
return drugprot_bigbio2example(document)
elif isinstance(document, DrugprotDocument):
Expand Down

0 comments on commit 0d6b675

Please sign in to comment.