I'm having a problem running Drill on my dataset (21,518,759 entities, 918 properties, 72,737,644 triples). I get the following error message:
(ontolearn0.8.0) quannian@eml4u:~/Drill/Ontolearn-0.7.3/Ontolearn/examples/Drill_DB$ python Drill_QALD9_DB.py
Goal Concept: EducationalOrganization E^+:[3] E^-:[3]
Traceback (most recent call last):
  File "/local/upb/users/q/quannian/profiles/unix/cs/Drill/Ontolearn-0.7.3/Ontolearn/examples/Drill_DB/Drill_QALD9_DB.py", line 117, in <module>
    start(parser.parse_args())
  File "/local/upb/users/q/quannian/profiles/unix/cs/Drill/Ontolearn-0.7.3/Ontolearn/examples/Drill_DB/Drill_QALD9_DB.py", line 46, in start
    drill.train(num_of_target_concepts=args.num_of_target_concepts,
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/site-packages/ontolearn/learners/drill.py", line 263, in train
    sum_of_rewards_per_actions = self.rl_learning_loop(pos_uri=frozenset(positives),
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/site-packages/ontolearn/learners/drill.py", line 221, in rl_learning_loop
    sequence_of_states, rewards = self.sequence_of_actions(root_rl_state)
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/site-packages/ontolearn/learners/drill.py", line 464, in sequence_of_actions
    next_selected_rl_state, reward = self.select_next_state(current_state, next_rl_states)
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/site-packages/ontolearn/learners/drill.py", line 447, in select_next_state
    next_selected_rl_state = self.exploration_exploitation_tradeoff(current_state, next_rl_states)
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/site-packages/ontolearn/learners/drill.py", line 622, in exploration_exploitation_tradeoff
    next_state = random.choice(next_states)
  File "/upb/users/q/quannian/profiles/unix/cs/.conda/envs/ontolearn0.8.0/lib/python3.10/random.py", line 378, in choice
    return seq[self._randbelow(len(seq))]
IndexError: list index out of range
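If I read the traceback correctly, random.choice(next_states) in exploration_exploitation_tradeoff was handed an empty list, i.e. no successor states were produced for the current state before sampling. A minimal sketch of the same failure, plus the kind of guard that would avoid it (illustrative only, not Drill's actual code):

import random

next_states = []  # apparently what the refinement step produced here

# Sampling from an empty list raises exactly the IndexError shown above:
try:
    random.choice(next_states)
except IndexError as exc:
    print("IndexError:", exc)

# A guard like this would avoid the crash (again, illustrative, not Drill's code):
if next_states:
    next_state = random.choice(next_states)
else:
    next_state = None  # e.g. end the trajectory early when there is nothing to explore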
The code I used that led to the error:
import json
from argparse import ArgumentParser
import numpy as np
from sklearn.model_selection import StratifiedKFold
from ontolearn.triple_store import TripleStoreKnowledgeBase  # imported but not used below
from ontolearn.triple_store import TripleStore
from ontolearn.utils.static_funcs import compute_f1_score
from ontolearn.knowledge_base import KnowledgeBase  # imported but not used below
from ontolearn.learning_problem import PosNegLPStandard
from ontolearn.refinement_operators import LengthBasedRefinement
from ontolearn.learners import Drill
from ontolearn.metrics import F1
from ontolearn.heuristics import CeloeBasedReward
from owlapy.owl_individual import OWLNamedIndividual, IRI
from owlapy.render import DLSyntaxObjectRenderer


def start(args):
    kb = TripleStore(url=args.path_sparql_endpoint)
    drill = Drill(knowledge_base=kb,
                  path_embeddings=args.path_embeddings,
                  refinement_operator=LengthBasedRefinement(knowledge_base=kb),
                  quality_func=F1(),
                  reward_func=CeloeBasedReward(),
                  epsilon_decay=args.epsilon_decay,
                  learning_rate=args.learning_rate,
                  num_of_sequential_actions=args.num_of_sequential_actions,
                  num_episode=args.num_episode,
                  iter_bound=args.iter_bound,
                  max_runtime=args.max_runtime)
    if args.path_pretrained_dir:
        drill.load(directory=args.path_pretrained_dir)
    else:
        drill.train(num_of_target_concepts=args.num_of_target_concepts,
                    num_learning_problems=args.num_of_training_learning_problems)
        drill.save(directory="pretrained_drill")
    with open(args.path_learning_problem) as json_file:
        examples = json.load(json_file)
    p = examples['problems']['QALD9_plus_dbpedia']['positive_examples']
    n = examples['problems']['QALD9_plus_dbpedia']['negative_examples']
    kf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.random_seed)
    X = np.array(p + n)
    Y = np.array([1.0 for _ in p] + [0.0 for _ in n])
    dl_render = DLSyntaxObjectRenderer()
    for (ith, (train_index, test_index)) in enumerate(kf.split(X, Y)):
        train_pos = {pos_individual for pos_individual in X[train_index][Y[train_index] == 1]}
        train_neg = {neg_individual for neg_individual in X[train_index][Y[train_index] == 0]}
        test_pos = {pos_individual for pos_individual in X[test_index][Y[test_index] == 1]}
        test_neg = {neg_individual for neg_individual in X[test_index][Y[test_index] == 0]}
        train_lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, train_pos))),
                                    neg=set(map(OWLNamedIndividual, map(IRI.create, train_neg))))
        test_lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, test_pos))),
                                   neg=set(map(OWLNamedIndividual, map(IRI.create, test_neg))))
        pred_drill = drill.fit(train_lp).best_hypotheses()
        # Quality on train data
        train_f1_drill = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_drill)}),
                                          pos=train_lp.pos,
                                          neg=train_lp.neg)
        # Quality on test data
        test_f1_drill = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_drill)}),
                                         pos=test_lp.pos,
                                         neg=test_lp.neg)
        print(f"Prediction: {dl_render.render(pred_drill)} | Train Quality: {train_f1_drill:.3f} | Test Quality: {test_f1_drill:.3f} \n")


if __name__ == '__main__':
    parser = ArgumentParser()
    # General
    parser.add_argument("--path_sparql_endpoint", type=str,
                        default="http://localhost:9050/sparql")
    parser.add_argument("--path_embeddings", type=str,
                        default='/upb/users/q/quannian/profiles/unix/cs/Embedding/QALD9_plus_dbpedia/2024-10-21-16-09-12/Merge_entity_relation.csv')
    parser.add_argument("--num_of_target_concepts", type=int, default=1)
    parser.add_argument("--num_of_training_learning_problems", type=int, default=1)
    parser.add_argument("--path_pretrained_dir", type=str, default=None)
    parser.add_argument("--path_learning_problem", type=str,
                        default='/upb/users/q/quannian/profiles/unix/cs/Drill/Ontolearn-0.7.3/Ontolearn/LPs/QALD9DB/TandF_MST5_reverse.json',
                        help="Path to a .json file that contains the two properties 'positive_examples' and "
                             "'negative_examples'. Each of these properties should contain the IRIs of the "
                             "respective instances, e.g. 'some/path/lp.json'")
    parser.add_argument("--max_runtime", type=int, default=10, help="Max runtime")
    parser.add_argument("--folds", type=int, default=10, help="Number of folds of cross validation.")
    parser.add_argument("--random_seed", type=int, default=1)
    parser.add_argument("--iter_bound", type=int, default=10_000, help='iter_bound during testing.')
    # DQL related
    parser.add_argument("--num_episode", type=int, default=1, help='Number of trajectories created for a given lp.')
    parser.add_argument("--epsilon_decay", type=float, default=.01, help='Epsilon greedy trade off per epoch')
    parser.add_argument("--max_len_replay_memory", type=int, default=1024,
                        help='Maximum size of the experience replay')
    parser.add_argument("--num_epochs_per_replay", type=int, default=2,
                        help='Number of epochs on experience replay memory')
    parser.add_argument('--num_of_sequential_actions', type=int, default=1, help='Length of the trajectory.')
    # NN related
parser.add_argument("--learning_rate", type=int, default=.01)
start(parser.parse_args())
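For what it's worth, here is a quick sanity check of the two external inputs the script depends on: the SPARQL endpoint and the learning-problem JSON. This is a generic SPARQL-over-HTTP probe using requests, not Ontolearn API, and the file path is shortened here:

import json
import requests

ENDPOINT = "http://localhost:9050/sparql"  # value of --path_sparql_endpoint
LP_FILE = "TandF_MST5_reverse.json"        # --path_learning_problem (full path shortened here)

# 1) Does the endpoint answer a trivial query at all?
resp = requests.get(ENDPOINT,
                    params={"query": "SELECT * WHERE { ?s ?p ?o } LIMIT 1"},
                    headers={"Accept": "application/sparql-results+json"})
resp.raise_for_status()
print("endpoint answers:", bool(resp.json()["results"]["bindings"]))

# 2) Do both example lists actually contain IRIs?
with open(LP_FILE) as f:
    lp = json.load(f)["problems"]["QALD9_plus_dbpedia"]
print("positives:", len(lp["positive_examples"]),
      "negatives:", len(lp["negative_examples"]))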
Any hints as to what might be wrong with the dataset, or with the code itself?