Skip to content

Commit

Permalink
Generate Parses: select random stems rather than first N (#719)
Browse files: browse the repository at this point in the history
  • Loading branch information
mr-martian committed Sep 30, 2024
1 parent 47377bb commit b50cb8a
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions GenerateParses.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,6 +48,7 @@
import copy
import itertools
from collections import defaultdict
import random

from SIL.LCModel import (
IMoStemMsa,
Expand Down Expand Up @@ -449,11 +450,6 @@ def MainFunction(DB, report, modifyAllowed):

# This number can be adjusted in the config file, if you want to stop after a greater number of stems
## (There could be better logic here!! Maybe a while loop would be better.)
if stemCount >= maxStems:
# After the first n stems (of the focus POS type), don't process stem type entries,
# but continue the loop, looking for more entries
# (We want to traverse all the affixes.)
continue

##
# Get the Citation Form of this entry (or Lexeme Form, if Citation Form is empty)
Expand Down Expand Up @@ -662,6 +658,11 @@ def MainFunction(DB, report, modifyAllowed):
if len(badSlots) > 0:
return

if len(standardSpellList) > maxStems:
random.shuffle(standardSpellList)
standardSpellList = standardSpellList[:maxStems]
standardSpellList.sort()

report.Info('Finished collecting templates. Now generating words.')
## Open output files, before constructing parses

Expand Down

0 comments on commit b50cb8a

Please sign in to comment.