Skip to content

Commit

Permalink
Fixes #823.
Browse files Browse the repository at this point in the history
Use the same logic that's in the Import from Ptx module for Insert Target Text to mark sfms as analysis writing system.
  • Loading branch information
rmlockwood committed Dec 4, 2024
1 parent 1a5ce5f commit e135429
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 91 deletions.
74 changes: 5 additions & 69 deletions ImportFromParatext.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# SIL International
# 10/30/21
#
# Version 3.12.2 - 12/4/24 - Ron Lockwood
# Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
#
# Version 3.12.1 - 11/26/24 - Ron Lockwood
# Allow intro. to be imported with chapter 1.
# Fixed bug with excluding \r by using DOTALL
Expand Down Expand Up @@ -139,7 +142,7 @@
# Documentation that the user sees:

docs = {FTM_Name : "Import Text From Paratext",
FTM_Version : "3.12.1",
FTM_Version : "3.12.2",
FTM_ModifiesDB : True,
FTM_Synopsis : "Import chapters from Paratext.",
FTM_Help : "",
Expand Down Expand Up @@ -387,74 +390,7 @@ def do_import(DB, report, chapSelectObj, tree):
# Set StText object as the Text contents
text.ContentsOA = stText

# Split the text into sfm marker (or ref) and non-sfm marker (or ref), i.e. text content. The sfm marker or reference will later get marked as analysis lang. so it doesn't
# have to be interlinearized. Always put the marker + ref with dash before the plain marker + ref. \\w+* catches all end markers and \\w+ catches everything else (it needs to be at the end)
# We have the \d+:\d+-\d+ and \d+:\d+ as their own expressions to catch places in the text that have a verse reference like after a \r or \xt. It's nice if these get marked as analysis WS.
# Attributes are of the form |x=123 ... \s*
# You can't have parens inside of the split expression since it is already in parens. It will mess up the output.
# eg \+xt
# attribs end mrk footnt footnt ref+dash footnt ref cr ref note cr ref cr ref orig+dash cr ref orig verse+dash verse pub verse chap ref+dash ref marker+ any marker
segs = re.split(r'(\|.+?\*|\\\w+\*|\\f \+ |\\fr \d+[:.]\d+-\d+|\\fr \d+[:.]\d+|\\xt .+?\\x\*|\\x \+ |\\xo \d+[:.]\d+-\d+|\\xo \d+[:.]\d+|\\v \d+-\d+ |\\v \d+ |\\vp \S+ |\\c \d+|\d+[:.]\d+-\d+|\d+[:.]\d+|\\\+\w+|\\\w+)', chapterContent)

# Create 1st paragraph object
stTxtPara = m_stTxtParaFactory.Create()

# Add it to the stText object
stText.ParagraphsOS.Add(stTxtPara)

bldr = TsStringUtils.MakeStrBldr()

# See if we have a script that has both upper and lower case.
if len(segs) >= 2:

# Find a non-zero segment vernacular string (an even numbered index)
for i in range(2, len(segs), 2):

if len(segs[i]) > 0:

# if the lower case is equal to the upper case, assume this script has no upper case
if segs[i].lower() == segs[i].upper():

upperCase = False
else:
upperCase = True

break

# SFMs to start a new paragraph in FLEx
#newPar = r'\\[cpsqm]'
newPar = r'\n' # just start a new paragraph at every line feed

for _, seg in enumerate(segs):

if not (seg is None or len(seg) == 0 or seg == '\n'):

# Either an sfm marker or a verse ref should get marked as Analysis WS
if re.search(r'\\|\d+[.:]\d+', seg):

# make this in the Analysis WS
tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultAnalWs)
bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)

else:
# make this in the Vernacular WS
tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultVernWs)
bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)

if seg and re.search(newPar, seg): # or first segment if not blank

# Save the built up string to the Contents member
stTxtPara.Contents = bldr.GetString()

# Create paragraph object
stTxtPara = m_stTxtParaFactory.Create()

# Add it to the stText object
stText.ParagraphsOS.Add(stTxtPara)

bldr = TsStringUtils.MakeStrBldr()

stTxtPara.Contents = bldr.GetString()
Utils.insertParagraphs(DB, chapterContent, m_stTxtParaFactory, stText)

# Build the title string from book abbreviation and chapter.
title = bibleBook + ' ' + str(titleChapNum).zfill(2)
Expand Down
31 changes: 10 additions & 21 deletions InsertTargetText.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# University of Washington, SIL International
# 12/5/14
#
# Version 3.12.1 - 12/4/24 - Ron Lockwood
# Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
#
# Version 3.12 - 11/2/24 - Ron Lockwood
# Bumped to 3.12.
#
Expand Down Expand Up @@ -93,7 +96,7 @@
# Documentation that the user sees:

docs = {FTM_Name : "Insert Target Text",
FTM_Version : "3.12",
FTM_Version : "3.12.1",
FTM_ModifiesDB : True,
FTM_Synopsis : "Insert a translated text into the target FLEx project.",
FTM_Help : "",
Expand Down Expand Up @@ -150,6 +153,7 @@ def MainFunction(DB, report, modify=True):

try:
f = open(synFile, encoding='utf-8')
fullText = f.read()
except:
TargetDB.CloseProject()
report.Error('Could not open the file: "'+synFile+'".')
Expand All @@ -163,38 +167,23 @@ def MainFunction(DB, report, modify=True):
m_stTextFactory = TargetDB.project.ServiceLocator.GetService(IStTextFactory)
m_stTxtParaFactory = TargetDB.project.ServiceLocator.GetService(IStTxtParaFactory)

# Start an Undo Task
# TargetDB.db.MainCacheAccessor.BeginNonUndoableTask()

# Create a text and add it to the project
text = m_textFactory.Create()
stText = m_stTextFactory.Create()

# Set StText object as the Text contents
text.ContentsOA = stText

# Add paragraphs from the synthesized file
for line in f:
# Create paragraph object
stTxtPara = m_stTxtParaFactory.Create()

# Add it to the stText object
stText.ParagraphsOS.Add(stTxtPara)

# Create a TS String to hold the line of text. Use the default vern. writing system
tss = TsStringUtils.MakeString(line, TargetDB.project.DefaultVernWs)

# Set the paragraph contents to the TS String
stTxtPara.Contents = tss


# Insert text into the target DB while marking sfms as analysis writing system
Utils.insertParagraphs(TargetDB, fullText, m_stTxtParaFactory, stText)

# Set the title of the text
tss = TsStringUtils.MakeString(sourceTextName, TargetDB.project.DefaultAnalWs)
text.Name.AnalysisDefaultWritingSystem = tss

report.Info('Text: "'+sourceTextName+'" created in the '+targetProj+' project.')

TargetDB.CloseProject()

f.close()

#----------------------------------------------------------------
# The name 'FlexToolsModule' must be defined like this:
Expand Down
57 changes: 56 additions & 1 deletion Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# SIL International
# 7/23/2014
#
# Version 3.12.3 - 12/4/24 - Ron Lockwood
# Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
#
# Version 3.12.2 - 12/3/24 - Ron Lockwood
# Fixes #821. Don't escape < and > in literal strings. Right now we don't allow them in lemmas anyway
# and this messes up rules that are looking for literal strings starting with <xyz, i.e. a tag.
Expand Down Expand Up @@ -2414,4 +2417,56 @@ def getInflectionTags(MSAobject):

def containsInvalidLemmaChars(myStr):
    """Return True when reInvalidLemmaChars matches anywhere in myStr, else False."""

    # A match object is always truthy and a failed search yields None,
    # so bool() gives exactly the True/False pair the caller expects.
    return bool(reInvalidLemmaChars.search(myStr))

def insertParagraphs(DB, inputStr, m_stTxtParaFactory, stText):
    """Append the text in inputStr to stText as a series of paragraphs.

    The input is split into SFM markers / verse references and plain text.
    Markers and references are written in the project's default analysis
    writing system (so they don't have to be interlinearized); everything
    else is written in the default vernacular writing system. A new
    paragraph is started at every line feed in the input.

    Parameters:
        DB: project wrapper; DB.project.DefaultAnalWs and
            DB.project.DefaultVernWs supply the writing systems.
        inputStr: the full text to insert (may contain SFM markers).
        m_stTxtParaFactory: factory whose Create() returns a new paragraph.
        stText: text object whose ParagraphsOS collection receives the paragraphs.

    Returns:
        None. stText is modified in place. At least one paragraph is always
        added; input ending in a line feed leaves a final empty paragraph.
    """

    # Split the text into sfm marker (or ref) and non-sfm marker (or ref), i.e. text content. The sfm marker or reference
    # will later get marked as analysis lang. so it doesn't have to be interlinearized.
    # Always put the marker + ref with dash before the plain marker + ref. \\w+* catches all end markers and \\w+ catches
    # everything else (it needs to be at the end).
    # We have the \d+:\d+-\d+ and \d+:\d+ as their own expressions to catch places in the text that have a verse reference
    # like after a \r or \xt. It's nice if these get marked as analysis WS.
    # Attributes are of the form |x=123 ... \s*
    # You can't have parens inside of the split expression since it is already in parens. It will mess up the output.
    # Alternatives, in order: attribs, end marker, footnote, footnote ref+dash, footnote ref, cross-ref note, cross-ref,
    # cross-ref orig+dash, cross-ref orig, verse+dash, verse, pub verse, chapter, ref+dash, ref, marker+ (eg \+xt), any marker
    segs = re.split(r'(\|.+?\*|\\\w+\*|\\f \+ |\\fr \d+[:.]\d+-\d+|\\fr \d+[:.]\d+|\\xt .+?\\x\*|\\x \+ |\\xo \d+[:.]\d+-\d+|\\xo \d+[:.]\d+|\\v \d+-\d+ |\\v \d+ |\\vp \S+ |\\c \d+|\d+[:.]\d+-\d+|\d+[:.]\d+|\\\+\w+|\\\w+)', inputStr)

    # Create 1st paragraph object and add it to the stText object
    stTxtPara = m_stTxtParaFactory.Create()
    stText.ParagraphsOS.Add(stTxtPara)
    bldr = TsStringUtils.MakeStrBldr()

    for seg in segs:

        # re.split can produce empty strings between adjacent markers
        if not seg:
            continue

        # Either an sfm marker or a verse ref should get marked as Analysis WS,
        # anything else is Vernacular WS
        if re.search(r'\\|\d+[.:]\d+', seg):
            ws = DB.project.DefaultAnalWs
        else:
            ws = DB.project.DefaultVernWs

        # Start a new paragraph at every line feed. Breaking positionally (rather than
        # once per segment after stripping the line feeds) keeps marker-free multi-line
        # text as separate paragraphs and puts text that follows a line feed into the
        # new paragraph instead of the previous one.
        for pieceNum, piece in enumerate(seg.split('\n')):

            if pieceNum > 0:

                # A line feed preceded this piece: save the built up string to the
                # Contents member and begin a fresh paragraph.
                stTxtPara.Contents = bldr.GetString()
                stTxtPara = m_stTxtParaFactory.Create()
                stText.ParagraphsOS.Add(stTxtPara)
                bldr = TsStringUtils.MakeStrBldr()

            if piece:

                # Append this piece to the end of the current paragraph's string
                tss = TsStringUtils.MakeString(piece, ws)
                bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)

    # Store whatever has been built for the last paragraph
    stTxtPara.Contents = bldr.GetString()

0 comments on commit e135429

Please sign in to comment.