Fixes #823.

Use the same logic that's in the Import from Ptx module for Insert Target Text to mark sfms as analysis writing system.
rmlockwood · Dec 4, 2024 · e135429 · e135429
1 parent 1a5ce5f
commit e135429
Show file tree

Hide file tree

Showing 3 changed files with 71 additions and 91 deletions.
diff --git a/ImportFromParatext.py b/ImportFromParatext.py
@@ -5,6 +5,9 @@
 #   SIL International
 #   10/30/21
 #
+#   Version 3.12.2 - 12/4/24 - Ron Lockwood
+#    Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
+#
 #   Version 3.12.1 - 11/26/24 - Ron Lockwood
 #    Allow intro. to be imported with chapter 1.
 #    Fixed bug with excluding \r by using DOTALL
@@ -139,7 +142,7 @@
 # Documentation that the user sees:
 
 docs = {FTM_Name       : "Import Text From Paratext",
-        FTM_Version    : "3.12.1",
+        FTM_Version    : "3.12.2",
         FTM_ModifiesDB : True,
         FTM_Synopsis   : "Import chapters from Paratext.",
         FTM_Help       : "",
@@ -387,74 +390,7 @@ def do_import(DB, report, chapSelectObj, tree):
         # Set StText object as the Text contents
         text.ContentsOA = stText  
 
-        # Split the text into sfm marker (or ref) and non-sfm marker (or ref), i.e. text contenct. The sfm marker or reference will later get marked as analysis lang. so it doesn't
-        # have to be interlinearized. Always put the marker + ref with dash before the plain marker + ref. \\w+* catches all end markers and \\w+ catches everything else (it needs to be at the end)
-        # We have the \d+:\d+-\d+ and \d+:\d+ as their own expressions to catch places in the text that have a verse reference like after a \r or \xt. It's nice if these get marked as analysis WS.
-        # Attributes are of the form |x=123 ... \s*
-        # You can't have parens inside of the split expression since it is already in parens. It will mess up the output.
-        #                                                                                                                                                                                                  eg \+xt
-        #                  attribs end mrk footnt  footnt ref+dash     footnt ref      cr ref note   cr ref  cr ref orig+dash    cr ref orig     verse+dash   verse    pub verse chap    ref+dash       ref        marker+ any marker
-        segs = re.split(r'(\|.+?\*|\\\w+\*|\\f \+ |\\fr \d+[:.]\d+-\d+|\\fr \d+[:.]\d+|\\xt .+?\\x\*|\\x \+ |\\xo \d+[:.]\d+-\d+|\\xo \d+[:.]\d+|\\v \d+-\d+ |\\v \d+ |\\vp \S+ |\\c \d+|\d+[:.]\d+-\d+|\d+[:.]\d+|\\\+\w+|\\\w+)', chapterContent) 
-
-        # Create 1st paragraph object
-        stTxtPara = m_stTxtParaFactory.Create()
-
-        # Add it to the stText object
-        stText.ParagraphsOS.Add(stTxtPara)    
-
-        bldr = TsStringUtils.MakeStrBldr()
-
-        # See if we have a script that has both upper and lower case. 
-        if len(segs) >= 2:
-
-            # Find a non-zero segment vernacular string (an even numbered index)
-            for i in range(2, len(segs), 2):
-
-                if len(segs[i]) > 0:
-
-                    # if the lower case is equal to the upper case, assume this script has no upper case
-                    if segs[i].lower() == segs[i].upper():
-
-                        upperCase = False
-                    else:
-                        upperCase = True
-
-                    break
-
-        # SFMs to start a new paragraph in FLEx
-        #newPar = r'\\[cpsqm]'
-        newPar = r'\n' # just start a new paragraph at every line feed
-
-        for _, seg in enumerate(segs):
-
-            if not (seg is None or len(seg) == 0 or seg == '\n'):
-
-                # Either an sfm marker or a verse ref should get marked as Analysis WS
-                if re.search(r'\\|\d+[.:]\d+', seg):
-
-                    # make this in the Analysis WS
-                    tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultAnalWs)
-                    bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)
-
-                else:
-                    # make this in the Vernacular WS
-                    tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultVernWs)
-                    bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)
-
-            if seg and re.search(newPar, seg): # or first segment if not blank
-
-                # Save the built up string to the Contents member
-                stTxtPara.Contents = bldr.GetString()
-
-                # Create paragraph object
-                stTxtPara = m_stTxtParaFactory.Create()
-
-                # Add it to the stText object
-                stText.ParagraphsOS.Add(stTxtPara)  
-
-                bldr = TsStringUtils.MakeStrBldr()
-
-        stTxtPara.Contents = bldr.GetString()
+        Utils.insertParagraphs(DB, chapterContent, m_stTxtParaFactory, stText)
 
         # Build the title string from book abbreviation and chapter.
         title = bibleBook + ' ' + str(titleChapNum).zfill(2)

diff --git a/InsertTargetText.py b/InsertTargetText.py
@@ -5,6 +5,9 @@
 #   University of Washington, SIL International
 #   12/5/14
 #
+#   Version 3.12.1 - 12/4/24 - Ron Lockwood
+#    Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
+#
 #   Version 3.12 - 11/2/24 - Ron Lockwood
 #    Bumped to 3.12.
 #
@@ -93,7 +96,7 @@
 # Documentation that the user sees:
 
 docs = {FTM_Name       : "Insert Target Text",
-        FTM_Version    : "3.12",
+        FTM_Version    : "3.12.1",
         FTM_ModifiesDB : True,
         FTM_Synopsis   : "Insert a translated text into the target FLEx project.",
         FTM_Help       : "",
@@ -150,6 +153,7 @@ def MainFunction(DB, report, modify=True):
 
     try:
         f = open(synFile, encoding='utf-8')
+        fullText = f.read()
     except:
         TargetDB.CloseProject()
         report.Error('Could not open the file: "'+synFile+'".')
@@ -163,38 +167,23 @@ def MainFunction(DB, report, modify=True):
     m_stTextFactory = TargetDB.project.ServiceLocator.GetService(IStTextFactory)
     m_stTxtParaFactory = TargetDB.project.ServiceLocator.GetService(IStTxtParaFactory)
 
-    # Start an Undo Task
-#    TargetDB.db.MainCacheAccessor.BeginNonUndoableTask()  
-
     # Create a text and add it to the project      
     text = m_textFactory.Create()           
     stText = m_stTextFactory.Create()
 
     # Set StText object as the Text contents
     text.ContentsOA = stText  
-
-    # Add paragraphs from the synthesized file
-    for line in f:
-        # Create paragraph object
-        stTxtPara = m_stTxtParaFactory.Create()
-
-        # Add it to the stText object
-        stText.ParagraphsOS.Add(stTxtPara)       
-
-        # Create a TS String to hold the line of text. Use the default vern. writing system
-        tss = TsStringUtils.MakeString(line, TargetDB.project.DefaultVernWs)
-
-        # Set the paragraph contents to the TS String
-        stTxtPara.Contents = tss             
-
+
+    # Insert text into the target DB while marking sfms as analysis writing system
+    Utils.insertParagraphs(TargetDB, fullText, m_stTxtParaFactory, stText)
+
     # Set the title of the text
     tss = TsStringUtils.MakeString(sourceTextName, TargetDB.project.DefaultAnalWs)
     text.Name.AnalysisDefaultWritingSystem = tss
 
     report.Info('Text: "'+sourceTextName+'" created in the '+targetProj+' project.')
-
     TargetDB.CloseProject()
-
+    f.close()
 
 #----------------------------------------------------------------
 # The name 'FlexToolsModule' must be defined like this:

diff --git a/Utils.py b/Utils.py
@@ -5,6 +5,9 @@
 #   SIL International
 #   7/23/2014
 #
+#   Version 3.12.3 - 12/4/24 - Ron Lockwood
+#    Fixes #823. Use the same logic that's in the Import from Ptx module to mark sfms as analysis writing system.
+#
 #   Version 3.12.2 - 12/3/24 - Ron Lockwood
 #    Fixes #821. Don't escape < and > in literal strings. Right now we don't allow them in lemmas anyway
 #    and this messes up rules that are looking for literal strings starting with <xyz, i.e. a tag.
@@ -2414,4 +2417,56 @@ def getInflectionTags(MSAobject):
 
 def containsInvalidLemmaChars(myStr):
 
-    return True if reInvalidLemmaChars.search(myStr) else False
+    return True if reInvalidLemmaChars.search(myStr) else False
+
+def insertParagraphs(DB, inputStr, m_stTxtParaFactory, stText):
+
+    # Fill stText with paragraphs built from inputStr. Split the text into sfm marker (or ref) pieces and non-sfm marker (or ref) pieces, i.e. text content. The sfm marker or reference pieces
+    # get marked as analysis WS below so they don't have to be interlinearized. Always put the marker + ref with dash before the plain marker + ref. \\w+\* catches all end markers and \\w+ catches everything else (it needs to be at the end)
+    # We have the \d+[:.]\d+-\d+ and \d+[:.]\d+ as their own expressions to catch places in the text that have a verse reference like after a \r or \xt. It's nice if these get marked as analysis WS.
+    # Attributes are of the form |x=123 ... *, i.e. everything from the pipe through the next asterisk.
+    # You can't have more capturing parens inside of the split expression since it is already wrapped in one capturing group; re.split returns the text of every group, so nested groups would add extra entries to segs.
+    #                                                                                                                                                                                                  eg \+xt
+    #                  attribs end mrk footnt  footnt ref+dash     footnt ref      cr ref note   cr ref  cr ref orig+dash    cr ref orig     verse+dash   verse    pub verse chap    ref+dash       ref        marker+ any marker
+    segs = re.split(r'(\|.+?\*|\\\w+\*|\\f \+ |\\fr \d+[:.]\d+-\d+|\\fr \d+[:.]\d+|\\xt .+?\\x\*|\\x \+ |\\xo \d+[:.]\d+-\d+|\\xo \d+[:.]\d+|\\v \d+-\d+ |\\v \d+ |\\vp \S+ |\\c \d+|\d+[:.]\d+-\d+|\d+[:.]\d+|\\\+\w+|\\\w+)', inputStr) 
+
+    # Create 1st paragraph object
+    stTxtPara = m_stTxtParaFactory.Create()
+
+    # Add it to the stText object
+    stText.ParagraphsOS.Add(stTxtPara)    
+    bldr = TsStringUtils.MakeStrBldr()
+
+    # Start a new paragraph at every line feed
+    newPar = r'\n' 
+
+    for _, seg in enumerate(segs):
+
+        if not (seg is None or len(seg) == 0 or seg == '\n'):
+
+            # A segment containing a backslash (sfm marker) or a number pair like 3:16 / 3.16 (verse ref) gets marked as Analysis WS
+            if re.search(r'\\|\d+[.:]\d+', seg):
+
+                # append this segment, with line feeds removed, in the Analysis WS
+                tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultAnalWs)
+                bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)
+
+            else:
+                # append this segment, with line feeds removed, in the Vernacular WS
+                tss = TsStringUtils.MakeString(re.sub(r'\n','', seg), DB.project.DefaultVernWs)
+                bldr.ReplaceTsString(bldr.Length, bldr.Length, tss)
+
+        if seg and re.search(newPar, seg): # the segment contains a line feed, so finish the current paragraph and start a new one
+
+            # Save the built up string to the Contents member
+            stTxtPara.Contents = bldr.GetString()
+
+            # Create paragraph object
+            stTxtPara = m_stTxtParaFactory.Create()
+
+            # Add it to the stText object
+            stText.ParagraphsOS.Add(stTxtPara)  
+
+            bldr = TsStringUtils.MakeStrBldr()
+
+    stTxtPara.Contents = bldr.GetString()