First usable version

2021-01-04 18:04:37 +01:00 · 2021-01-04 18:04:37 +01:00 · 536fef8d88
commit 536fef8d88
parent dd1ab8018f
5 changed files with 137 additions and 20 deletions
--- a/config.ini
+++ b/config.ini
@ -0,0 +1,9 @@
 [Options]
 rainbow_exec = /path/to/rainbow/executable
 custom_filters_folder = /path/to/folder/with/custom/filters
 [Filters]
 mxliff = okf_autoxliff
 xliff = okf_autoxliff
 xlf = okf_autoxliff
 xlz = okf_archive
--- a/okapi-batch-runner.py
+++ b/okapi-batch-runner.py
@ -1,13 +1,12 @@
 import subprocess
 import sys
 import os
 import re
 import io
 import glob
 import configparser
 import xml.etree.ElementTree as ET
 from pathlib import Path
 # Function for selection lists
 def select_item(welcome_text, items):
    print(welcome_text+"\n")
    for i, item in enumerate(items):
@ -26,45 +25,81 @@ def select_item(welcome_text, items):
        print('Error! Please enter one number between 1-{}'.format(len(items)))
        exit()
 # Function to get the correct filter for a file
 def get_filter(path):
     """Return the filter configured for the extension of *path*.

     The extension (without the leading dot) is looked up in the
     ``[Filters]`` section of the project configuration first and, if it
     is not found there, in the ``[Filters]`` section of the global
     configuration.

     Args:
         path: Path of the file to classify (``str`` or ``os.PathLike``).

     Returns:
         The configured filter value, or an empty string when the file has
         no extension or no filter is configured for that extension.
     """
     extension = os.path.splitext(path)[1][1:]
     if not extension:
         return ""
     # Project-specific mappings take precedence over the global ones.
     # has_option() simply returns False when the section does not exist
     # (e.g. a project without project.ini), whereas indexing ["Filters"]
     # on the parser would raise KeyError in that case.
     for parser in (project_config, config):
         if parser.has_option("Filters", extension):
             return parser.get("Filters", extension)
     return ""
 # Load config (config.ini is expected in sys.path[0], the script's folder)
 config = configparser.ConfigParser()
 config.read(os.path.join(sys.path[0], "config.ini"))
 rainbow = config["Options"]["rainbow_exec"]
 # Get work directories (ignore 0, which is this Python script)
 work_directories = sys.argv[1:]
 # Load project: the user picks one of the entries of the "projects" folder
 projects_root = os.path.join(sys.path[0], "projects")
 list_projects = os.listdir(projects_root)
 project_dir = os.path.join(projects_root, select_item("Available projects:", list_projects))
 project = os.path.join(project_dir, "project.rnb")
 # Load project config
 project_config = configparser.ConfigParser()
 project_config.read(os.path.join(project_dir, "project.ini"))
 # Load pipeline: the user picks one of the *.pln files of the project.
 # project_dir already contains the full path, so it is used directly
 # instead of being joined with sys.path[0]/projects a second time.
 list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(project_dir, "*.pln"))]
 pipeline = os.path.join(project_dir, select_item("Available pipelines:", list_pipelines))
 # Build list of files to be processed
 files = []
 files_root = os.getcwd()
 for directory in work_directories:
     # A single recursive traversal is enough: an empty result just means
     # the loop body never runs. The '*.*' pattern only matches names that
     # contain a dot, and rglob() also yields directories, so anything
     # that is not a regular file is skipped.
     for translationfile in Path(directory).rglob('*.*'):
         if not translationfile.is_file():
             continue
         file_filter = get_filter(translationfile)
         if file_filter != "":
             # Paths are stored relative to the current working directory
             # (files_root), together with the filter to use for the file.
             files.append((os.path.relpath(translationfile, files_root), file_filter))
 # Parse project
 project_tree = ET.parse(project)
 project_tree_root = project_tree.getroot()
 fileset = project_tree_root.find("./fileSet[@id='1']")
 fileset_root = project_tree_root.find("./fileSet[@id='1']/root")
 parameters = project_tree_root.find("./parametersFolder")
 # Set the parameters folder to the one defined in the config.
 # This must be written to the <parametersFolder> element; writing it to
 # the fileset root would be overwritten by the working folder just below.
 parameters.set("useCustom", "1")
 parameters.text = config["Options"]["custom_filters_folder"]
 # Set the root of the first fileset to the working folder
 fileset_root.set("useCustom", "1")
 fileset_root.text = files_root
 # Add files to first fileset: one <fi> element per file, with the filter
 # in the "fs" attribute and the file path as the element text
 for item in files:
     new = ET.Element("fi", attrib={"fs": item[1]})
     new.text = str(item[0])
     fileset.append(new)
 # Write temporary custom project file for Okapi Rainbow
 project_tree.write("temp_project.rnb", encoding="UTF-8", xml_declaration=True)
 try:
     # Run Okapi Rainbow with the temporary project and the selected pipeline
     subprocess.run([rainbow, "-p", "temp_project.rnb", "-pln", pipeline, "-np"])
 finally:
     # Remove temporary project file after executing the pipeline, also
     # when Rainbow could not be started (e.g. wrong rainbow_exec path)
     os.remove("temp_project.rnb")
--- a/project/project.ini
+++ b/project/project.ini
@ -0,0 +1,2 @@
 [Filters]
 txt = okf_plaintext
--- a/project/project.rnb
+++ b/project/project.rnb
@ -0,0 +1,21 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <rainbowProject version="4">
 	<fileSet id="1">
 		<root useCustom="0"></root>
 	</fileSet>
 	<fileSet id="2">
 		<root useCustom="0"></root></fileSet>
 	<fileSet id="3">
 		<root useCustom="0"></root>
 	</fileSet>
 	<output>
 		<root use="0"></root>
 		<subFolder use="0"></subFolder>
 		<extension use="1" style="0">.out</extension>
 		<replace use="0" oldText="" newText=""></replace>
 		<prefix use="0"></prefix><suffix use="0"></suffix>
 	</output>
 	<options sourceLanguage="en" sourceEncoding="UTF-8" targetLanguage="ca" targetEncoding="UTF-8"></options>
 	<parametersFolder useCustom="0"></parametersFolder>
 	<utilities xml:spaces="preserve"></utilities>
 </rainbowProject>
--- a/project/wordcount.pln
+++ b/project/wordcount.pln
@ -0,0 +1,50 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
 <step class="net.sf.okapi.steps.wordcount.WordCountStep">#v1
 countInBatch.b=true
 countInBatchItems.b=true
 countInDocuments.b=true
 countInSubDocuments.b=false
 countInGroups.b=false
 bufferSize.i=0</step>
 <step class="net.sf.okapi.steps.wordcount.CharacterCountStep">#v1
 countInBatch.b=true
 countInBatchItems.b=true
 countInDocuments.b=true
 countInSubDocuments.b=false
 countInGroups.b=false
 bufferSize.i=0</step>
 <step class="net.sf.okapi.steps.repetitionanalysis.RepetitionAnalysisStep">#v1
 fuzzyThreshold.i=100
 maxHits.i=20</step>
 <step class="net.sf.okapi.steps.scopingreport.ScopingReportStep">#v1
 projectName=My Project
 customTemplateURI=
 customTemplateString=
 outputPath=${rootDir}/scoping_report.html
 countAsNonTranslatable_GMXProtected.b=true
 countAsNonTranslatable_GMXExactMatched.b=true
 countAsNonTranslatable_GMXLeveragedMatched.b=false
 countAsNonTranslatable_GMXRepetitionMatched.b=false
 countAsNonTranslatable_GMXFuzzyMatch.b=false
 countAsNonTranslatable_GMXAlphanumericOnlyTextUnit.b=true
 countAsNonTranslatable_GMXNumericOnlyTextUnit.b=true
 countAsNonTranslatable_GMXMeasurementOnlyTextUnit.b=true
 countAsNonTranslatable_ExactUniqueIdMatch.b=true
 countAsNonTranslatable_ExactPreviousVersionMatch.b=true
 countAsNonTranslatable_ExactLocalContextMatch.b=false
 countAsNonTranslatable_ExactDocumentContextMatch.b=false
 countAsNonTranslatable_ExactStructuralMatch.b=false
 countAsNonTranslatable_ExactMatch.b=false
 countAsNonTranslatable_ExactTextOnlyPreviousVersionMatch.b=false
 countAsNonTranslatable_ExactTextOnlyUniqueIdMatch.b=false
 countAsNonTranslatable_ExactTextOnly.b=false
 countAsNonTranslatable_ExactRepaired.b=false
 countAsNonTranslatable_FuzzyPreviousVersionMatch.b=false
 countAsNonTranslatable_FuzzyUniqueIdMatch.b=false
 countAsNonTranslatable_FuzzyMatch.b=false
 countAsNonTranslatable_FuzzyRepaired.b=false
 countAsNonTranslatable_PhraseAssembled.b=false
 countAsNonTranslatable_MT.b=false
 countAsNonTranslatable_Concordance.b=false</step>
 </rainbowPipeline>