From 536fef8d88adfbb0f0f55d7841856e59241e5c27 Mon Sep 17 00:00:00 2001 From: Marc Riera Irigoyen Date: Mon, 4 Jan 2021 18:04:37 +0100 Subject: [PATCH] First usable version --- config.ini | 9 ++++ okapi-batch-runner.py | 75 +++++++++++++++++++------- projects/Example project/project.ini | 2 + projects/Example project/project.rnb | 21 ++++++++ projects/Example project/wordcount.pln | 50 +++++++++++++++++ 5 files changed, 137 insertions(+), 20 deletions(-) create mode 100644 config.ini create mode 100644 projects/Example project/project.ini create mode 100644 projects/Example project/project.rnb create mode 100644 projects/Example project/wordcount.pln diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..8f3cfb9 --- /dev/null +++ b/config.ini @@ -0,0 +1,9 @@ +[Options] +rainbow_exec = /path/to/rainbow/executable +custom_filters_folder = /path/to/folder/with/custom/filters + +[Filters] +mxliff = okf_autoxliff +xliff = okf_autoxliff +xlf = okf_autoxliff +xlz = okf_archive diff --git a/okapi-batch-runner.py b/okapi-batch-runner.py index c64254e..e38a8b9 100644 --- a/okapi-batch-runner.py +++ b/okapi-batch-runner.py @@ -1,13 +1,12 @@ import subprocess import sys import os -import re -import io import glob import configparser import xml.etree.ElementTree as ET from pathlib import Path +# Function for selection lists def select_item(welcome_text, items): print(welcome_text+"\n") for i, item in enumerate(items): @@ -26,45 +25,81 @@ def select_item(welcome_text, items): print('Error! 
Please enter one number between 1-{}'.format(len(items))) exit() +# Function to get the correct filter for a file def get_filter(path): - return "filter" + extension = os.path.splitext(path)[1][1:] + if (extension == ""): + return extension + else: + if (project_config.has_option("Filters", extension)): + return project_config["Filters"][extension] + else: + if (config.has_option("Filters", extension)): + return config["Filters"][extension] + else: + return "" +# Load config config = configparser.ConfigParser() config.read(os.path.join(sys.path[0],"config.ini")) rainbow = config["Options"]["rainbow_exec"] -work_directory = sys.argv[1:] +# Get work directories (ignore 0, which is this Python script) +work_directories = sys.argv[1:] +# Load project list_projects = os.listdir(os.path.join(sys.path[0],"projects")) -project_folder = os.path.join(sys.path[0],"projects",select_item("Available projects:",list_projects)) -project = os.path.join(project_folder,"project.rnb") - -list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(sys.path[0],"projects",project_folder,"*.pln"))] -pipeline = os.path.join(project_folder,select_item("Available pipelines:",list_pipelines)) +project_dir = os.path.join(sys.path[0],"projects",select_item("Available projects:",list_projects)) +project = os.path.join(project_dir,"project.rnb") +# Load project config project_config = configparser.ConfigParser() -project_config.read(os.path.join(project_folder,"project.ini")) +project_config.read(os.path.join(project_dir,"project.ini")) +# Load pipeline +list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(sys.path[0],"projects",project_dir,"*.pln"))] +pipeline = os.path.join(project_dir,select_item("Available pipelines:",list_pipelines)) + +# Build list of files to be processed files = [] +files_root = os.getcwd() -for directory in work_directory: +for directory in work_directories: if any(Path(directory).rglob('*.*')): for translationfile in Path(directory).rglob('*.*'): - filter = 
get_filter(translationfile) - if (filter != ""): - files.append((translationfile,filter)) - + if (translationfile.is_file()): + filter = get_filter(translationfile) + if (filter != ""): + relpath = os.path.relpath(translationfile,files_root) + files.append((relpath,filter)) +# Parse project project_tree = ET.parse(project) -root = project_tree.getroot() +project_tree_root = project_tree.getroot() +fileset = project_tree_root.find("./fileSet[@id='1']") +fileset_root = project_tree_root.find("./fileSet[@id='1']/root") +parameters = project_tree_root.find("./parametersFolder") -elem = root.find("./fileSet[@id='1']") +# Set the parameters folder to the one defined in the config +parameters.set("useCustom","1") +parameters.text = config["Options"]["custom_filters_folder"] + +# Set the root of the first fileset to the working folder +fileset_root.set("useCustom","1") +fileset_root.text = files_root + +# Add files to first fileset for item in files: new=ET.Element("fi",attrib={"fs":item[1]}) new.text=str(item[0]) - elem.append(new) + fileset.append(new) -project_tree.write("testproject.rnb",encoding="UTF-8",xml_declaration=True) +# Write temporary custom project file for Okapi Rainbow +project_tree.write("temp_project.rnb",encoding="UTF-8",xml_declaration=True) -#subprocess.run([rainbow,"-p",project,"-pln",pipeline,"-np"]) +# Run Okapi Rainbow with the temporary project and the selected pipeline +subprocess.run([rainbow,"-p","temp_project.rnb","-pln",pipeline,"-np"]) + +# Remove temporary project file after executing the pipeline +os.remove("temp_project.rnb") diff --git a/projects/Example project/project.ini b/projects/Example project/project.ini new file mode 100644 index 0000000..099f77e --- /dev/null +++ b/projects/Example project/project.ini @@ -0,0 +1,2 @@ +[Filters] +txt = okf_plaintext diff --git a/projects/Example project/project.rnb b/projects/Example project/project.rnb new file mode 100644 index 0000000..7d6c28e --- /dev/null +++ b/projects/Example 
project/project.rnb @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + .out + + + + + + + diff --git a/projects/Example project/wordcount.pln b/projects/Example project/wordcount.pln new file mode 100644 index 0000000..ca974b7 --- /dev/null +++ b/projects/Example project/wordcount.pln @@ -0,0 +1,50 @@ + + +#v1 +countInBatch.b=true +countInBatchItems.b=true +countInDocuments.b=true +countInSubDocuments.b=false +countInGroups.b=false +bufferSize.i=0 +#v1 +countInBatch.b=true +countInBatchItems.b=true +countInDocuments.b=true +countInSubDocuments.b=false +countInGroups.b=false +bufferSize.i=0 +#v1 +fuzzyThreshold.i=100 +maxHits.i=20 +#v1 +projectName=My Project +customTemplateURI= +customTemplateString= +outputPath=${rootDir}/scoping_report.html +countAsNonTranslatable_GMXProtected.b=true +countAsNonTranslatable_GMXExactMatched.b=true +countAsNonTranslatable_GMXLeveragedMatched.b=false +countAsNonTranslatable_GMXRepetitionMatched.b=false +countAsNonTranslatable_GMXFuzzyMatch.b=false +countAsNonTranslatable_GMXAlphanumericOnlyTextUnit.b=true +countAsNonTranslatable_GMXNumericOnlyTextUnit.b=true +countAsNonTranslatable_GMXMeasurementOnlyTextUnit.b=true +countAsNonTranslatable_ExactUniqueIdMatch.b=true +countAsNonTranslatable_ExactPreviousVersionMatch.b=true +countAsNonTranslatable_ExactLocalContextMatch.b=false +countAsNonTranslatable_ExactDocumentContextMatch.b=false +countAsNonTranslatable_ExactStructuralMatch.b=false +countAsNonTranslatable_ExactMatch.b=false +countAsNonTranslatable_ExactTextOnlyPreviousVersionMatch.b=false +countAsNonTranslatable_ExactTextOnlyUniqueIdMatch.b=false +countAsNonTranslatable_ExactTextOnly.b=false +countAsNonTranslatable_ExactRepaired.b=false +countAsNonTranslatable_FuzzyPreviousVersionMatch.b=false +countAsNonTranslatable_FuzzyUniqueIdMatch.b=false +countAsNonTranslatable_FuzzyMatch.b=false +countAsNonTranslatable_FuzzyRepaired.b=false +countAsNonTranslatable_PhraseAssembled.b=false +countAsNonTranslatable_MT.b=false 
+countAsNonTranslatable_Concordance.b=false +