First usable version

This commit is contained in:
Marc Riera Irigoyen 2021-01-04 18:04:37 +01:00
parent dd1ab8018f
commit 536fef8d88
5 changed files with 137 additions and 20 deletions

9
config.ini Normal file
View file

@ -0,0 +1,9 @@
[Options]
rainbow_exec = /path/to/rainbow/executable
custom_filters_folder = /path/to/folder/with/custom/filters
[Filters]
mxliff = okf_autoxliff
xliff = okf_autoxliff
xlf = okf_autoxliff
xlz = okf_archive

View file

@ -1,13 +1,12 @@
import subprocess import subprocess
import sys import sys
import os import os
import re
import io
import glob import glob
import configparser import configparser
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from pathlib import Path from pathlib import Path
# Function for selection lists
def select_item(welcome_text, items): def select_item(welcome_text, items):
print(welcome_text+"\n") print(welcome_text+"\n")
for i, item in enumerate(items): for i, item in enumerate(items):
@ -26,45 +25,81 @@ def select_item(welcome_text, items):
print('Error! Please enter one number between 1-{}'.format(len(items))) print('Error! Please enter one number between 1-{}'.format(len(items)))
exit() exit()
# Function to get the correct filter for a file
def get_filter(path):
    """Return the filter mapped to the extension of *path*, or "" if none.

    The extension (without the leading dot) is looked up in the
    ``[Filters]`` section of the project configuration first and then in
    the global configuration, so a project can override a global mapping.

    Args:
        path: File name or path (``str`` or ``os.PathLike``).

    Returns:
        The filter value configured for the extension, or an empty string
        when the file has no extension or no mapping exists.
    """
    extension = os.path.splitext(path)[1][1:]
    if not extension:
        return ""
    # has_option() returns False when the section itself is missing, so a
    # project without a project.ini (or without a [Filters] section) falls
    # through to the global config instead of raising KeyError.
    for settings in (project_config, config):
        if settings.has_option("Filters", extension):
            return settings.get("Filters", extension)
    return ""
# Load config
# The global configuration (config.ini) lives next to this script.
config = configparser.ConfigParser()
config.read(os.path.join(sys.path[0], "config.ini"))
rainbow = config["Options"]["rainbow_exec"]

# Get work directories (ignore 0, which is this Python script)
work_directories = sys.argv[1:]

# Load project
projects_dir = os.path.join(sys.path[0], "projects")
list_projects = os.listdir(projects_dir)
project_dir = os.path.join(projects_dir, select_item("Available projects:", list_projects))
project = os.path.join(project_dir, "project.rnb")

# Load project config
# A missing project.ini is silently ignored by ConfigParser.read().
project_config = configparser.ConfigParser()
project_config.read(os.path.join(project_dir, "project.ini"))

# Load pipeline
# project_dir already contains the script directory and "projects", so the
# pattern is built from it directly instead of prefixing those parts again.
list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(project_dir, "*.pln"))]
pipeline = os.path.join(project_dir, select_item("Available pipelines:", list_pipelines))

# Build list of files to be processed
# Each entry is (path relative to the current directory, filter name).
files = []
files_root = os.getcwd()
for directory in work_directories:
    # A single recursive walk is enough: an empty directory yields nothing.
    for translationfile in Path(directory).rglob('*.*'):
        if not translationfile.is_file():
            continue
        file_filter = get_filter(translationfile)
        if file_filter != "":
            relpath = os.path.relpath(translationfile, files_root)
            files.append((relpath, file_filter))

# Parse project
project_tree = ET.parse(project)
project_tree_root = project_tree.getroot()
fileset = project_tree_root.find("./fileSet[@id='1']")
fileset_root = project_tree_root.find("./fileSet[@id='1']/root")
parameters = project_tree_root.find("./parametersFolder")

# Set the parameters folder to the one defined in the config
# This must target the <parametersFolder> element; writing it to the fileset
# root would be overwritten by the next step and the setting would be lost.
parameters.set("useCustom", "1")
parameters.text = config["Options"]["custom_filters_folder"]

# Set the root of the first fileset to the working folder
fileset_root.set("useCustom", "1")
fileset_root.text = files_root

# Add files to first fileset
for item in files:
    new = ET.Element("fi", attrib={"fs": item[1]})
    new.text = str(item[0])
    fileset.append(new)

# Write temporary custom project file for Okapi Rainbow
temp_project = "temp_project.rnb"
project_tree.write(temp_project, encoding="UTF-8", xml_declaration=True)

try:
    # Run Okapi Rainbow with the temporary project and the selected pipeline
    subprocess.run([rainbow, "-p", temp_project, "-pln", pipeline, "-np"])
finally:
    # Remove temporary project file after executing the pipeline, even when
    # the Rainbow executable could not be started.
    os.remove(temp_project)

View file

@ -0,0 +1,2 @@
[Filters]
txt = okf_plaintext

View file

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<rainbowProject version="4">
<fileSet id="1">
<root useCustom="0"></root>
</fileSet>
<fileSet id="2">
<root useCustom="0"></root></fileSet>
<fileSet id="3">
<root useCustom="0"></root>
</fileSet>
<output>
<root use="0"></root>
<subFolder use="0"></subFolder>
<extension use="1" style="0">.out</extension>
<replace use="0" oldText="" newText=""></replace>
<prefix use="0"></prefix><suffix use="0"></suffix>
</output>
<options sourceLanguage="en" sourceEncoding="UTF-8" targetLanguage="ca" targetEncoding="UTF-8"></options>
<parametersFolder useCustom="0"></parametersFolder>
<utilities xml:spaces="preserve"></utilities>
</rainbowProject>

View file

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?>
<rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
<step class="net.sf.okapi.steps.wordcount.WordCountStep">#v1
countInBatch.b=true
countInBatchItems.b=true
countInDocuments.b=true
countInSubDocuments.b=false
countInGroups.b=false
bufferSize.i=0</step>
<step class="net.sf.okapi.steps.wordcount.CharacterCountStep">#v1
countInBatch.b=true
countInBatchItems.b=true
countInDocuments.b=true
countInSubDocuments.b=false
countInGroups.b=false
bufferSize.i=0</step>
<step class="net.sf.okapi.steps.repetitionanalysis.RepetitionAnalysisStep">#v1
fuzzyThreshold.i=100
maxHits.i=20</step>
<step class="net.sf.okapi.steps.scopingreport.ScopingReportStep">#v1
projectName=My Project
customTemplateURI=
customTemplateString=
outputPath=${rootDir}/scoping_report.html
countAsNonTranslatable_GMXProtected.b=true
countAsNonTranslatable_GMXExactMatched.b=true
countAsNonTranslatable_GMXLeveragedMatched.b=false
countAsNonTranslatable_GMXRepetitionMatched.b=false
countAsNonTranslatable_GMXFuzzyMatch.b=false
countAsNonTranslatable_GMXAlphanumericOnlyTextUnit.b=true
countAsNonTranslatable_GMXNumericOnlyTextUnit.b=true
countAsNonTranslatable_GMXMeasurementOnlyTextUnit.b=true
countAsNonTranslatable_ExactUniqueIdMatch.b=true
countAsNonTranslatable_ExactPreviousVersionMatch.b=true
countAsNonTranslatable_ExactLocalContextMatch.b=false
countAsNonTranslatable_ExactDocumentContextMatch.b=false
countAsNonTranslatable_ExactStructuralMatch.b=false
countAsNonTranslatable_ExactMatch.b=false
countAsNonTranslatable_ExactTextOnlyPreviousVersionMatch.b=false
countAsNonTranslatable_ExactTextOnlyUniqueIdMatch.b=false
countAsNonTranslatable_ExactTextOnly.b=false
countAsNonTranslatable_ExactRepaired.b=false
countAsNonTranslatable_FuzzyPreviousVersionMatch.b=false
countAsNonTranslatable_FuzzyUniqueIdMatch.b=false
countAsNonTranslatable_FuzzyMatch.b=false
countAsNonTranslatable_FuzzyRepaired.b=false
countAsNonTranslatable_PhraseAssembled.b=false
countAsNonTranslatable_MT.b=false
countAsNonTranslatable_Concordance.b=false</step>
</rainbowPipeline>