First usable version

This commit is contained in:
Marc Riera Irigoyen 2021-01-04 18:04:37 +01:00
parent dd1ab8018f
commit 536fef8d88
5 changed files with 137 additions and 20 deletions

9
config.ini Normal file
View file

@ -0,0 +1,9 @@
[Options]
rainbow_exec = /path/to/rainbow/executable
custom_filters_folder = /path/to/folder/with/custom/filters
[Filters]
mxliff = okf_autoxliff
xliff = okf_autoxliff
xlf = okf_autoxliff
xlz = okf_archive

View file

@ -1,13 +1,12 @@
import subprocess
import sys
import os
import re
import io
import glob
import configparser
import xml.etree.ElementTree as ET
from pathlib import Path
# Function for selection lists
def select_item(welcome_text, items):
print(welcome_text+"\n")
for i, item in enumerate(items):
@ -26,45 +25,81 @@ def select_item(welcome_text, items):
print('Error! Please enter one number between 1-{}'.format(len(items)))
exit()
# Function to get the correct filter for a file
def get_filter(path):
    """Return the filter name configured for the extension of *path*.

    The extension (without the leading dot) is looked up in the ``Filters``
    section of the project configuration first and of the global
    configuration second, so a project can override the global mapping.

    Args:
        path: File path, as ``str`` or any ``os.PathLike`` object.

    Returns:
        The configured filter name, or an empty string when the file has no
        extension or no mapping exists for that extension.
    """
    extension = os.path.splitext(path)[1][1:]
    if not extension:
        return ""
    # Project-specific mappings take precedence over the global ones.
    for settings in (project_config, config):
        # A configuration without a "Filters" section contributes nothing
        # instead of aborting the whole run with a KeyError.
        if "Filters" in settings and extension in settings["Filters"]:
            return settings["Filters"][extension]
    return ""
# Load config
# config.ini is read from sys.path[0]; the same base folder is used below for
# the "projects" folder.
config = configparser.ConfigParser()
config.read(os.path.join(sys.path[0],"config.ini"))
# Rainbow executable, as configured under [Options] / rainbow_exec
rainbow = config["Options"]["rainbow_exec"]
# NOTE(review): work_directory and work_directories are assigned the same value.
work_directory = sys.argv[1:]
# Get work directories (ignore 0, which is this Python script)
work_directories = sys.argv[1:]
# Load project
# Every entry of the "projects" folder is offered through select_item
# (presumably it returns the entry the user picked; its body is not fully
# visible here).
list_projects = os.listdir(os.path.join(sys.path[0],"projects"))
project_folder = os.path.join(sys.path[0],"projects",select_item("Available projects:",list_projects))
project = os.path.join(project_folder,"project.rnb")
list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(sys.path[0],"projects",project_folder,"*.pln"))]
pipeline = os.path.join(project_folder,select_item("Available pipelines:",list_pipelines))
project_dir = os.path.join(sys.path[0],"projects",select_item("Available projects:",list_projects))
# The project template that is parsed further down
project = os.path.join(project_dir,"project.rnb")
# Load project config
# project.ini is the per-project configuration that get_filter consults
# before the global config.
project_config = configparser.ConfigParser()
project_config.read(os.path.join(project_folder,"project.ini"))
project_config.read(os.path.join(project_dir,"project.ini"))
# Load pipeline
# NOTE(review): project_dir already starts with sys.path[0]/projects; when it
# is an absolute path, os.path.join discards the components before it, so the
# pattern is effectively <project_dir>/*.pln.
list_pipelines = [os.path.basename(x) for x in glob.glob(os.path.join(sys.path[0],"projects",project_dir,"*.pln"))]
pipeline = os.path.join(project_dir,select_item("Available pipelines:",list_pipelines))
# Build list of files to be processed
# files collects (path, filter) tuples; files_root is the current working
# directory, against which the relative paths are computed.
files = []
files_root = os.getcwd()
for directory in work_directory:
for directory in work_directories:
# The '*.*' pattern only matches names that contain a dot, so entries without
# an extension are never visited.
if any(Path(directory).rglob('*.*')):
for translationfile in Path(directory).rglob('*.*'):
# NOTE(review): the name "filter" shadows the Python builtin.
filter = get_filter(translationfile)
if (filter != ""):
files.append((translationfile,filter))
# Only regular files are considered; directories matched by the pattern are
# skipped.
if (translationfile.is_file()):
filter = get_filter(translationfile)
# An empty string from get_filter means no filter is configured for the file.
if (filter != ""):
relpath = os.path.relpath(translationfile,files_root)
files.append((relpath,filter))
# Parse project
project_tree = ET.parse(project)
# NOTE(review): root and project_tree_root both come from
# project_tree.getroot(), so they refer to the same element.
root = project_tree.getroot()
project_tree_root = project_tree.getroot()
# Elements of the project file that are modified before it is written out
fileset = project_tree_root.find("./fileSet[@id='1']")
fileset_root = project_tree_root.find("./fileSet[@id='1']/root")
parameters = project_tree_root.find("./parametersFolder")
# NOTE(review): same XPath as the "fileset" lookup above.
elem = root.find("./fileSet[@id='1']")
# Set the parameters folder to the one defined in the config
# (this must target the <parametersFolder> element; writing to fileset_root
# here would be overwritten by the working-folder assignment below and the
# custom filters folder would never reach the project file)
parameters.set("useCustom","1")
parameters.text = config["Options"]["custom_filters_folder"]
# Set the root of the first fileset to the working folder
# (the file paths added to this fileset are relative to files_root)
fileset_root.set("useCustom","1")
fileset_root.text = files_root
# Add files to first fileset
# Each (path, filter) tuple becomes an <fi> element: the filter goes into the
# "fs" attribute and the path into the element text.
for item in files:
new=ET.Element("fi",attrib={"fs":item[1]})
new.text=str(item[0])
# NOTE(review): elem and fileset are looked up with the same XPath on the same
# tree, so both appends target the same <fileSet> element.
elem.append(new)
fileset.append(new)
# NOTE(review): testproject.rnb is written to the current working directory and
# is not removed afterwards (only temp_project.rnb is).
project_tree.write("testproject.rnb",encoding="UTF-8",xml_declaration=True)
# Write temporary custom project file for Okapi Rainbow
# The file name is relative, so it is created in the current working directory.
project_tree.write("temp_project.rnb",encoding="UTF-8",xml_declaration=True)
#subprocess.run([rainbow,"-p",project,"-pln",pipeline,"-np"])
# Run Okapi Rainbow with the temporary project and the selected pipeline
# The arguments are passed as a list, so no shell is involved.
subprocess.run([rainbow,"-p","temp_project.rnb","-pln",pipeline,"-np"])
# Remove temporary project file after executing the pipeline
# NOTE(review): if subprocess.run raises (e.g. the executable is missing), this
# line is not reached and the temporary file is left behind.
os.remove("temp_project.rnb")

View file

@ -0,0 +1,2 @@
[Filters]
txt = okf_plaintext

View file

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<rainbowProject version="4">
<fileSet id="1">
<root useCustom="0"></root>
</fileSet>
<fileSet id="2">
<root useCustom="0"></root></fileSet>
<fileSet id="3">
<root useCustom="0"></root>
</fileSet>
<output>
<root use="0"></root>
<subFolder use="0"></subFolder>
<extension use="1" style="0">.out</extension>
<replace use="0" oldText="" newText=""></replace>
<prefix use="0"></prefix><suffix use="0"></suffix>
</output>
<options sourceLanguage="en" sourceEncoding="UTF-8" targetLanguage="ca" targetEncoding="UTF-8"></options>
<parametersFolder useCustom="0"></parametersFolder>
<utilities xml:spaces="preserve"></utilities>
</rainbowProject>

View file

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?>
<rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
<step class="net.sf.okapi.steps.wordcount.WordCountStep">#v1
countInBatch.b=true
countInBatchItems.b=true
countInDocuments.b=true
countInSubDocuments.b=false
countInGroups.b=false
bufferSize.i=0</step>
<step class="net.sf.okapi.steps.wordcount.CharacterCountStep">#v1
countInBatch.b=true
countInBatchItems.b=true
countInDocuments.b=true
countInSubDocuments.b=false
countInGroups.b=false
bufferSize.i=0</step>
<step class="net.sf.okapi.steps.repetitionanalysis.RepetitionAnalysisStep">#v1
fuzzyThreshold.i=100
maxHits.i=20</step>
<step class="net.sf.okapi.steps.scopingreport.ScopingReportStep">#v1
projectName=My Project
customTemplateURI=
customTemplateString=
outputPath=${rootDir}/scoping_report.html
countAsNonTranslatable_GMXProtected.b=true
countAsNonTranslatable_GMXExactMatched.b=true
countAsNonTranslatable_GMXLeveragedMatched.b=false
countAsNonTranslatable_GMXRepetitionMatched.b=false
countAsNonTranslatable_GMXFuzzyMatch.b=false
countAsNonTranslatable_GMXAlphanumericOnlyTextUnit.b=true
countAsNonTranslatable_GMXNumericOnlyTextUnit.b=true
countAsNonTranslatable_GMXMeasurementOnlyTextUnit.b=true
countAsNonTranslatable_ExactUniqueIdMatch.b=true
countAsNonTranslatable_ExactPreviousVersionMatch.b=true
countAsNonTranslatable_ExactLocalContextMatch.b=false
countAsNonTranslatable_ExactDocumentContextMatch.b=false
countAsNonTranslatable_ExactStructuralMatch.b=false
countAsNonTranslatable_ExactMatch.b=false
countAsNonTranslatable_ExactTextOnlyPreviousVersionMatch.b=false
countAsNonTranslatable_ExactTextOnlyUniqueIdMatch.b=false
countAsNonTranslatable_ExactTextOnly.b=false
countAsNonTranslatable_ExactRepaired.b=false
countAsNonTranslatable_FuzzyPreviousVersionMatch.b=false
countAsNonTranslatable_FuzzyUniqueIdMatch.b=false
countAsNonTranslatable_FuzzyMatch.b=false
countAsNonTranslatable_FuzzyRepaired.b=false
countAsNonTranslatable_PhraseAssembled.b=false
countAsNonTranslatable_MT.b=false
countAsNonTranslatable_Concordance.b=false</step>
</rainbowPipeline>