First usable version
This commit is contained in:
parent
dd1ab8018f
commit
536fef8d88
5 changed files with 137 additions and 20 deletions
9
config.ini
Normal file
9
config.ini
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
[Options]
|
||||||
|
rainbow_exec = /path/to/rainbow/executable
|
||||||
|
custom_filters_folder = /path/to/folder/with/custom/filters
|
||||||
|
|
||||||
|
[Filters]
|
||||||
|
mxliff = okf_autoxliff
|
||||||
|
xliff = okf_autoxliff
|
||||||
|
xlf = okf_autoxliff
|
||||||
|
xlz = okf_archive
|
|
@ -1,13 +1,12 @@
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import io
|
|
||||||
import glob
|
import glob
|
||||||
import configparser
|
import configparser
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Function for selection lists
|
||||||
def select_item(welcome_text, items):
|
def select_item(welcome_text, items):
|
||||||
print(welcome_text+"\n")
|
print(welcome_text+"\n")
|
||||||
for i, item in enumerate(items):
|
for i, item in enumerate(items):
|
||||||
|
@ -26,45 +25,81 @@ def select_item(welcome_text, items):
|
||||||
print('Error! Please enter one number between 1-{}'.format(len(items)))
|
print('Error! Please enter one number between 1-{}'.format(len(items)))
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
# Function to get the correct filter for a file
|
||||||
def get_filter(path):
    """Return the filter id configured for the extension of *path*.

    The extension (text after the last dot, without the dot) is looked up
    in the ``[Filters]`` section of the project configuration first and of
    the global configuration second, so a project can override a global
    mapping.

    Args:
        path: File path as a string or path-like object.

    Returns:
        The configured filter id, or an empty string when the file has no
        extension or no configuration maps that extension.
    """
    extension = os.path.splitext(path)[1][1:]
    if not extension:
        return ""

    # has_option() returns False when the [Filters] section itself is
    # missing (e.g. the project has no project.ini), where indexing the
    # section directly would raise KeyError.
    for settings in (project_config, config):
        if settings.has_option("Filters", extension):
            return settings.get("Filters", extension)
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
# Load the global configuration stored next to this script
script_dir = sys.path[0]
config = configparser.ConfigParser()
config.read(os.path.join(script_dir, "config.ini"))
rainbow = config["Options"]["rainbow_exec"]

# Get work directories (ignore 0, which is this Python script)
work_directories = sys.argv[1:]

# Load project: only sub-folders of "projects" are offered, in sorted order,
# because os.listdir() returns entries in arbitrary order and includes files
projects_dir = os.path.join(script_dir, "projects")
list_projects = sorted(
    name for name in os.listdir(projects_dir)
    if os.path.isdir(os.path.join(projects_dir, name))
)
if not list_projects:
    sys.exit("Error! No projects found in {}".format(projects_dir))
project_dir = os.path.join(projects_dir, select_item("Available projects:", list_projects))
project = os.path.join(project_dir, "project.rnb")

# Load project config (a missing project.ini is silently ignored by read())
project_config = configparser.ConfigParser()
project_config.read(os.path.join(project_dir, "project.ini"))

# Load pipeline: project_dir already contains the full path to the project,
# and it is escaped so glob characters in a project name are taken literally
list_pipelines = sorted(
    os.path.basename(x)
    for x in glob.glob(os.path.join(glob.escape(project_dir), "*.pln"))
)
if not list_pipelines:
    sys.exit("Error! No pipelines (*.pln) found in {}".format(project_dir))
pipeline = os.path.join(project_dir, select_item("Available pipelines:", list_pipelines))
|
||||||
|
|
||||||
|
# Build list of files to be processed.
# Each entry is a (path relative to files_root, filter id) tuple.
files = []
files_root = os.getcwd()

for directory in work_directories:
    # '*.*' only matches names containing a dot; names without one could
    # not be mapped to a filter by their extension anyway.
    for translationfile in Path(directory).rglob('*.*'):
        if not translationfile.is_file():
            continue
        file_filter = get_filter(translationfile)
        # Files whose extension has no configured filter are skipped
        if file_filter != "":
            relpath = os.path.relpath(translationfile, files_root)
            files.append((relpath, file_filter))
|
||||||
|
|
||||||
|
# Parse project
project_tree = ET.parse(project)
project_tree_root = project_tree.getroot()
fileset = project_tree_root.find("./fileSet[@id='1']")
fileset_root = project_tree_root.find("./fileSet[@id='1']/root")
parameters = project_tree_root.find("./parametersFolder")

# find() returns None for a missing element; stop with a readable message
# instead of failing later with an AttributeError
if fileset is None or fileset_root is None:
    sys.exit("Error! {} has no fileSet with id 1 containing a root element".format(project))

# Set the parameters folder to the one defined in the config
custom_filters_folder = config["Options"].get("custom_filters_folder", "")
if parameters is None:
    print("Warning: {} has no parametersFolder element; "
          "custom filters folder not applied".format(project), file=sys.stderr)
elif custom_filters_folder:
    parameters.set("useCustom", "1")
    parameters.text = custom_filters_folder

# Set the root of the first fileset to the working folder
fileset_root.set("useCustom", "1")
fileset_root.text = files_root

# Add files to first fileset
for file_path, filter_id in files:
    entry = ET.SubElement(fileset, "fi", attrib={"fs": filter_id})
    entry.text = str(file_path)

temp_project = "temp_project.rnb"
try:
    # Write temporary custom project file for Okapi Rainbow
    project_tree.write(temp_project, encoding="UTF-8", xml_declaration=True)

    # Run Okapi Rainbow with the temporary project and the selected pipeline
    subprocess.run([rainbow, "-p", temp_project, "-pln", pipeline, "-np"])
finally:
    # Remove temporary project file even if writing or launching failed
    if os.path.exists(temp_project):
        os.remove(temp_project)
|
||||||
|
|
2
projects/Example project/project.ini
Normal file
2
projects/Example project/project.ini
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[Filters]
|
||||||
|
txt = okf_plaintext
|
21
projects/Example project/project.rnb
Normal file
21
projects/Example project/project.rnb
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rainbowProject version="4">
|
||||||
|
<fileSet id="1">
|
||||||
|
<root useCustom="0"></root>
|
||||||
|
</fileSet>
|
||||||
|
<fileSet id="2">
|
||||||
|
<root useCustom="0"></root></fileSet>
|
||||||
|
<fileSet id="3">
|
||||||
|
<root useCustom="0"></root>
|
||||||
|
</fileSet>
|
||||||
|
<output>
|
||||||
|
<root use="0"></root>
|
||||||
|
<subFolder use="0"></subFolder>
|
||||||
|
<extension use="1" style="0">.out</extension>
|
||||||
|
<replace use="0" oldText="" newText=""></replace>
|
||||||
|
<prefix use="0"></prefix><suffix use="0"></suffix>
|
||||||
|
</output>
|
||||||
|
<options sourceLanguage="en" sourceEncoding="UTF-8" targetLanguage="ca" targetEncoding="UTF-8"></options>
|
||||||
|
<parametersFolder useCustom="0"></parametersFolder>
|
||||||
|
<utilities xml:spaces="preserve"></utilities>
|
||||||
|
</rainbowProject>
|
50
projects/Example project/wordcount.pln
Normal file
50
projects/Example project/wordcount.pln
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
|
||||||
|
<step class="net.sf.okapi.steps.wordcount.WordCountStep">#v1
|
||||||
|
countInBatch.b=true
|
||||||
|
countInBatchItems.b=true
|
||||||
|
countInDocuments.b=true
|
||||||
|
countInSubDocuments.b=false
|
||||||
|
countInGroups.b=false
|
||||||
|
bufferSize.i=0</step>
|
||||||
|
<step class="net.sf.okapi.steps.wordcount.CharacterCountStep">#v1
|
||||||
|
countInBatch.b=true
|
||||||
|
countInBatchItems.b=true
|
||||||
|
countInDocuments.b=true
|
||||||
|
countInSubDocuments.b=false
|
||||||
|
countInGroups.b=false
|
||||||
|
bufferSize.i=0</step>
|
||||||
|
<step class="net.sf.okapi.steps.repetitionanalysis.RepetitionAnalysisStep">#v1
|
||||||
|
fuzzyThreshold.i=100
|
||||||
|
maxHits.i=20</step>
|
||||||
|
<step class="net.sf.okapi.steps.scopingreport.ScopingReportStep">#v1
|
||||||
|
projectName=My Project
|
||||||
|
customTemplateURI=
|
||||||
|
customTemplateString=
|
||||||
|
outputPath=${rootDir}/scoping_report.html
|
||||||
|
countAsNonTranslatable_GMXProtected.b=true
|
||||||
|
countAsNonTranslatable_GMXExactMatched.b=true
|
||||||
|
countAsNonTranslatable_GMXLeveragedMatched.b=false
|
||||||
|
countAsNonTranslatable_GMXRepetitionMatched.b=false
|
||||||
|
countAsNonTranslatable_GMXFuzzyMatch.b=false
|
||||||
|
countAsNonTranslatable_GMXAlphanumericOnlyTextUnit.b=true
|
||||||
|
countAsNonTranslatable_GMXNumericOnlyTextUnit.b=true
|
||||||
|
countAsNonTranslatable_GMXMeasurementOnlyTextUnit.b=true
|
||||||
|
countAsNonTranslatable_ExactUniqueIdMatch.b=true
|
||||||
|
countAsNonTranslatable_ExactPreviousVersionMatch.b=true
|
||||||
|
countAsNonTranslatable_ExactLocalContextMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactDocumentContextMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactStructuralMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactTextOnlyPreviousVersionMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactTextOnlyUniqueIdMatch.b=false
|
||||||
|
countAsNonTranslatable_ExactTextOnly.b=false
|
||||||
|
countAsNonTranslatable_ExactRepaired.b=false
|
||||||
|
countAsNonTranslatable_FuzzyPreviousVersionMatch.b=false
|
||||||
|
countAsNonTranslatable_FuzzyUniqueIdMatch.b=false
|
||||||
|
countAsNonTranslatable_FuzzyMatch.b=false
|
||||||
|
countAsNonTranslatable_FuzzyRepaired.b=false
|
||||||
|
countAsNonTranslatable_PhraseAssembled.b=false
|
||||||
|
countAsNonTranslatable_MT.b=false
|
||||||
|
countAsNonTranslatable_Concordance.b=false</step>
|
||||||
|
</rainbowPipeline>
|
Loading…
Add table
Reference in a new issue