-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSMT training commands and auxiliaries.txt
259 lines (173 loc) · 33.7 KB
/
SMT training commands and auxiliaries.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
###### Placeholder statistics calculation
# Reads text from STDIN (or from files appended to the command) and reports
#   - the number of {N} placeholders and their share of all space-separated tokens
#   - the number of lines containing at least one placeholder and their share of all lines
# The counters are plain package variables: a `my` inside BEGIN{} would be
# scoped to the BEGIN block only and leave the counters used below undefined.
# The percentages fall back to 0 on empty input instead of dying with
# "Illegal division by zero".
/usr/bin/perl -ne '
    BEGIN { $PH = $PHL = $t = $l = 0 }
    ++$l;
    ++$PHL if /\{\d+\}/;
    $PH += () = /\{\d+\}/g;
    $t  += scalar(split / /, $_);
    END {
        print "Total PHs: $PH (out of $t: " . ($t ? $PH / $t * 100 : 0) . "%)\n"
            . "Lines with PHs: $PHL (out of $l: " . ($l ? $PHL / $l * 100 : 0) . "%)\n";
    }'
###### Main engine training data collection/preparation
### Edit by Samuel on 25 September 2014
### Note: Replace XXX in user and password params by login credentials for the WorldServer DB
###
# Dated folder that receives the per-language segment dumps. Replace the
# 20xx.xx.xx placeholder once here; every command below uses $TM_DIR.
TM_DIR=TMs.20xx.xx.xx
# Folder holding the 2012.05.09 baseline dumps (the path contains spaces, so it is always quoted).
BASELINE_DIR='../fy13_training/2012.05.09 Data Dump'

# (1) Get a list of all WorldServer TMs, including meta data, and store them in 'TMselection.txt'
sh /OptiBay/ADSK_Software/fetchSegmentsFromWorldServerTMs.sh -user=XXX -password=XXX > TMselection.txt
# (2) Edit TMselection.txt: Prefix lines with # for TMs to be excluded
# (3) Fetch all segments from the selected WorldServer TMs and aggregate them into one bzipped file per language in the $TM_DIR folder
mkdir -p "$TM_DIR"
sh /OptiBay/ADSK_Software/fetchSegmentsFromWorldServerTMs.sh -user=XXX -password=XXX -selection=TMselection.txt -targetdir="$TM_DIR"
###
# updateTMs.pl writes its results into tempTMs/; create the folder up front in
# case the script does not do so itself (harmless if it does).
mkdir -p tempTMs

# Languages with a 2012.05.09 baseline dump: that dump is passed as -baseLine,
# the freshly fetched segments as -current. Entries are TM_CODE:WorldServer_locale.
for ENTRY in CHS:zh_CN CHT:zh_TW CSY:cs_CZ DNK:da_DK DEU:de ENG:en_GB ESP:es FIN:fi_FI FRA:fr HUN:hu_HU ITA:it JPN:ja_JP KOR:ko_KR NLD:nl_NL NOR:nb_NO PLK:pl_PL PTB:pt_BR RUS:ru_RU SWE:sv_SE VIT:vi_VN; do
    TM_CODE=${ENTRY%%:*}
    TM_LOCALE=${ENTRY#*:}
    /OptiBay/ADSK_Software/updateTMs.pl -baseLine="$BASELINE_DIR/TM_${TM_CODE}_ALL.bz2" -current="$TM_DIR/segments.$TM_LOCALE.bz2" -output="tempTMs/TM_${TM_CODE}_ALL.bz2"
done

# Languages without a baseline dump: the freshly fetched file is passed as both
# -baseLine and -current.
for ENTRY in ARA:ar_SA ELL:el_GR ENA:en_AU FRB:fr_BE FRC:fr_CA HEB:he_IL HIN:hi_IN IND:id_ID LAS:es_MX PTG:pt_PT ROM:ro_RO SLK:sk_SK THA:th_TH TUR:tr_TR; do
    TM_CODE=${ENTRY%%:*}
    TM_LOCALE=${ENTRY#*:}
    /OptiBay/ADSK_Software/updateTMs.pl -baseLine="$TM_DIR/segments.$TM_LOCALE.bz2" -current="$TM_DIR/segments.$TM_LOCALE.bz2" -output="tempTMs/TM_${TM_CODE}_ALL.bz2"
done

# Combine the Brazilian and European Portuguese data into one file each for the
# TMs and for the sw_corpus. The '*' is a literal character of the output file
# name, hence the quoting.
# TM_PTG_ALL.bz2 is written to tempTMs/ by the loop above, so it is read from
# there; nothing in these notes creates a TM_PTG_ALL.bz2 inside $TM_DIR.
/OptiBay/ADSK_Software/updateTMs.pl -baseLine=tempTMs/TM_PTB_ALL.bz2 -current=tempTMs/TM_PTG_ALL.bz2 -output='tempTMs/TM_PT*_ALL.bz2'
/OptiBay/ADSK_Software/updateTMs.pl -baseLine=sw_corpus/corpus.sw.ptb.bz2 -current=sw_corpus/corpus.sw.ptg.bz2 -output='sw_corpus/corpus.sw.pt*.bz2'

# Build the training corpora and export them as gzipped TMX files.
/OptiBay/ADSK_Software/generateCorpora.pl -threads=16
/OptiBay/ADSK_Software/generateTMX.pl -corpusPath=corpora -outputPath=TMXs -threads=8 -gzip
##Training
# train_engine LOG CORPUS SOURCE TARGET [EXTRA_OPTION...]
#   LOG     file that receives train.pl's stdout and stderr
#   CORPUS  three-letter corpus folder below /Volumes/LaCie_Work/Autodesk/fy16_training/corpora/
#   SOURCE  value passed as -source
#   TARGET  value passed as -target
#   EXTRA   any further train.pl options; they are appended after -fiscal_year
# Starts one train.pl run in the background with the options shared by all
# FY16 engines. Every call below therefore returns immediately; pasting the
# whole section launches all trainings at once.
train_engine() {
    TRAIN_LOG=$1
    TRAIN_CORPUS=$2
    TRAIN_SOURCE=$3
    TRAIN_TARGET=$4
    shift 4
    /OptiBay/ADSK_Software/train.pl -corpus="/Volumes/LaCie_Work/Autodesk/fy16_training/corpora/$TRAIN_CORPUS/corpus" -source="$TRAIN_SOURCE" -target="$TRAIN_TARGET" -lm_path=lm -moses_path=/OptiBay/ADSK_Software/moses-trunk/ -bin_dir=/sw/bin/GIZA++/ -log_dir=LOG -build_lm=/sw/bin/srilm/macosx -build_recaser=./recaser -device=disk1 -fiscal_year=fy16 "$@" >"$TRAIN_LOG" 2>&1 &
}

## English -> XX
train_engine CSY.a.log CSY en cs        #EN-CS
train_engine DNK.a.log DNK en da        #EN-DA
train_engine DEU.a.log DEU en de        #EN-DE
train_engine ENG.a.log ENG en en_gb     #EN-EN_GB
train_engine ESP.a.log ESP en es        #EN-ES
train_engine FIN.a.log FIN en fi        #EN-FI
train_engine FRA.a.log FRA en fr        #EN-FR
train_engine HUN.a.log HUN en hu        #EN-HU
train_engine ITA.a.log ITA en it        #EN-IT
#EN-JP (needs the Japanese segmentation model and the English reordering command)
train_engine JPN.a.log JPN en jp -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/jp-0.3.0-utf8-1.mod" -reorder_cmd="/usr/bin/perl -sw /OptiBay/ADSK_Software/reorder_jpn.pl -parser_cmd=\"/usr/bin/perl -sw /OptiBay/ADSK_Software/multiParser.pl -threads=8 -parser_cmd=\\\"/OptiBay/ADSK_Software/segmentation/Hidenori/OPENNLP/1.5/opennlp-tools-1.5.0/bin/opennlp Parser /OptiBay/ADSK_Software/segmentation/Hidenori/OPENNLP/Data/en-parser-chunking.zip 2>LOG/opennlp.log\\\"\""
train_engine KOR.a.log KOR en ko        #EN-KO
train_engine NLD.a.log NLD en nl        #EN-NL
train_engine NOR.a.log NOR en no        #EN-NO
train_engine PLK.a.log PLK en pl        #EN-PL
train_engine PTB.a.log PTB en pt_br     #EN-PT_BR
train_engine PTG.a.log PTG en pt_pt     #EN-PT_PT
train_engine RUS.a.log RUS en ru        #EN-RU
train_engine SWE.a.log SWE en sv        #EN-SV
#train_engine TUR.a.log TUR en tr       #EN-TR (disabled)
#train_engine VIT.a.log VIT en vi       #EN-VI (disabled)
#EN-ZH_HANS / EN-ZH_HANT (both use the lcmc segmentation model)
train_engine CHS.a.log CHS en zh_hans -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/lcmc-0.3.0-1.mod"
train_engine CHT.a.log CHT en zh_hant -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/lcmc-0.3.0-1.mod"

## XX -> English
# NOTE(review): the '###' value of -archive looks like a placeholder to be
# filled in before running - confirm against train.pl.
#train_engine ENU.a.log ENU xx en -first_step=2 -last_step=1    #XX-EN (disabled)
train_engine toCSY.a.log CSY cs en '-archive=###'       #CS-EN
train_engine toDEU.a.log DEU de en '-archive=###'       #DE-EN
train_engine toESP.a.log ESP es en '-archive=###'       #ES-EN
# The source code is lower-case 'fr' like every other language code in this
# section (it used to be passed as 'FR').
train_engine toFRA.a.log FRA fr en '-archive=###'       #FR-EN
train_engine toHUN.a.log HUN hu en '-archive=###'       #HU-EN
train_engine toITA.a.log ITA it en '-archive=###'       #IT-EN
train_engine toJPN.a.log JPN jp en -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/jp-0.3.0-utf8-1.mod" '-archive=###'    #JP-EN
train_engine toKOR.a.log KOR ko en '-archive=###'       #KO-EN
train_engine toPLK.a.log PLK pl en '-archive=###'       #PL-EN
train_engine toPTB.a.log PTB pt_br en '-archive=###'    #PT_BR-EN
train_engine toRUS.a.log RUS ru en '-archive=###'       #RU-EN
train_engine toCHS.a.log CHS zh_hans en -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/lcmc-0.3.0-1.mod" '-archive=###'    #ZH_HANS-EN
train_engine toCHT.a.log CHT zh_hant en -seg_model="/OptiBay/ADSK_Software/segmentation/kytea-models/lcmc-0.3.0-1.mod" '-archive=###'    #ZH_HANT-EN
#################################
## Per-Product Trainings for Ambiguous Terminology Extraction
# Step 1: one background per-product training (-perProduct, -binarise=0) per
# target language. Entries are CORPUS_CODE:target_language; the log goes to
# <CORPUS_CODE>.sw.log.
for ENTRY in CSY:cs ESP:es HUN:hu KOR:ko PLK:pl PTB:pt_br PTG:pt_pt; do
    CORPUS_CODE=${ENTRY%%:*}
    TARGET=${ENTRY#*:}
    /OptiBay/ADSK_Software/train.pl -corpus="/Volumes/LaCie_Work/Autodesk/fy15_analysis/corpora/$CORPUS_CODE/corpus" -source=en -target="$TARGET" -lm_path=lm -build_lm=/sw/bin/srilm/macosx -moses_path=/OptiBay/ADSK_Software/moses-trunk/ -bin_dir=/sw/bin/GIZA++/ -log_dir=LOG -device=disk1 -fiscal_year=fy15 -binarise=0 -perProduct >"$CORPUS_CODE.sw.log" 2>&1 &
done

# Step 2: for each engine folder fy15_EN-<TAG>_a, filter and sort the phrase
# table, conflate the targets and calculate the ambiguities. The JSON result is
# written by calculateAmbiguities.pl itself; its STDOUT is kept as a bzipped log.
# NOTE(review): the engine folders are presumably produced by the background
# trainings of step 1 - make sure those have finished before running this.
# Entries are ENGINE_TAG:target_language.
for ENTRY in CS:cs HU:hu KO:ko PL:pl PT_BR:pt_br PT_PT:pt_pt; do
    ENGINE_DIR=fy15_EN-${ENTRY%%:*}_a
    TARGET=${ENTRY#*:}
    zcat "$ENGINE_DIR/model/phrase-table.gz" |
        /OptiBay/ADSK_Software/filterForSorting.pl -sourceLanguage=en -targetLanguage="$TARGET" |
        LC_ALL=C sort -k 1,1 -k 12,12 -k 9nr,9 -S 1024M --parallel=8 --field-separator='|' |
        /OptiBay/ADSK_Software/conflateTargets.pl |
        /OptiBay/ADSK_Software/calculateAmbiguities.pl -output="$ENGINE_DIR/model/ambiguousTerms.json" |
        bzip2 >"$ENGINE_DIR/model/ambiguousTerms.log.bz2"
done

# Step 3: run occurrenceCounter.pl with the ambiguous strings and the cleaned
# source/target corpora of each engine; the output is stored bzipped.
for ENTRY in CS:cs HU:hu KO:ko PL:pl PT_BR:pt_br PT_PT:pt_pt; do
    ENGINE_DIR=fy15_EN-${ENTRY%%:*}_a
    TARGET=${ENTRY#*:}
    /OptiBay/ADSK_Software/occurrenceCounter.pl -sourceCorpus="$ENGINE_DIR/corpus/corpus.clean.en" -targetCorpus="$ENGINE_DIR/corpus/corpus.clean.$TARGET" -ambiguousStrings="$ENGINE_DIR/model/ambiguousTerms.json" -threads=8 |
        bzip2 >"$ENGINE_DIR/model/ambiguousTerms.occurrence.bz2"
done

# Step 4: per corpus, count how often each value of the second '◊'-separated
# field of the English corpus lines occurs (presumably the product code -
# verify against the corpus format) and print "value<TAB>count", most frequent
# first. Progress goes to STDERR: a dot every 10000 lines and the line number
# every 500000 lines.
for CORPUS_CODE in CSY ESP HUN ITA KOR PLK PTB PTG; do
    bzcat "corpora/$CORPUS_CODE/corpus.en.bz2" |
        perl -MEncode -ne 'use utf8;chomp;(undef,$p)=split/◊/,decode("utf-8",$_),-1;++$ps{$p};print STDERR "."unless$.%10000;print STDERR "$."unless$.%500000;END{print"\n";foreach$p(sort{$ps{$b}<=>$ps{$a}}keys%ps){print encode"utf-8","$p\t$ps{$p}\n"}}'
done
#################################
# Create the productivity QA samples: for every language all <lang>_*1.csv
# files are piped into create_samples.pl together with
#   -words       the sum of the third field over all lines of those files
#   -percentage  the per-language percentage listed below
#   -output      productivity_QA_sample_<lang>.csv
# Entries are language:percentage.
# NOTE(review): the Perl snippet splits with an EMPTY pattern (split//), which
# yields single characters rather than fields; a non-printing field delimiter
# was presumably lost when these notes were copied - restore it before use.
for ENTRY in chs:37 deu:30 esp:27 fra:22 ita:20 jpn:35 kor:19 plk:27 ptb:22; do
    SAMPLE_LANG=${ENTRY%%:*}
    SAMPLE_PERCENTAGE=${ENTRY#*:}
    cat "$SAMPLE_LANG"_*1.csv |./create_samples.pl -words=$(cat "$SAMPLE_LANG"_*1.csv|/usr/bin/perl -ne 'BEGIN{my$c=0}my(undef,undef,$w)=split//,$_;$c+=$w;END{print"$c\n"}') -percentage="$SAMPLE_PERCENTAGE" -output="productivity_QA_sample_$SAMPLE_LANG.csv"
done
# Korean only: feed the kor_*2.csv files to recreate_samples.pl, passing the
# sample created above as -data (the same empty-pattern caveat applies).
cat kor_*2.csv |./recreate_samples.pl -words=$(cat kor_*2.csv|/usr/bin/perl -ne 'BEGIN{my$c=0}my(undef,undef,$w)=split//,$_;$c+=$w;END{print"$c\n"}') -output=productivity_QA_sample_kor_raw.csv -data=productivity_QA_sample_kor.csv
# Batch-translate corpus.en.txt with the fy13 EN-ZH_HANT engine into corpus.zh_hant.txt.
# Stages, one per line:
#   1. lowercase and tokenise the English text
#   2. replace | < > ( ) by look-alike characters before decoding
#   3. decode with the translation model (4 threads), then with the recaser model
#   4. restore the original | < > ( ) characters
#   5. strip the |UNK|UNK|UNK markers (their total is reported on STDERR) and trailing spaces
#   6. detokenise as zh_hant
cat corpus.en.txt |
    /Users/ventzi/courses/EuroMatrixPlus/software/moses-2010-04-08/scripts/scripts-20100409-0412/scripts/lowercase.perl |
    /Users/ventzi/courses/EuroMatrixPlus/software/tokenise.pl en |
    /usr/bin/perl -pe 's/\|/│/g;s/</﹤/g;s/>/﹥/g;s/\(/﹙/g;s/\)/﹚/g;' |
    moses-ventzi -threads 4 -f /OptiBay/Autodesk/fy13_EN-ZH_HANT_a/model/moses.ini 2>/dev/null |
    moses-ventzi -f /OptiBay/Autodesk/fy13_EN-ZH_HANT_a/recaser/moses.ini 2>/dev/null |
    /usr/bin/perl -pe 's/│/|/g;s/﹤/</g;s/﹥/>/g;s/﹙/(/g;s/﹚/)/g;' |
    /usr/bin/perl -pe 'BEGIN{my$c=0}$c+=s/\|UNK\|UNK\|UNK//g;s/ +$//;END{print STDERR "unknowns: $c\n"}' |
    /Users/ventzi/courses/EuroMatrixPlus/software/detokenise.pl zh_hant >corpus.zh_hant.txt
# Convert the tab-separated legacy translation lists (source<TAB>target per
# line) into corpus records: both fields are trimmed of surrounding whitespace
# and written as UTF-8, each record ending in "◊÷\n".
# NOTE(review): as written, the output string interpolates $s followed by a
# variable named $tADSTPR / $tADSTCP, which is never set. A non-printing
# delimiter between source, target and the ADSTPR / ADSTCP tag was presumably
# lost when these notes were copied - restore it before use.
cat LegacyTranslations/AS/Russian.txt |perl -MEncode -ne 'use utf8;chomp;($s,$t)=split/\t/,decode"utf-8",$_;$s=~s/^\s+|\s+$//g;$t=~s/^\s+|\s+$//g;print encode"utf-8","$s$tADSTPR◊÷\n"' >EN-RU.Graitec.AS
cat LegacyTranslations/AC/Cesky.txt |perl -MEncode -ne 'use utf8;chomp;($s,$t)=split/\t/,decode"utf-8",$_;$s=~s/^\s+|\s+$//g;$t=~s/^\s+|\s+$//g;print encode"utf-8","$s$tADSTCP◊÷\n"' >EN-CS.Graitec.AC
# Append the segment pairs of AC-TRUFileENCZ.txt to EN-CS.Graitec.AC.
# The input is read in records terminated by "</TrU>". Per record:
#   - \{1...1\} spans are removed; \endash, \lquote, \rquote, \ldblquote and
#     \rdblquote are replaced by the corresponding punctuation characters and
#     \~ by a space
#   - {\pict}, closing braces, "{\word " groups and "\b " / "\b0 " are turned
#     into the temporary marker ◊0◊
#   - the language and text of the <Seg L=...> lines are captured; records in
#     which either text is "÷" are skipped
#   - the segment whose language is "EN-US" becomes the source, the other one
#     the target (if the first segment is not EN-US, the two are swapped)
#   - both texts are trimmed and their ◊0◊ markers renumbered as {1}, {2}, ...
#     separately for source and target
# NOTE(review): the output string interpolates $s followed by $tADSTCP; a
# non-printing delimiter between source, target and the ADSTCP tag was
# presumably lost when these notes were copied - restore it before use.
cat AC-TRUFileENCZ.txt |perl -MEncode -e 'use utf8;$/="</TrU>";while(<>){chomp;$l=decode"utf-8",$_;$l=~s/\\{1.*?1\\}//g;$l=~s/\\endash ?/–/g;$l=~s/\\lquote ?/‘/g;$l=~s/\\rquote ?/’/g;$l=~s/\\ldblquote ?/“/g;$l=~s/\\rdblquote ?/”/g;$l=~s/\\~/ /g;$l=~s/\{\\pict\}/◊0◊/g;$l=~s/\}/◊0◊/g;$l=~s/\{\\[\\\w]+ /◊0◊/g;$l=~s/\\b0? /◊0◊/g;($sl,$st,$tl,$tt)=$l=~/<Seg L=(.*?)>(.*)$/gm;next if$st eq"÷"||$tt eq"÷";$sl eq"EN-US"?($s=$st,$t=$tt):($s=$tt,$t=$st);$s=~s/^\s+|\s+$//g;$t=~s/^\s+|\s+$//g;$p=0;$s=~s/◊0◊/"{".++$p."}"/ge;$p=0;$t=~s/◊0◊/"{".++$p."}"/ge;print encode"utf-8","$s$tADSTCP◊÷\n"}' >>EN-CS.Graitec.AC
# Start moses_server.ventzi.pl for the fy13_EN-JP_b engine with host name
# neucmslinux and port 2029.
#   -preprocess   shell pipeline given to the server: lowercase, tokenise (en),
#                 reorder with reorder_jpn.pl (which is handed an OpenNLP
#                 parser command; parser errors go to LOG/opennlp.trans.log),
#                 then replace | < > ( ) by look-alike characters
#   -postprocess  restores the original | < > ( ) characters and detokenises (jp)
# Quoting: both pipelines are double-quoted shell words, so the $_ of the first
# Perl snippet is written \$_ to keep the shell from expanding it, and the
# inner parser command uses \" quotes. BEGIN{$|=1} turns on autoflush in each
# Perl stage.
moses_server.ventzi.pl -engine=fy13_EN-JP_b -hostname=neucmslinux -hostport=2029 -preprocess="/usr/bin/perl -pe 'BEGIN{$|=1}\$_=lc' |tokenise.pl en |reorder_jpn.pl -parser_cmd=\"opennlp Parser /local/cms/opennlp/data/en-parser-chunking.bin 2>LOG/opennlp.trans.log\" |/usr/bin/perl -pe 'BEGIN{$|=1}s/\|/│/g;s/</﹤/g;s/>/﹥/g;s/\(/﹙/g;s/\)/﹚/g;'" -postprocess="/usr/bin/perl -pe 'BEGIN{$|=1}s/│/|/g;s/﹤/</g;s/﹥/>/g;s/﹙/(/g;s/﹚/)/g;' |detokenise.pl jp"
# Convert URLList.2013.01.07.txt into Perl source (URLList.2013.01.07.pl) that
# defines %urls as { source URL => { language => localised URL } }.
#   - records are terminated by "◊÷\n"; fields 2-4 of a record are taken as
#     source URL, target URL and three-letter language code
#   - records whose target is "null" or whose language is "ALL" are skipped
#   - backslashes and @ are escaped so the values survive inside a
#     double-quoted Perl string
#   - language codes are mapped through %lm; an unmapped code aborts the run
#     with "Unknown language"
#   - the output ends with ")," rather than ");"
# NOTE(review): the record is split with an EMPTY pattern (split//), which
# yields single characters; a non-printing field delimiter was presumably lost
# when these notes were copied - restore it before use.
# NOTE(review): ENG is mapped to "en_uk" here, while the training commands in
# this file use en_gb for the same corpus - confirm which one the consumer of
# %urls expects.
cat URLList.2013.01.07.txt |perl -MEncode -we 'use utf8;$/=encode"utf-8","◊÷\n";%lm=(CHS=>"zh_hans",CHT=>"zh_hant",CSY=>"cs",DEU=>"de",ENG=>"en_uk",ESP=>"es",FIN=>"fi",FRA=>"fr",HUN=>"hu",ITA=>"it",JPN=>"jp",KOR=>"ko",NLD=>"nl",PLK=>"pl",PTB=>"pt_br",RUS=>"ru");while($l=decode"utf-8",scalar<STDIN>){chomp$l;(undef,$s,$t,$lang)=split//,$l;next if$t eq"null"||$lang eq"ALL";$s=~s/([\\\@])/\\$1/g;$t=~s/([\\\@])/\\$1/g;$url{$s}->{$lang}=$t}print"\%urls = (\n";foreach$s(sort{$a cmp $b}keys%url){print encode"utf-8","\t\"$s\" => {\n";foreach$l(sort{$a cmp $b}keys %{$url{$s}}){die"Unknown language: $l\n"unless defined$lm{$l};print encode"utf-8","\t\t\"".($lm{$l})."\" => \"$url{$s}->{$l}\",\n"}print encode"utf-8","\t},\n"}print"),\n"' >URLList.2013.01.07.pl
# Print "<path>: <number of lines>" for every file one level below the current
# folder. Globs replace the parsing of `ls` output, so names containing spaces
# survive, and the folder and file name are joined with the '/' that
# "$FOLDER$FILE" was missing (bzcat used to be handed a non-existent path).
# printf replaces the non-portable `echo -n`.
for FOLDER in */; do
    for FILE in "$FOLDER"*; do
        # Skip sub-folders and the unexpanded pattern of an empty folder.
        [ -f "$FILE" ] || continue
        printf '%s: ' "$FILE"
        bzcat "$FILE" | wc -l
    done
done
# The same for the *bz2 files of the current folder itself.
for FILE in *bz2; do
    # Skip the unexpanded pattern when nothing matches.
    [ -f "$FILE" ] || continue
    printf '%s: ' "$FILE"
    bzcat "$FILE" | wc -l
done
# Copy moses_server.ventzi.pl to /local/cms/bin/ on every host listed in
# servers.txt (first whitespace-separated field of each line; blank lines and
# lines starting with # are skipped).
# rsync is started through the list form of system(), so the host name never
# passes through a shell, and a failed transfer is reported on STDERR instead
# of being ignored.
perl -ne '
    ($s) = split " ";
    next if !defined $s || $s =~ /^\#/;
    system("rsync", "-azyvv", "moses_server.ventzi.pl", "cmsuser\@$s:/local/cms/bin/.") == 0
        or warn "rsync to $s failed (exit status " . ($? >> 8) . ")\n";
' servers.txt