diff --git a/.travis.yml b/.travis.yml index 0b658be..85d6d14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,6 +32,11 @@ jobs: script: - tox -e shellcheck - name: Test + install: + - pip install tox + script: + - tox -e test + - name: Integration Test service: - docker addons: @@ -41,18 +46,13 @@ jobs: install: - pip install tox script: - - tox -e test + - bash integrationTestPrepration.sh + - tox -e integrationTest + - cat D-TL01-005.txt + - cat D-TL01-005.csv - > docker run --rm -e SONARQUBE_SCANNER_PARAMS -e SONAR_TOKEN="${SONAR_TOKEN}" -v "`pwd`:/usr/src" sonarsource/sonar-scanner-cli - - name: Integration Test - install: - - pip install -r requirements.txt - script: - - bash integrationTestPrepration.sh - - python truku.py D-TL01-005.xlsx - - cat D-TL01-005.txt - - cat D-TL01-005.csv diff --git a/README.md b/README.md index 1b7af1f..89d3b62 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,14 @@ source venv/bin/activate pip install -r requirements.txt ``` -## 轉錄音稿 +## 轉Truku錄音稿 從Google Sheet下載錄音稿,假設下載後檔名是`1-465|D-TL01-005.xlsx` ```bash cd Patas source venv/bin/activate -python truku.py ~/Downloads/1-465|D-TL01-005.xlsx +python patas.py Truku ~/Downloads/1-465|D-TL01-005.xlsx ``` 執行完會產生`1-465|D-TL01-005.xlsx.txt`檔案。 @@ -33,7 +33,19 @@ python truku.py ~/Downloads/1-465|D-TL01-005.xlsx ```bash cd Patas source venv/bin/activate -python pangcah.py ~/Downloads/D-PV01|1\~534\ \(C-PL002\).xlsx +python patas.py Pangcah ~/Downloads/D-PV01|1\~534\ \(C-PL002\).xlsx ``` -執行完會產生`D-PV01|1~534 (C-PL002).xlsx.txt`檔案。 +執行完會產生`D-PV01|1~534 (C-PL002).txt`檔案。 + +### Kari Seediq + +從Google Sheet下載錄音稿,假設下載後檔名是`D-SL07-001|合成音稿Sample.xlsx` + +```bash +cd Patas +source venv/bin/activate +python patas.py Seediq ~/Downloads/D-SL07-001|合成音稿Sample.xlsx +``` + +執行完會產生`D-SL07-001|合成音稿Sample.txt`檔案。 diff --git a/integrationTestPrepration.sh b/integrationTestPrepration.sh index 317fbae..6cdc8d0 100644 --- a/integrationTestPrepration.sh +++ b/integrationTestPrepration.sh @@ -1,3 +1,5 @@ #!/bin/bash cp tests/格式檢查/1-4|D-TL01-005.xlsx D-TL01-005.xlsx +cp 'tests/格式檢查/D-PV01|1~353 (C-PL002)sample.xlsx' D-PV01.xlsx +cp tests/格式檢查/D-SL07-001|合成音稿Sample.xlsx D-SL07-001.xlsx diff --git a/truku.py b/kari.py similarity index 59% rename from truku.py rename to kari.py index 25ef41b..fd68666 100644 --- a/truku.py +++ b/kari.py @@ -1,45 +1,45 @@ import pandas import re from os.path import basename -from argparse import ArgumentParser from csv import DictWriter -from pathlib import Path -def xlsx轉錄音稿(xlsx檔名): +def xlsx轉錄音稿kari(xlsx檔名, kari): 語料名 = 找語料名(xlsx檔名) 結果 = [] 錄音編號 = None for 篇名, dataframe in 讀xlsx資料(xlsx檔名): + print(篇名) 結果.append(f"【{語料名}-{篇名}】") for 行 in dataframe: if 錄音編號 is not None and 行.錄音編號 != 錄音編號 + 1: raise ValueError(f"「{篇名}」裡的錄音編號{行.錄音編號}應該要是{錄音編號+1}") 結果.append(f'{行.錄音編號}') - 結果.append(行.太魯閣語.strip()) + 結果.append(getattr(行, kari).strip()) 結果.append('') 錄音編號 = 行.錄音編號 return 結果[:-1] -def xlsx轉csv(xlsx檔名, csv檔名): +def xlsx轉csv(xlsx檔名, kari, csv檔名): with open(csv檔名, 'wt') as 檔案: writer = DictWriter(檔案, fieldnames=[ - '錄音編號', '篇名', '太魯閣語', '華語', + '錄音編號', '篇名', kari, '華語', ]) writer.writeheader() for 篇名, dataframe in 讀xlsx資料(xlsx檔名): + print(篇名) for 行 in dataframe: writer.writerow({ '錄音編號': 行.錄音編號, '篇名': 篇名.strip(), - '太魯閣語': 行.太魯閣語.strip(), + kari: getattr(行, kari).strip(), '華語': 行.華語.strip(), }) def 找語料名(xlsx檔名): - return re.search(r'D-[STP][LVTR]\d\d-\d\d\d', basename(xlsx檔名)).group(0) + return re.search(r'D-[STP][LVTR]\d\d(-\d\d\d)?', basename(xlsx檔名)).group(0) def 讀xlsx資料(xlsx檔名): @@ -48,19 +48,3 @@ def 讀xlsx資料(xlsx檔名): sheet_name=None, ).items(): yield 篇名, dataframe.fillna('').itertuples() - - -def main(): - parser = ArgumentParser(description='San-sing su-pio.') - parser.add_argument('xlsx檔名', type=Path) - args = parser.parse_args() - txt檔名 = args.xlsx檔名.parent / (args.xlsx檔名.stem + '.txt') - with open(txt檔名, 'wt') as 檔案: - for 行 in xlsx轉錄音稿(args.xlsx檔名): - print(行, file=檔案) - csv檔名 = args.xlsx檔名.parent / (args.xlsx檔名.stem + '.csv') - xlsx轉csv(args.xlsx檔名, csv檔名) - - -if __name__ == '__main__': - main() diff --git a/pangcah.py b/pangcah.py deleted file mode 100644 index b2dc911..0000000 --- a/pangcah.py +++ /dev/null @@ -1,41 +0,0 @@ -import pandas -import re -from os.path import basename -from argparse import ArgumentParser - - -def xlsx轉錄音稿(xlsx檔名): - 語料名 = 找語料名(xlsx檔名) - 結果 = [] - 錄音編號 = None - for 篇名, dataframe in pandas.read_excel( - xlsx檔名, engine='openpyxl', - sheet_name=None, - ).items(): - 結果.append(f"【{語料名}-{篇名}】") - print("篇名", 篇名) - for 行 in dataframe.fillna('').itertuples(): - if 錄音編號 is not None and 行.錄音編號 != 錄音編號 + 1: - raise ValueError(f"「{篇名}」裡的錄音編號{行.錄音編號}應該要是{錄音編號+1}") - 結果.append(f'{行.錄音編號}') - 結果.append(行.阿美語.strip()) - 結果.append('') - 錄音編號 = 行.錄音編號 - return 結果[:-1] - - -def 找語料名(xlsx檔名): - return re.search(r'D-[STP][LVTR]\d\d', basename(xlsx檔名)).group(0) - - -def main(): - parser = ArgumentParser(description='產生錄音稿') - parser.add_argument('xlsx檔名') - args = parser.parse_args() - with open(args.xlsx檔名 + '.txt', 'wt') as 檔案: - for 行 in xlsx轉錄音稿(args.xlsx檔名): - print(行, file=檔案) - - -if __name__ == '__main__': - main() diff --git a/patas.py b/patas.py new file mode 100644 index 0000000..6095ce3 --- /dev/null +++ b/patas.py @@ -0,0 +1,30 @@ +from argparse import ArgumentParser +from pathlib import Path +from kari import xlsx轉錄音稿kari, xlsx轉csv + + +def main(): + parser = ArgumentParser(description='產生錄音稿') + parser.add_argument('kari', choices=['Truku', 'Pangcah', 'Seediq']) + parser.add_argument('xlsx檔名', type=Path) + args = parser.parse_args() + if args.kari == 'Truku': + kari = '太魯閣語' + elif args.kari == 'Pangcah': + kari = '阿美語' + elif args.kari == 'Seediq': + kari = 'Tgdaya' + else: + raise ValueError() + + txt檔名 = args.xlsx檔名.parent / (args.xlsx檔名.stem + '.txt') + with open(txt檔名, 'wt') as 檔案: + for 行 in xlsx轉錄音稿kari(args.xlsx檔名, kari): + print(行, file=檔案) + + csv檔名 = args.xlsx檔名.parent / (args.xlsx檔名.stem + '.csv') + xlsx轉csv(args.xlsx檔名, kari, csv檔名) + + +if __name__ == '__main__': + main() diff --git a/tests/testPangcah.py b/tests/testPangcah.py index 0b9928f..f721157 100644 --- a/tests/testPangcah.py +++ b/tests/testPangcah.py @@ -1,7 +1,7 @@ from unittest import TestCase from os.path import join, abspath, dirname -from pangcah import 找語料名 -from pangcah import xlsx轉錄音稿 +from kari import 找語料名 +from kari import xlsx轉錄音稿kari class 語料名試驗(TestCase): @@ -20,13 +20,13 @@ def test格式檢查(self): abspath(dirname(__file__)), '格式檢查', 'D-PV01|1~353 (C-PL002)sample.xlsx' ) - 錄音稿檔名 = join( + 答案錄音稿檔名 = join( abspath(dirname(__file__)), '格式檢查', 'D-PV01|1~353 (C-PL002)sample.txt' ) - 結果 = xlsx轉錄音稿(xlsx檔名) + 結果 = xlsx轉錄音稿kari(xlsx檔名, '阿美語') 答案 = [] - with open(錄音稿檔名) as 檔案: + with open(答案錄音稿檔名) as 檔案: for 行 in 檔案.readlines(): 答案.append(行.rstrip()) self.assertEqual(結果, 答案) diff --git a/tests/testSeediq.py b/tests/testSeediq.py new file mode 100644 index 0000000..9001ce7 --- /dev/null +++ b/tests/testSeediq.py @@ -0,0 +1,32 @@ +from unittest import TestCase +from os.path import join, abspath, dirname +from kari import 找語料名 +from kari import xlsx轉錄音稿kari + + +class 語料名試驗(TestCase): + def test格式檢查(self): + xlsx檔名 = join( + abspath(dirname(__file__)), + '格式檢查', 'D-SL07-001|合成音稿Sample.xlsx' + ) + self.assertEqual(找語料名(xlsx檔名), 'D-SL07-001', xlsx檔名) + + +class xlsx轉錄音稿試驗(TestCase): + def test格式檢查(self): + self.maxDiff = None + xlsx檔名 = join( + abspath(dirname(__file__)), + '格式檢查', 'D-SL07-001|合成音稿Sample.xlsx' + ) + 答案錄音稿檔名 = join( + abspath(dirname(__file__)), + '格式檢查', 'D-SL07-001|合成音稿Sample.txt' + ) + 結果 = xlsx轉錄音稿kari(xlsx檔名, 'Tgdaya') + 答案 = [] + with open(答案錄音稿檔名) as 檔案: + for 行 in 檔案.readlines(): + 答案.append(行.rstrip()) + self.assertEqual(結果, 答案) diff --git "a/tests/test\350\252\236\346\226\231\345\220\215.py" "b/tests/test\350\252\236\346\226\231\345\220\215.py" index 5764b8d..77633be 100644 --- "a/tests/test\350\252\236\346\226\231\345\220\215.py" +++ "b/tests/test\350\252\236\346\226\231\345\220\215.py" @@ -1,6 +1,6 @@ from unittest import TestCase from os.path import join, abspath, dirname -from truku import 找語料名 +from kari import 找語料名 class 語料名試驗(TestCase): diff --git "a/tests/test\350\275\211csv.py" "b/tests/test\350\275\211csv.py" index 297e1f3..0f5b30b 100644 --- "a/tests/test\350\275\211csv.py" +++ "b/tests/test\350\275\211csv.py" @@ -1,7 +1,7 @@ from unittest import TestCase from os.path import join, abspath, dirname -from truku import xlsx轉csv from csv import DictReader +from kari import xlsx轉csv class xlsx轉csv試驗(TestCase): @@ -13,7 +13,7 @@ def test格式檢查(self): csv檔名 = join( abspath(dirname(__file__)), '格式檢查', '1-4|D-TL01-005.csv' ) - xlsx轉csv(xlsx檔名, csv檔名) + xlsx轉csv(xlsx檔名, '太魯閣語', csv檔名) 答案 = { '錄音編號': '4', '篇名': '我眺望山嶺時', @@ -25,4 +25,4 @@ def test格式檢查(self): '華語': '從小至今,不知為何總是喜歡跳望山嶺!到底是何因由呢?', } with open(csv檔名) as 檔案: - self.assertIn(答案, list(DictReader(檔案))) + self.assertEqual(答案, list(DictReader(檔案))[3]) diff --git "a/tests/test\350\275\211\351\214\204\351\237\263\347\250\277.py" "b/tests/test\350\275\211\351\214\204\351\237\263\347\250\277.py" index 3b83f95..01f2ecd 100644 --- "a/tests/test\350\275\211\351\214\204\351\237\263\347\250\277.py" +++ "b/tests/test\350\275\211\351\214\204\351\237\263\347\250\277.py" @@ -1,6 +1,6 @@ from unittest import TestCase from os.path import join, abspath, dirname -from truku import xlsx轉錄音稿 +from kari import xlsx轉錄音稿kari class xlsx轉錄音稿試驗(TestCase): @@ -12,7 +12,7 @@ def test格式檢查(self): 錄音稿檔名 = join( abspath(dirname(__file__)), '格式檢查', '1-4|D-TL01-005.txt' ) - 結果 = xlsx轉錄音稿(xlsx檔名) + 結果 = xlsx轉錄音稿kari(xlsx檔名, '太魯閣語') 答案 = [] with open(錄音稿檔名) as 檔案: for 行 in 檔案.readlines(): @@ -25,4 +25,4 @@ def test編號錯誤(self): abspath(dirname(__file__)), '錄音編號錯誤', 'D-TL01-005.xlsx' ) with self.assertRaises(ValueError): - xlsx轉錄音稿(xlsx檔名) + xlsx轉錄音稿kari(xlsx檔名, '太魯閣語') diff --git "a/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-PV01\357\275\2341~353 (C-PL002)sample.xlsx" "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-PV01\357\275\2341~353 (C-PL002)sample.xlsx" index 6849943..e8afa6a 100644 Binary files "a/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-PV01\357\275\2341~353 (C-PL002)sample.xlsx" and "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-PV01\357\275\2341~353 (C-PL002)sample.xlsx" differ diff --git "a/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.txt" "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.txt" new file mode 100644 index 0000000..67edb95 --- /dev/null +++ "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.txt" @@ -0,0 +1,13 @@ +【D-SL07-001-1-內文】 +1 +Ga neepah baro ka macu tnkuyan na tama mu rudan. + +2 +Mita ba tama na rudan so mmaha neepah ka Watan dige smoora bale. + +【D-SL07-001-2-小小讀書會】 +3 +Pnspuwan ta muuyas + +4 +Kari ga srengo so patis nii ge, diff --git "a/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.xlsx" "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.xlsx" new file mode 100644 index 0000000..12a94f1 Binary files /dev/null and "b/tests/\346\240\274\345\274\217\346\252\242\346\237\245/D-SL07-001\357\275\234\345\220\210\346\210\220\351\237\263\347\250\277Sample.xlsx" differ diff --git a/tox.ini b/tox.ini index 325b32c..6b49d79 100644 --- a/tox.ini +++ b/tox.ini @@ -29,9 +29,19 @@ commands = bash shellcheck.sh [testenv:test] +deps = + -rrequirements.txt + coverage +commands = + python -m unittest {posargs} + +[testenv:integrationTest] deps = -rrequirements.txt coverage commands = coverage run -m unittest {posargs} + coverage run -a patas.py Truku D-TL01-005.xlsx + coverage run -a patas.py Pangcah D-PV01.xlsx + coverage run -a patas.py Seediq D-SL07-001.xlsx coverage xml