From dabc79765be25e39b8d71163b6c8ddd408c8d5cf Mon Sep 17 00:00:00 2001 From: MijinkoSD Date: Mon, 6 May 2024 05:29:08 +0000 Subject: [PATCH] Apply automatic changes --- dist/Tokenizer.d.ts | 39 +++ dist/Tokenizer.js | 154 ++++++++++ dist/Tokenizer.js.map | 1 + dist/TokenizerBuilder.d.ts | 28 ++ dist/TokenizerBuilder.js | 92 ++++++ dist/TokenizerBuilder.js.map | 1 + dist/dict/CharacterClass.d.ts | 18 ++ dist/dict/CharacterClass.js | 67 ++++ dist/dict/CharacterClass.js.map | 1 + dist/dict/CharacterDefinition.d.ts | 42 +++ dist/dict/CharacterDefinition.js | 224 ++++++++++++++ dist/dict/CharacterDefinition.js.map | 1 + dist/dict/ConnectionCosts.d.ts | 17 ++ dist/dict/ConnectionCosts.js | 73 +++++ dist/dict/ConnectionCosts.js.map | 1 + dist/dict/DynamicDictionaries.d.ts | 25 ++ dist/dict/DynamicDictionaries.js | 102 +++++++ dist/dict/DynamicDictionaries.js.map | 1 + dist/dict/InvokeDefinitionMap.d.ts | 41 +++ dist/dict/InvokeDefinitionMap.js | 113 +++++++ dist/dict/InvokeDefinitionMap.js.map | 1 + dist/dict/TokenInfoDictionary.d.ts | 29 ++ dist/dict/TokenInfoDictionary.js | 151 +++++++++ dist/dict/TokenInfoDictionary.js.map | 1 + dist/dict/UnknownDictionary.d.ts | 21 ++ dist/dict/UnknownDictionary.js | 78 +++++ dist/dict/UnknownDictionary.js.map | 1 + .../builder/CharacterDefinitionBuilder.d.ts | 15 + .../builder/CharacterDefinitionBuilder.js | 82 +++++ .../builder/CharacterDefinitionBuilder.js.map | 1 + dist/dict/builder/ConnectionCostsBuilder.d.ts | 13 + dist/dict/builder/ConnectionCostsBuilder.js | 75 +++++ .../builder/ConnectionCostsBuilder.js.map | 1 + dist/dict/builder/CreateDatFiles.d.ts | 1 + dist/dict/builder/CreateDatFiles.js | 93 ++++++ dist/dict/builder/CreateDatFiles.js.map | 1 + dist/dict/builder/DictionaryBuilder.d.ts | 54 ++++ dist/dict/builder/DictionaryBuilder.js | 160 ++++++++++ dist/dict/builder/DictionaryBuilder.js.map | 1 + dist/kuromoji.d.ts | 8 + dist/kuromoji.js | 30 ++ dist/kuromoji.js.map | 1 + dist/loader/BrowserDictionaryLoader.d.ts | 23 ++ 
dist/loader/BrowserDictionaryLoader.js | 67 ++++ dist/loader/BrowserDictionaryLoader.js.map | 1 + dist/loader/DictionaryLoader.d.ts | 26 ++ dist/loader/DictionaryLoader.js | 186 ++++++++++++ dist/loader/DictionaryLoader.js.map | 1 + dist/loader/NodeDictionaryLoader.d.ts | 22 ++ dist/loader/NodeDictionaryLoader.js | 62 ++++ dist/loader/NodeDictionaryLoader.js.map | 1 + dist/util/ByteBuffer.d.ts | 23 ++ dist/util/ByteBuffer.js | 287 ++++++++++++++++++ dist/util/ByteBuffer.js.map | 1 + dist/util/IpadicFormatter.d.ts | 26 ++ dist/util/IpadicFormatter.js | 62 ++++ dist/util/IpadicFormatter.js.map | 1 + dist/util/PathJoin.d.ts | 1 + dist/util/PathJoin.js | 16 + dist/util/PathJoin.js.map | 1 + dist/util/SurrogateAwareString.d.ts | 17 ++ dist/util/SurrogateAwareString.js | 104 +++++++ dist/util/SurrogateAwareString.js.map | 1 + dist/util/TypeGuard.d.ts | 1 + dist/util/TypeGuard.js | 8 + dist/util/TypeGuard.js.map | 1 + dist/viterbi/ViterbiBuilder.d.ts | 23 ++ dist/viterbi/ViterbiBuilder.js | 121 ++++++++ dist/viterbi/ViterbiBuilder.js.map | 1 + dist/viterbi/ViterbiLattice.d.ts | 20 ++ dist/viterbi/ViterbiLattice.js | 69 +++++ dist/viterbi/ViterbiLattice.js.map | 1 + dist/viterbi/ViterbiNode.d.ts | 27 ++ dist/viterbi/ViterbiNode.js | 110 +++++++ dist/viterbi/ViterbiNode.js.map | 1 + dist/viterbi/ViterbiSearcher.d.ts | 21 ++ dist/viterbi/ViterbiSearcher.js | 97 ++++++ dist/viterbi/ViterbiSearcher.js.map | 1 + 78 files changed, 3290 insertions(+) create mode 100644 dist/Tokenizer.d.ts create mode 100644 dist/Tokenizer.js create mode 100644 dist/Tokenizer.js.map create mode 100644 dist/TokenizerBuilder.d.ts create mode 100644 dist/TokenizerBuilder.js create mode 100644 dist/TokenizerBuilder.js.map create mode 100644 dist/dict/CharacterClass.d.ts create mode 100644 dist/dict/CharacterClass.js create mode 100644 dist/dict/CharacterClass.js.map create mode 100644 dist/dict/CharacterDefinition.d.ts create mode 100644 dist/dict/CharacterDefinition.js create mode 100644 
dist/dict/CharacterDefinition.js.map create mode 100644 dist/dict/ConnectionCosts.d.ts create mode 100644 dist/dict/ConnectionCosts.js create mode 100644 dist/dict/ConnectionCosts.js.map create mode 100644 dist/dict/DynamicDictionaries.d.ts create mode 100644 dist/dict/DynamicDictionaries.js create mode 100644 dist/dict/DynamicDictionaries.js.map create mode 100644 dist/dict/InvokeDefinitionMap.d.ts create mode 100644 dist/dict/InvokeDefinitionMap.js create mode 100644 dist/dict/InvokeDefinitionMap.js.map create mode 100644 dist/dict/TokenInfoDictionary.d.ts create mode 100644 dist/dict/TokenInfoDictionary.js create mode 100644 dist/dict/TokenInfoDictionary.js.map create mode 100644 dist/dict/UnknownDictionary.d.ts create mode 100644 dist/dict/UnknownDictionary.js create mode 100644 dist/dict/UnknownDictionary.js.map create mode 100644 dist/dict/builder/CharacterDefinitionBuilder.d.ts create mode 100644 dist/dict/builder/CharacterDefinitionBuilder.js create mode 100644 dist/dict/builder/CharacterDefinitionBuilder.js.map create mode 100644 dist/dict/builder/ConnectionCostsBuilder.d.ts create mode 100644 dist/dict/builder/ConnectionCostsBuilder.js create mode 100644 dist/dict/builder/ConnectionCostsBuilder.js.map create mode 100644 dist/dict/builder/CreateDatFiles.d.ts create mode 100644 dist/dict/builder/CreateDatFiles.js create mode 100644 dist/dict/builder/CreateDatFiles.js.map create mode 100644 dist/dict/builder/DictionaryBuilder.d.ts create mode 100644 dist/dict/builder/DictionaryBuilder.js create mode 100644 dist/dict/builder/DictionaryBuilder.js.map create mode 100644 dist/kuromoji.d.ts create mode 100644 dist/kuromoji.js create mode 100644 dist/kuromoji.js.map create mode 100644 dist/loader/BrowserDictionaryLoader.d.ts create mode 100644 dist/loader/BrowserDictionaryLoader.js create mode 100644 dist/loader/BrowserDictionaryLoader.js.map create mode 100644 dist/loader/DictionaryLoader.d.ts create mode 100644 dist/loader/DictionaryLoader.js create mode 100644 
dist/loader/DictionaryLoader.js.map create mode 100644 dist/loader/NodeDictionaryLoader.d.ts create mode 100644 dist/loader/NodeDictionaryLoader.js create mode 100644 dist/loader/NodeDictionaryLoader.js.map create mode 100644 dist/util/ByteBuffer.d.ts create mode 100644 dist/util/ByteBuffer.js create mode 100644 dist/util/ByteBuffer.js.map create mode 100644 dist/util/IpadicFormatter.d.ts create mode 100644 dist/util/IpadicFormatter.js create mode 100644 dist/util/IpadicFormatter.js.map create mode 100644 dist/util/PathJoin.d.ts create mode 100644 dist/util/PathJoin.js create mode 100644 dist/util/PathJoin.js.map create mode 100644 dist/util/SurrogateAwareString.d.ts create mode 100644 dist/util/SurrogateAwareString.js create mode 100644 dist/util/SurrogateAwareString.js.map create mode 100644 dist/util/TypeGuard.d.ts create mode 100644 dist/util/TypeGuard.js create mode 100644 dist/util/TypeGuard.js.map create mode 100644 dist/viterbi/ViterbiBuilder.d.ts create mode 100644 dist/viterbi/ViterbiBuilder.js create mode 100644 dist/viterbi/ViterbiBuilder.js.map create mode 100644 dist/viterbi/ViterbiLattice.d.ts create mode 100644 dist/viterbi/ViterbiLattice.js create mode 100644 dist/viterbi/ViterbiLattice.js.map create mode 100644 dist/viterbi/ViterbiNode.d.ts create mode 100644 dist/viterbi/ViterbiNode.js create mode 100644 dist/viterbi/ViterbiNode.js.map create mode 100644 dist/viterbi/ViterbiSearcher.d.ts create mode 100644 dist/viterbi/ViterbiSearcher.js create mode 100644 dist/viterbi/ViterbiSearcher.js.map diff --git a/dist/Tokenizer.d.ts b/dist/Tokenizer.d.ts new file mode 100644 index 0000000..4f4a59e --- /dev/null +++ b/dist/Tokenizer.d.ts @@ -0,0 +1,39 @@ +import ViterbiBuilder from "./viterbi/ViterbiBuilder.js"; +import ViterbiSearcher from "./viterbi/ViterbiSearcher.js"; +import IpadicFormatter from "./util/IpadicFormatter.js"; +import { IpadicFormatterToken } from "./util/IpadicFormatter.js"; +import DynamicDictionaries from 
"./dict/DynamicDictionaries.js"; +import ViterbiLattice from "./viterbi/ViterbiLattice.js"; +declare class Tokenizer { + token_info_dictionary: import("./dict/TokenInfoDictionary.js").default; + unknown_dictionary: import("./dict/UnknownDictionary.js").default; + viterbi_builder: ViterbiBuilder; + viterbi_searcher: ViterbiSearcher; + formatter: IpadicFormatter; + /** + * Tokenizer + * @param {DynamicDictionaries} dic Dictionaries used by this tokenizer + * @constructor + */ + constructor(dic: DynamicDictionaries); + /** + * Split into sentence by punctuation + * @param {string} input Input text + * @returns {Array.} Sentences end with punctuation + */ + static splitByPunctuation(input: string): string[]; + /** + * Tokenize text + * @param {string} text Input text to analyze + * @returns {Array} Tokens + */ + tokenize(text: string): IpadicFormatterToken[]; + tokenizeForSentence(sentence: string, tokens?: IpadicFormatterToken[]): IpadicFormatterToken[]; + /** + * Build word lattice + * @param {string} text Input text to analyze + * @returns {ViterbiLattice} Word lattice + */ + getLattice(text: string): ViterbiLattice; +} +export default Tokenizer; diff --git a/dist/Tokenizer.js b/dist/Tokenizer.js new file mode 100644 index 0000000..515175c --- /dev/null +++ b/dist/Tokenizer.js @@ -0,0 +1,154 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import ViterbiBuilder from "./viterbi/ViterbiBuilder.js"; +import ViterbiSearcher from "./viterbi/ViterbiSearcher.js"; +import IpadicFormatter from "./util/IpadicFormatter.js"; +/** + * 読点と句読点。 + */ +const PUNCTUATION = /、|。/; +class Tokenizer { + /** + * Tokenizer + * @param {DynamicDictionaries} dic Dictionaries used by this tokenizer + * @constructor + */ + constructor(dic) { + Object.defineProperty(this, "token_info_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "unknown_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "viterbi_builder", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "viterbi_searcher", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "formatter", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.token_info_dictionary = dic.token_info_dictionary; + this.unknown_dictionary = dic.unknown_dictionary; + this.viterbi_builder = new ViterbiBuilder(dic); + this.viterbi_searcher = new ViterbiSearcher(dic.connection_costs); + this.formatter = new IpadicFormatter(); // TODO Other dictionaries + } + /** + * Split into sentence by punctuation + * @param {string} input Input text + * @returns {Array.} Sentences end with punctuation + */ + static splitByPunctuation(input) { + const sentences = []; + let tail = input; + while (true) { + if (tail === "") { + break; + } + const index = tail.search(PUNCTUATION); + if (index < 0) { + sentences.push(tail); + break; + } + sentences.push(tail.substring(0, index + 1)); + tail = tail.substring(index + 1); + } + return sentences; + } + /** + * Tokenize text + * @param {string} text Input text to analyze + * @returns {Array} Tokens + */ + tokenize(text) { + const sentences = 
Tokenizer.splitByPunctuation(text); + const tokens = []; + for (const sentence of sentences) { + tokens.push(...this.tokenizeForSentence(sentence, tokens)); + } + return tokens; + } + tokenizeForSentence(sentence, tokens) { + if (tokens === undefined) { + tokens = []; + } + const lattice = this.getLattice(sentence); + const best_path = this.viterbi_searcher.search(lattice); + let last_pos = 0; + if (tokens.length > 0) { + last_pos = tokens[tokens.length - 1].word_position; + } + const result = []; + for (const node of best_path) { + let token, features, features_line; + if (node.type === "KNOWN") { + features_line = this.token_info_dictionary.getFeatures(node.name.toString()); + if (features_line == null) { + features = []; + } + else { + features = features_line.split(","); + } + token = this.formatter.formatEntry(node.name, last_pos + node.start_pos, node.type, features); + } + else if (node.type === "UNKNOWN") { + // Unknown word + features_line = this.unknown_dictionary.getFeatures(node.name.toString()); + if (features_line == null) { + features = []; + } + else { + features = features_line.split(","); + } + token = this.formatter.formatUnknownEntry(node.name, last_pos + node.start_pos, node.type, features, node.surface_form); + } + else { + // TODO User dictionary + token = this.formatter.formatEntry(node.name, last_pos + node.start_pos, node.type, []); + } + result.push(token); + } + return result; + } + /** + * Build word lattice + * @param {string} text Input text to analyze + * @returns {ViterbiLattice} Word lattice + */ + getLattice(text) { + return this.viterbi_builder.build(text); + } +} +export default Tokenizer; +//# sourceMappingURL=Tokenizer.js.map \ No newline at end of file diff --git a/dist/Tokenizer.js.map b/dist/Tokenizer.js.map new file mode 100644 index 0000000..1e794c8 --- /dev/null +++ b/dist/Tokenizer.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"Tokenizer.js","sourceRoot":"","sources":["../src/Tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,cAAc,MAAM,6BAA6B,CAAC;AACzD,OAAO,eAAe,MAAM,8BAA8B,CAAC;AAC3D,OAAO,eAAe,MAAM,2BAA2B,CAAC;AAKxD;;GAEG;AACH,MAAM,WAAW,GAAG,KAAK,CAAC;AAE1B,MAAM,SAAS;IAOb;;;;OAIG;IACH,YAAY,GAAwB;QAXpC;;;;;WAAsB;QACtB;;;;;WAAmB;QACnB;;;;;WAAgB;QAChB;;;;;WAAiB;QACjB;;;;;WAAU;QAQR,IAAI,CAAC,qBAAqB,GAAG,GAAG,CAAC,qBAAqB,CAAC;QACvD,IAAI,CAAC,kBAAkB,GAAG,GAAG,CAAC,kBAAkB,CAAC;QACjD,IAAI,CAAC,eAAe,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC;QAC/C,IAAI,CAAC,gBAAgB,GAAG,IAAI,eAAe,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAClE,IAAI,CAAC,SAAS,GAAG,IAAI,eAAe,EAAE,CAAC,CAAC,0BAA0B;IACpE,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,kBAAkB,CAAC,KAAa;QACrC,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,IAAI,IAAI,GAAG,KAAK,CAAC;QACjB,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,IAAI,KAAK,EAAE,EAAE,CAAC;gBAChB,MAAM;YACR,CAAC;YACD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YACvC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,MAAM;YACR,CAAC;YACD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YAC7C,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACnC,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;OAIG;IACH,QAAQ,CAAC,IAAY;QACnB,MAAM,SAAS,GAAG,SAAS,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACrD,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,mBAAmB,CACjB,QAAgB,EAChB,MAA+B;QAE/B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,GAAG,EAAE,CAAC;QACd,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACxD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC;QACrD,CAAC;QAED,MAAM,MAAM,GAA2B,EAAE,CAAC;QAE1C,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,IAAI,KAA2B,EAC7B,QAA
kB,EAClB,aAAqB,CAAC;YACxB,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;gBAC1B,aAAa,GAAG,IAAI,CAAC,qBAAqB,CAAC,WAAW,CACpD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CACrB,CAAC;gBACF,IAAI,aAAa,IAAI,IAAI,EAAE,CAAC;oBAC1B,QAAQ,GAAG,EAAE,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC;gBACD,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAChC,IAAI,CAAC,IAAI,EACT,QAAQ,GAAG,IAAI,CAAC,SAAS,EACzB,IAAI,CAAC,IAAI,EACT,QAAQ,CACT,CAAC;YACJ,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBACnC,eAAe;gBACf,aAAa,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CACjD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CACrB,CAAC;gBACF,IAAI,aAAa,IAAI,IAAI,EAAE,CAAC;oBAC1B,QAAQ,GAAG,EAAE,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC;gBACD,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,CACvC,IAAI,CAAC,IAAI,EACT,QAAQ,GAAG,IAAI,CAAC,SAAS,EACzB,IAAI,CAAC,IAAI,EACT,QAAQ,EACR,IAAI,CAAC,YAAY,CAClB,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,uBAAuB;gBACvB,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAChC,IAAI,CAAC,IAAI,EACT,QAAQ,GAAG,IAAI,CAAC,SAAS,EACzB,IAAI,CAAC,IAAI,EACT,EAAE,CACH,CAAC;YACJ,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACH,UAAU,CAAC,IAAY;QACrB,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,eAAe,SAAS,CAAC"} \ No newline at end of file diff --git a/dist/TokenizerBuilder.d.ts b/dist/TokenizerBuilder.d.ts new file mode 100644 index 0000000..17c83b6 --- /dev/null +++ b/dist/TokenizerBuilder.d.ts @@ -0,0 +1,28 @@ +import Tokenizer from "./Tokenizer.js"; +export interface TokenizerBuilderOption { + dicPath?: string; +} +/** + * Callback used by build + * @callback TokenizerBuilder~onLoad + * @param {Object} err Error object + * @param {Tokenizer} tokenizer Prepared Tokenizer + */ +export type TokenizerBuilderOnLoad = (err: (Error | null)[], tokenizer?: Tokenizer) => void; +declare class TokenizerBuilder { + dic_path: string; + /** + * TokenizerBuilder create Tokenizer instance. 
+ * @param {Object} option JSON object which have key-value pairs settings + * @param {string} option.dicPath Dictionary directory path (or URL using in browser) + * @constructor + */ + constructor(option?: TokenizerBuilderOption); + /** + * Build Tokenizer instance by asynchronous manner + * @param {TokenizerBuilder~onLoad} callback Callback function + */ + build(callback: TokenizerBuilderOnLoad): Promise; + buildBrowser(callback: TokenizerBuilderOnLoad): Promise; +} +export default TokenizerBuilder; diff --git a/dist/TokenizerBuilder.js b/dist/TokenizerBuilder.js new file mode 100644 index 0000000..9acda4a --- /dev/null +++ b/dist/TokenizerBuilder.js @@ -0,0 +1,92 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +import Tokenizer from "./Tokenizer.js"; +import BrowserDictionaryLoader from "./loader/BrowserDictionaryLoader.js"; +import NodeDictionaryLoader from "./loader/NodeDictionaryLoader.js"; +class TokenizerBuilder { + /** + * TokenizerBuilder create Tokenizer instance. 
+ * @param {Object} option JSON object which have key-value pairs settings + * @param {string} option.dicPath Dictionary directory path (or URL using in browser) + * @constructor + */ + constructor(option = {}) { + Object.defineProperty(this, "dic_path", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + if (option.dicPath == null) { + this.dic_path = "dict/"; + } + else { + this.dic_path = option.dicPath; + } + } + /** + * Build Tokenizer instance by asynchronous manner + * @param {TokenizerBuilder~onLoad} callback Callback function + */ + async build(callback) { + const loader = new NodeDictionaryLoader(this.dic_path); + await loader.load((err, dic) => { + callback(toErrorArray(err), new Tokenizer(dic)); + }); + } + async buildBrowser(callback) { + const loader = new BrowserDictionaryLoader(this.dic_path); + await loader.load((err, dic) => { + callback(toErrorArray(err), new Tokenizer(dic)); + }); + } +} +/** + * To convert an object to an array of error objects. + * オブジェクトをエラーオブジェクト配列に変換します。 + * + * I said "To convert", but it works well only from error object or string type. 
+ * 変換するとは言ってもエラーオブジェクトか文字列しかうまく変換できないけれど。 + * + * @param obj オブジェクト + * @returns + */ +const toErrorArray = (obj) => { + let _obj; + if (Array.isArray(obj)) { + _obj = obj; + } + else { + _obj = [obj]; + } + const result = []; + for (const o of _obj) { + if (o instanceof Error || o === null) { + result.push(o); + } + else if (typeof o === "string") { + result.push(new Error(o)); + } + else { + result.push(new Error("unknown error object recieved.")); + } + } + return result; +}; +export default TokenizerBuilder; +//# sourceMappingURL=TokenizerBuilder.js.map \ No newline at end of file diff --git a/dist/TokenizerBuilder.js.map b/dist/TokenizerBuilder.js.map new file mode 100644 index 0000000..7ad7789 --- /dev/null +++ b/dist/TokenizerBuilder.js.map @@ -0,0 +1 @@ +{"version":3,"file":"TokenizerBuilder.js","sourceRoot":"","sources":["../src/TokenizerBuilder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,SAAS,MAAM,gBAAgB,CAAC;AACvC,OAAO,uBAAuB,MAAM,qCAAqC,CAAC;AAC1E,OAAO,oBAAoB,MAAM,kCAAkC,CAAC;AAiBpE,MAAM,gBAAgB;IAGpB;;;;;OAKG;IACH,YAAY,SAAiC,EAAE;QAR/C;;;;;WAAiB;QASf,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC;QACjC,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,KAAK,CAAC,QAAgC;QAC1C,MAAM,MAAM,GAAG,IAAI,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvD,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YAC7B,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,IAAI,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,QAAgC;QACjD,MAAM,MAAM,GAAG,IAAI,uBAAuB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YAC7B,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,IAAI,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED;;;;;;;;;GASG;AACH,MAAM,YAAY,GAAG,CAAC,GAAkB,EAAoB,EAAE;IAC5D,IAAI,IAAuB,CAAC;IAC5B,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN
,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IACD,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,CAAC,YAAY,KAAK,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;aAAM,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,eAAe,gBAAgB,CAAC"} \ No newline at end of file diff --git a/dist/dict/CharacterClass.d.ts b/dist/dict/CharacterClass.d.ts new file mode 100644 index 0000000..13c0999 --- /dev/null +++ b/dist/dict/CharacterClass.d.ts @@ -0,0 +1,18 @@ +declare class CharacterClass { + class_id: number; + class_name: string; + is_always_invoke: boolean | number; + is_grouping: boolean | number; + max_length: number; + /** + * CharacterClass + * @param {number} class_id + * @param {string} class_name + * @param {boolean} is_always_invoke + * @param {boolean} is_grouping + * @param {number} max_length + * @constructor + */ + constructor(class_id: number, class_name: string, is_always_invoke: boolean | number, is_grouping: boolean | number, max_length: number); +} +export default CharacterClass; diff --git a/dist/dict/CharacterClass.js b/dist/dict/CharacterClass.js new file mode 100644 index 0000000..99260f3 --- /dev/null +++ b/dist/dict/CharacterClass.js @@ -0,0 +1,67 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +class CharacterClass { + /** + * CharacterClass + * @param {number} class_id + * @param {string} class_name + * @param {boolean} is_always_invoke + * @param {boolean} is_grouping + * @param {number} max_length + * @constructor + */ + constructor(class_id, class_name, is_always_invoke, is_grouping, max_length) { + Object.defineProperty(this, "class_id", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "class_name", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "is_always_invoke", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "is_grouping", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "max_length", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.class_id = class_id; + this.class_name = class_name; + this.is_always_invoke = is_always_invoke; + this.is_grouping = is_grouping; + this.max_length = max_length; + } +} +export default CharacterClass; +//# sourceMappingURL=CharacterClass.js.map \ No newline at end of file diff --git a/dist/dict/CharacterClass.js.map b/dist/dict/CharacterClass.js.map new file mode 100644 index 0000000..455a089 --- /dev/null +++ b/dist/dict/CharacterClass.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"CharacterClass.js","sourceRoot":"","sources":["../../src/dict/CharacterClass.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,MAAM,cAAc;IAOlB;;;;;;;;OAQG;IACH,YACE,QAAgB,EAChB,UAAkB,EAClB,gBAAkC,EAClC,WAA6B,EAC7B,UAAkB;QApBpB;;;;;WAAiB;QACjB;;;;;WAAmB;QACnB;;;;;WAAmC;QACnC;;;;;WAA8B;QAC9B;;;;;WAAmB;QAkBjB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;QACzC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;CACF;AAED,eAAe,cAAc,CAAC"} \ No newline at end of file diff --git a/dist/dict/CharacterDefinition.d.ts b/dist/dict/CharacterDefinition.d.ts new file mode 100644 index 0000000..b397f8a --- /dev/null +++ b/dist/dict/CharacterDefinition.d.ts @@ -0,0 +1,42 @@ +import InvokeDefinitionMap from "./InvokeDefinitionMap.js"; +import CharacterClass from "./CharacterClass.js"; +declare class CharacterDefinition { + character_category_map: Uint8Array; + compatible_category_map: Uint32Array; + invoke_definition_map: null | InvokeDefinitionMap; + /** + * CharacterDefinition represents char.def file and + * defines behavior of unknown word processing + * @constructor + */ + constructor(); + /** + * Load CharacterDefinition + * @param {Uint8Array} cat_map_buffer + * @param {Uint32Array} compat_cat_map_buffer + * @param {InvokeDefinitionMap} invoke_def_buffer + * @returns {CharacterDefinition} + */ + static load(cat_map_buffer: Uint8Array, compat_cat_map_buffer: Uint32Array, invoke_def_buffer: Uint8Array): CharacterDefinition; + static parseCharCategory(class_id: number, parsed_category_def: string[]): CharacterClass | null; + static parseCategoryMapping(parsed_category_mapping: string[]): Omit; + static parseRangeCategoryMapping(parsed_category_mapping: string[]): CategoryMapping; + /** + * Initializing method + * @param {Array} category_mapping Array of category mapping + */ + initCategoryMappings(category_mapping?: CategoryMapping[]): void; + /** + 
* Lookup compatible categories for a character (not included 1st category) + * @param {string} ch UCS2 character (just 1st character is effective) + * @returns {Array.} character classes + */ + lookupCompatibleCategory(ch: string): CharacterClass[]; + /** + * Lookup category for a character + * @param {string} ch UCS2 character (just 1st character is effective) + * @returns {CharacterClass} character class + */ + lookup(ch: string): CharacterClass | undefined; +} +export default CharacterDefinition; diff --git a/dist/dict/CharacterDefinition.js b/dist/dict/CharacterDefinition.js new file mode 100644 index 0000000..0588147 --- /dev/null +++ b/dist/dict/CharacterDefinition.js @@ -0,0 +1,224 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import InvokeDefinitionMap from "./InvokeDefinitionMap.js"; +import CharacterClass from "./CharacterClass.js"; +import SurrogateAwareString from "../util/SurrogateAwareString.js"; +const DEFAULT_CATEGORY = "DEFAULT"; +class CharacterDefinition { + /** + * CharacterDefinition represents char.def file and + * defines behavior of unknown word processing + * @constructor + */ + constructor() { + Object.defineProperty(this, "character_category_map", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "compatible_category_map", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "invoke_definition_map", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.character_category_map = new Uint8Array(65536); // for all UCS2 code points + this.compatible_category_map = new Uint32Array(65536); // for all UCS2 code points + this.invoke_definition_map = null; + } + /** + * Load CharacterDefinition + * @param {Uint8Array} cat_map_buffer + * @param {Uint32Array} compat_cat_map_buffer + * @param {InvokeDefinitionMap} invoke_def_buffer + * @returns {CharacterDefinition} + */ + static load(cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer) { + var char_def = new CharacterDefinition(); + char_def.character_category_map = cat_map_buffer; + char_def.compatible_category_map = compat_cat_map_buffer; + char_def.invoke_definition_map = + InvokeDefinitionMap.load(invoke_def_buffer); + return char_def; + } + static parseCharCategory(class_id, parsed_category_def) { + var category = parsed_category_def[1]; + var invoke = parseInt(parsed_category_def[2]); + var grouping = parseInt(parsed_category_def[3]); + var max_length = parseInt(parsed_category_def[4]); + if (!isFinite(invoke) || (invoke !== 0 && invoke !== 1)) { + console.log("char.def parse error. 
INVOKE is 0 or 1 in:" + invoke); + return null; + } + if (!isFinite(grouping) || (grouping !== 0 && grouping !== 1)) { + console.log("char.def parse error. GROUP is 0 or 1 in:" + grouping); + return null; + } + if (!isFinite(max_length) || max_length < 0) { + console.log("char.def parse error. LENGTH is 1 to n:" + max_length); + return null; + } + var is_invoke = invoke === 1; + var is_grouping = grouping === 1; + return new CharacterClass(class_id, category, is_invoke, is_grouping, max_length); + } + static parseCategoryMapping(parsed_category_mapping) { + var start = parseInt(parsed_category_mapping[1]); + var default_category = parsed_category_mapping[2]; + var compatible_category = 3 < parsed_category_mapping.length + ? parsed_category_mapping.slice(3) + : []; + if (!isFinite(start) || start < 0 || start > 0xffff) { + console.log("char.def parse error. CODE is invalid:" + start); + } + return { + start: start, + default: default_category, + compatible: compatible_category, + }; + } + static parseRangeCategoryMapping(parsed_category_mapping) { + var start = parseInt(parsed_category_mapping[1]); + var end = parseInt(parsed_category_mapping[2]); + var default_category = parsed_category_mapping[3]; + var compatible_category = 4 < parsed_category_mapping.length + ? parsed_category_mapping.slice(4) + : []; + if (!isFinite(start) || start < 0 || start > 0xffff) { + console.log("char.def parse error. CODE is invalid:" + start); + } + if (!isFinite(end) || end < 0 || end > 0xffff) { + console.log("char.def parse error. 
CODE is invalid:" + end); + } + return { + start: start, + end: end, + default: default_category, + compatible: compatible_category, + }; + } + /** + * Initializing method + * @param {Array} category_mapping Array of category mapping + */ + initCategoryMappings(category_mapping) { + // Initialize map by DEFAULT class + var code_point; + if (category_mapping != null) { + for (var i = 0; i < category_mapping.length; i++) { + var mapping = category_mapping[i]; + var end = mapping.end ?? mapping.start; + for (code_point = mapping.start; code_point <= end; code_point++) { + // Default Category class ID + this.character_category_map[code_point] = + this.invoke_definition_map?.lookup(mapping.default) ?? 0; + for (var j = 0; j < mapping.compatible.length; j++) { + var bitset = this.compatible_category_map[code_point]; + var compatible_category = mapping.compatible[j]; + if (compatible_category == null) { + continue; + } + var class_id = this.invoke_definition_map?.lookup(compatible_category); // Default Category + if (class_id == null) { + continue; + } + var class_id_bit = 1 << class_id; + bitset = bitset | class_id_bit; // Set a bit of class ID 例えば、class_idが3のとき、3ビット目に1を立てる + this.compatible_category_map[code_point] = bitset; + } + } + } + } + var default_id = this.invoke_definition_map?.lookup(DEFAULT_CATEGORY); + if (default_id == null) { + return; + } + for (code_point = 0; code_point < this.character_category_map.length; code_point++) { + // 他に何のクラスも定義されていなかったときだけ DEFAULT + if (this.character_category_map[code_point] === 0) { + // DEFAULT class ID に対応するビットだけ1を立てる + this.character_category_map[code_point] = 1 << default_id; + } + } + } + /** + * Lookup compatible categories for a character (not included 1st category) + * @param {string} ch UCS2 character (just 1st character is effective) + * @returns {Array.} character classes + */ + lookupCompatibleCategory(ch) { + var classes = []; + /* + if (SurrogateAwareString.isSurrogatePair(ch)) { + // Surrogate pair character 
codes can not be defined by char.def + return classes; + }*/ + var code = ch.charCodeAt(0); + var integer; + if (code < this.compatible_category_map.length) { + integer = this.compatible_category_map[code]; // Bitset + } + if (integer == null || integer === 0) { + return classes; + } + for (var bit = 0; bit < 32; bit++) { + // Treat "bit" as a class ID + if ((integer << (31 - bit)) >>> 31 === 1) { + var character_class = this.invoke_definition_map?.getCharacterClass(bit); + if (character_class == null) { + continue; + } + classes.push(character_class); + } + } + return classes; + } + /** + * Lookup category for a character + * @param {string} ch UCS2 character (just 1st character is effective) + * @returns {CharacterClass} character class + */ + lookup(ch) { + let class_id = null; + if (this.invoke_definition_map === null) + return; + var code = ch.charCodeAt(0); + if (SurrogateAwareString.isSurrogatePair(ch)) { + // Surrogate pair character codes can not be defined by char.def, so set DEFAULT category + class_id = this.invoke_definition_map.lookup(DEFAULT_CATEGORY); + } + else if (code < this.character_category_map.length) { + class_id = this.character_category_map[code]; // Read as integer value + } + if (class_id == null) { + class_id = this.invoke_definition_map.lookup(DEFAULT_CATEGORY); + } + if (class_id === null) + return; + return this.invoke_definition_map.getCharacterClass(class_id); + } +} +export default CharacterDefinition; +//# sourceMappingURL=CharacterDefinition.js.map \ No newline at end of file diff --git a/dist/dict/CharacterDefinition.js.map b/dist/dict/CharacterDefinition.js.map new file mode 100644 index 0000000..7b6ad85 --- /dev/null +++ b/dist/dict/CharacterDefinition.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"CharacterDefinition.js","sourceRoot":"","sources":["../../src/dict/CharacterDefinition.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,cAAc,MAAM,qBAAqB,CAAC;AACjD,OAAO,oBAAoB,MAAM,iCAAiC,CAAC;AAEnE,MAAM,gBAAgB,GAAG,SAAS,CAAC;AAEnC,MAAM,mBAAmB;IAKvB;;;;OAIG;IACH;QATA;;;;;WAAmC;QACnC;;;;;WAAqC;QACrC;;;;;WAAkD;QAQhD,IAAI,CAAC,sBAAsB,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,2BAA2B;QAChF,IAAI,CAAC,uBAAuB,GAAG,IAAI,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,2BAA2B;QAClF,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC;IACpC,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,CACT,cAA0B,EAC1B,qBAAkC,EAClC,iBAA6B;QAE7B,IAAI,QAAQ,GAAG,IAAI,mBAAmB,EAAE,CAAC;QACzC,QAAQ,CAAC,sBAAsB,GAAG,cAAc,CAAC;QACjD,QAAQ,CAAC,uBAAuB,GAAG,qBAAqB,CAAC;QACzD,QAAQ,CAAC,qBAAqB;YAC5B,mBAAmB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAC9C,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,CAAC,iBAAiB,CACtB,QAAgB,EAChB,mBAA6B;QAE7B,IAAI,QAAQ,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;QACtC,IAAI,MAAM,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,QAAQ,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,UAAU,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YACxD,OAAO,CAAC,GAAG,CAAC,4CAA4C,GAAG,MAAM,CAAC,CAAC;YACnE,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,KAAK,CAAC,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;YAC9D,OAAO,CAAC,GAAG,CAAC,2CAA2C,GAAG,QAAQ,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,yCAAyC,GAAG,UAAU,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,SAAS,GAAG,MAAM,KAAK,CAAC,CAAC;QAC7B,IAAI,WAAW,GAAG,QAAQ,KAAK,CAAC,CAAC;QAEjC,OAAO,IAAI,cAAc,CACvB,QAAQ,EACR,QAAQ,EACR,SAAS,EACT,WAAW,EACX,UAAU,CACX,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,oBAAoB,CACzB,uBAAiC;QAEjC,IAAI,KAAK,GAAG,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,IAAI,gBAAgB,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QAClD,IAAI
,mBAAmB,GACrB,CAAC,GAAG,uBAAuB,CAAC,MAAM;YAChC,CAAC,CAAC,uBAAuB,CAAC,KAAK,CAAC,CAAC,CAAC;YAClC,CAAC,CAAC,EAAE,CAAC;QACT,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;YACpD,OAAO,CAAC,GAAG,CAAC,wCAAwC,GAAG,KAAK,CAAC,CAAC;QAChE,CAAC;QACD,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,OAAO,EAAE,gBAAgB;YACzB,UAAU,EAAE,mBAAmB;SAChC,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,yBAAyB,CAC9B,uBAAiC;QAEjC,IAAI,KAAK,GAAG,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,IAAI,GAAG,GAAG,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/C,IAAI,gBAAgB,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QAClD,IAAI,mBAAmB,GACrB,CAAC,GAAG,uBAAuB,CAAC,MAAM;YAChC,CAAC,CAAC,uBAAuB,CAAC,KAAK,CAAC,CAAC,CAAC;YAClC,CAAC,CAAC,EAAE,CAAC;QACT,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;YACpD,OAAO,CAAC,GAAG,CAAC,wCAAwC,GAAG,KAAK,CAAC,CAAC;QAChE,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,MAAM,EAAE,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,wCAAwC,GAAG,GAAG,CAAC,CAAC;QAC9D,CAAC;QACD,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,GAAG,EAAE,GAAG;YACR,OAAO,EAAE,gBAAgB;YACzB,UAAU,EAAE,mBAAmB;SAChC,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,oBAAoB,CAAC,gBAAoC;QACvD,kCAAkC;QAClC,IAAI,UAAU,CAAC;QACf,IAAI,gBAAgB,IAAI,IAAI,EAAE,CAAC;YAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjD,IAAI,OAAO,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;gBAClC,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,KAAK,CAAC;gBACvC,KAAK,UAAU,GAAG,OAAO,CAAC,KAAK,EAAE,UAAU,IAAI,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC;oBACjE,4BAA4B;oBAC5B,IAAI,CAAC,sBAAsB,CAAC,UAAU,CAAC;wBACrC,IAAI,CAAC,qBAAqB,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBACnD,IAAI,MAAM,GAAG,IAAI,CAAC,uBAAuB,CAAC,UAAU,CAAC,CAAC;wBACtD,IAAI,mBAAmB,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;wBAChD,IAAI,mBAAmB,IAAI,IAAI,EAAE,CAAC;4BAChC,SAAS;wBACX,CAAC;wBACD,IAAI,QAAQ,GACV,IAAI,CAAC,qBAAqB,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC,mBAAmB;wBAC9E,IAAI,QAAQ
,IAAI,IAAI,EAAE,CAAC;4BACrB,SAAS;wBACX,CAAC;wBACD,IAAI,YAAY,GAAG,CAAC,IAAI,QAAQ,CAAC;wBACjC,MAAM,GAAG,MAAM,GAAG,YAAY,CAAC,CAAC,sDAAsD;wBACtF,IAAI,CAAC,uBAAuB,CAAC,UAAU,CAAC,GAAG,MAAM,CAAC;oBACpD,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QACD,IAAI,UAAU,GAAG,IAAI,CAAC,qBAAqB,EAAE,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACtE,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;YACvB,OAAO;QACT,CAAC;QACD,KACE,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAC/C,UAAU,EAAE,EACZ,CAAC;YACD,iCAAiC;YACjC,IAAI,IAAI,CAAC,sBAAsB,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;gBAClD,mCAAmC;gBACnC,IAAI,CAAC,sBAAsB,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC;YAC5D,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,wBAAwB,CAAC,EAAU;QACjC,IAAI,OAAO,GAAqB,EAAE,CAAC;QAEnC;;;;YAII;QACJ,IAAI,IAAI,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,OAAO,CAAC;QACZ,IAAI,IAAI,GAAG,IAAI,CAAC,uBAAuB,CAAC,MAAM,EAAE,CAAC;YAC/C,OAAO,GAAG,IAAI,CAAC,uBAAuB,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;QACzD,CAAC;QAED,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;YACrC,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,EAAE,EAAE,GAAG,EAAE,EAAE,CAAC;YAClC,4BAA4B;YAC5B,IAAI,CAAC,OAAO,IAAI,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,EAAE,CAAC;gBACzC,IAAI,eAAe,GACjB,IAAI,CAAC,qBAAqB,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC;gBACrD,IAAI,eAAe,IAAI,IAAI,EAAE,CAAC;oBAC5B,SAAS;gBACX,CAAC;gBACD,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,EAAU;QACf,IAAI,QAAQ,GAAkB,IAAI,CAAC;QACnC,IAAI,IAAI,CAAC,qBAAqB,KAAK,IAAI;YAAE,OAAO;QAEhD,IAAI,IAAI,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,oBAAoB,CAAC,eAAe,CAAC,EAAE,CAAC,EAAE,CAAC;YAC7C,yFAAyF;YACzF,QAAQ,GAAG,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACjE,CAAC;aAAM,IAAI,IAAI,GAAG,IAAI,CAAC,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACrD,QAAQ,GAAG,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC,CAAC,wBAAwB;QACxE,CAAC;QAED,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YACrB,QAAQ,GAAG,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;QACjE,CAAC;QAED,IAAI,QAAQ,KAAK,IAAI;YAAE,OAAO;QAE
9B,OAAO,IAAI,CAAC,qBAAqB,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAChE,CAAC;CACF;AAED,eAAe,mBAAmB,CAAC"} \ No newline at end of file diff --git a/dist/dict/ConnectionCosts.d.ts b/dist/dict/ConnectionCosts.d.ts new file mode 100644 index 0000000..452addb --- /dev/null +++ b/dist/dict/ConnectionCosts.d.ts @@ -0,0 +1,17 @@ +declare class ConnectionCosts { + forward_dimension: number; + backward_dimension: number; + buffer: Int16Array; + /** + * Connection costs matrix from cc.dat file. + * 2 dimension matrix [forward_id][backward_id] -> cost + * @constructor + * @param {number} forward_dimension + * @param {number} backward_dimension + */ + constructor(forward_dimension: number, backward_dimension: number); + put(forward_id: number, backward_id: number, cost: number): void; + get(forward_id: number, backward_id: number): number; + loadConnectionCosts(connection_costs_buffer: Int16Array): void; +} +export default ConnectionCosts; diff --git a/dist/dict/ConnectionCosts.js b/dist/dict/ConnectionCosts.js new file mode 100644 index 0000000..2d60bdc --- /dev/null +++ b/dist/dict/ConnectionCosts.js @@ -0,0 +1,73 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +class ConnectionCosts { + /** + * Connection costs matrix from cc.dat file. 
+ * 2 dimension matrix [forward_id][backward_id] -> cost + * @constructor + * @param {number} forward_dimension + * @param {number} backward_dimension + */ + constructor(forward_dimension, backward_dimension) { + Object.defineProperty(this, "forward_dimension", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "backward_dimension", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "buffer", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.forward_dimension = forward_dimension; + this.backward_dimension = backward_dimension; + // leading 2 integers for forward_dimension, backward_dimension, respectively + this.buffer = new Int16Array(forward_dimension * backward_dimension + 2); + this.buffer[0] = forward_dimension; + this.buffer[1] = backward_dimension; + } + put(forward_id, backward_id, cost) { + const index = forward_id * this.backward_dimension + backward_id + 2; + if (this.buffer.length < index + 1) { + throw "ConnectionCosts buffer overflow"; + } + this.buffer[index] = cost; + } + get(forward_id, backward_id) { + const index = forward_id * this.backward_dimension + backward_id + 2; + if (this.buffer.length < index + 1) { + throw "ConnectionCosts buffer overflow"; + } + return this.buffer[index]; + } + loadConnectionCosts(connection_costs_buffer) { + this.forward_dimension = connection_costs_buffer[0]; + this.backward_dimension = connection_costs_buffer[1]; + this.buffer = connection_costs_buffer; + } +} +export default ConnectionCosts; +//# sourceMappingURL=ConnectionCosts.js.map \ No newline at end of file diff --git a/dist/dict/ConnectionCosts.js.map b/dist/dict/ConnectionCosts.js.map new file mode 100644 index 0000000..7b5f6db --- /dev/null +++ b/dist/dict/ConnectionCosts.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"ConnectionCosts.js","sourceRoot":"","sources":["../../src/dict/ConnectionCosts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,MAAM,eAAe;IAKnB;;;;;;OAMG;IACH,YAAY,iBAAyB,EAAE,kBAA0B;QAXjE;;;;;WAA0B;QAC1B;;;;;WAA2B;QAC3B;;;;;WAAmB;QAUjB,IAAI,CAAC,iBAAiB,GAAG,iBAAiB,CAAC;QAC3C,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAE7C,6EAA6E;QAC7E,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,iBAAiB,GAAG,kBAAkB,GAAG,CAAC,CAAC,CAAC;QACzE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC;IACtC,CAAC;IAED,GAAG,CAAC,UAAkB,EAAE,WAAmB,EAAE,IAAY;QACvD,MAAM,KAAK,GAAG,UAAU,GAAG,IAAI,CAAC,kBAAkB,GAAG,WAAW,GAAG,CAAC,CAAC;QACrE,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,iCAAiC,CAAC;QAC1C,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;IAC5B,CAAC;IAED,GAAG,CAAC,UAAkB,EAAE,WAAmB;QACzC,MAAM,KAAK,GAAG,UAAU,GAAG,IAAI,CAAC,kBAAkB,GAAG,WAAW,GAAG,CAAC,CAAC;QACrE,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,iCAAiC,CAAC;QAC1C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED,mBAAmB,CAAC,uBAAmC;QACrD,IAAI,CAAC,iBAAiB,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QACpD,IAAI,CAAC,kBAAkB,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,uBAAuB,CAAC;IACxC,CAAC;CACF;AAED,eAAe,eAAe,CAAC"} \ No newline at end of file diff --git a/dist/dict/DynamicDictionaries.d.ts b/dist/dict/DynamicDictionaries.d.ts new file mode 100644 index 0000000..dcf1d32 --- /dev/null +++ b/dist/dict/DynamicDictionaries.d.ts @@ -0,0 +1,25 @@ +import TokenInfoDictionary from "./TokenInfoDictionary.js"; +import ConnectionCosts from "./ConnectionCosts.js"; +import UnknownDictionary from "./UnknownDictionary.js"; +import type DoubleArray from "doublearray.ts/dist/doubleArrayClass.js"; +import { ArrayBuffer } from "doublearray.ts/dist/types.js"; +declare class DynamicDictionaries { + trie: DoubleArray; + token_info_dictionary: TokenInfoDictionary; + connection_costs: ConnectionCosts; + 
unknown_dictionary: UnknownDictionary; + /** + * Dictionaries container for Tokenizer + * @param {DoubleArray} trie + * @param {TokenInfoDictionary} token_info_dictionary + * @param {ConnectionCosts} connection_costs + * @param {UnknownDictionary} unknown_dictionary + * @constructor + */ + constructor(trie?: DoubleArray | null, token_info_dictionary?: TokenInfoDictionary | null, connection_costs?: ConnectionCosts | null, unknown_dictionary?: UnknownDictionary | null); + loadTrie(base_buffer: ArrayBuffer, check_buffer: ArrayBuffer): this; + loadTokenInfoDictionaries(token_info_buffer: Uint8Array, pos_buffer: Uint8Array, target_map_buffer: Uint8Array): this; + loadConnectionCosts(cc_buffer: Int16Array): this; + loadUnknownDictionaries(unk_buffer: Uint8Array, unk_pos_buffer: Uint8Array, unk_map_buffer: Uint8Array, cat_map_buffer: Uint8Array, compat_cat_map_buffer: Uint32Array, invoke_def_buffer: Uint8Array): this; +} +export default DynamicDictionaries; diff --git a/dist/dict/DynamicDictionaries.js b/dist/dict/DynamicDictionaries.js new file mode 100644 index 0000000..e3c51a0 --- /dev/null +++ b/dist/dict/DynamicDictionaries.js @@ -0,0 +1,102 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import TokenInfoDictionary from "./TokenInfoDictionary.js"; +import ConnectionCosts from "./ConnectionCosts.js"; +import UnknownDictionary from "./UnknownDictionary.js"; +import doublearray from "doublearray.ts"; +class DynamicDictionaries { + /** + * Dictionaries container for Tokenizer + * @param {DoubleArray} trie + * @param {TokenInfoDictionary} token_info_dictionary + * @param {ConnectionCosts} connection_costs + * @param {UnknownDictionary} unknown_dictionary + * @constructor + */ + constructor(trie, token_info_dictionary, connection_costs, unknown_dictionary) { + Object.defineProperty(this, "trie", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "token_info_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "connection_costs", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "unknown_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + if (trie != null) { + this.trie = trie; + } + else { + this.trie = doublearray.builder(0).build([{ k: "", v: 1 }]); + } + if (token_info_dictionary != null) { + this.token_info_dictionary = token_info_dictionary; + } + else { + this.token_info_dictionary = new TokenInfoDictionary(); + } + if (connection_costs != null) { + this.connection_costs = connection_costs; + } + else { + // backward_size * backward_size + this.connection_costs = new ConnectionCosts(0, 0); + } + if (unknown_dictionary != null) { + this.unknown_dictionary = unknown_dictionary; + } + else { + this.unknown_dictionary = new UnknownDictionary(); + } + } + // from base.dat & check.dat + loadTrie(base_buffer, check_buffer) { + this.trie = doublearray.load(base_buffer, check_buffer); + return this; + } + loadTokenInfoDictionaries(token_info_buffer, pos_buffer, target_map_buffer) { + 
this.token_info_dictionary.loadDictionary(token_info_buffer); + this.token_info_dictionary.loadPosVector(pos_buffer); + this.token_info_dictionary.loadTargetMap(target_map_buffer); + return this; + } + loadConnectionCosts(cc_buffer) { + this.connection_costs.loadConnectionCosts(cc_buffer); + return this; + } + loadUnknownDictionaries(unk_buffer, unk_pos_buffer, unk_map_buffer, cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer) { + this.unknown_dictionary.loadUnknownDictionaries(unk_buffer, unk_pos_buffer, unk_map_buffer, cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer); + return this; + } +} +export default DynamicDictionaries; +//# sourceMappingURL=DynamicDictionaries.js.map \ No newline at end of file diff --git a/dist/dict/DynamicDictionaries.js.map b/dist/dict/DynamicDictionaries.js.map new file mode 100644 index 0000000..34ac605 --- /dev/null +++ b/dist/dict/DynamicDictionaries.js.map @@ -0,0 +1 @@ +{"version":3,"file":"DynamicDictionaries.js","sourceRoot":"","sources":["../../src/dict/DynamicDictionaries.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,eAAe,MAAM,sBAAsB,CAAC;AACnD,OAAO,iBAAiB,MAAM,wBAAwB,CAAC;AACvD,OAAO,WAAW,MAAM,gBAAgB,CAAC;AAIzC,MAAM,mBAAmB;IAMvB;;;;;;;OAOG;IACH,YACE,IAAyB,EACzB,qBAAkD,EAClD,gBAAyC,EACzC,kBAA6C;QAjB/C;;;;;WAAkB;QAClB;;;;;WAA2C;QAC3C;;;;;WAAkC;QAClC;;;;;WAAsC;QAgBpC,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;YACjB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9D,CAAC;QACD,IAAI,qBAAqB,IAAI,IAAI,EAAE,CAAC;YAClC,IAAI,CAAC,qBAAqB,GAAG,qBAAqB,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,qBAAqB,GAAG,IAAI,mBAAmB,EAAE,CAAC;QACzD,CAAC;QACD,IAAI,gBAAgB,IAAI,IAAI,EAAE,CAAC;YAC7B,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;QAC3C,CAAC;aAAM,CAAC;YACN,gCAAgC;YAChC,IAAI,CAAC,gBAAgB,GAAG,IAAI,eAAe,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,kBAAkB,IAAI,IAAI,EA
AE,CAAC;YAC/B,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,kBAAkB,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,QAAQ,CAAC,WAAwB,EAAE,YAAyB;QAC1D,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,yBAAyB,CACvB,iBAA6B,EAC7B,UAAsB,EACtB,iBAA6B;QAE7B,IAAI,CAAC,qBAAqB,CAAC,cAAc,CAAC,iBAAiB,CAAC,CAAC;QAC7D,IAAI,CAAC,qBAAqB,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;QACrD,IAAI,CAAC,qBAAqB,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mBAAmB,CAAC,SAAqB;QACvC,IAAI,CAAC,gBAAgB,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uBAAuB,CACrB,UAAsB,EACtB,cAA0B,EAC1B,cAA0B,EAC1B,cAA0B,EAC1B,qBAAkC,EAClC,iBAA6B;QAE7B,IAAI,CAAC,kBAAkB,CAAC,uBAAuB,CAC7C,UAAU,EACV,cAAc,EACd,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,iBAAiB,CAClB,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED,eAAe,mBAAmB,CAAC"} \ No newline at end of file diff --git a/dist/dict/InvokeDefinitionMap.d.ts b/dist/dict/InvokeDefinitionMap.d.ts new file mode 100644 index 0000000..09e8eb3 --- /dev/null +++ b/dist/dict/InvokeDefinitionMap.d.ts @@ -0,0 +1,41 @@ +import CharacterClass from "./CharacterClass.js"; +declare class InvokeDefinitionMap { + map: CharacterClass[]; + lookup_table: { + [key: string]: number; + }; + /** + * InvokeDefinitionMap represents invoke definition a part of char.def + * @constructor + */ + constructor(); + /** + * Load InvokeDefinitionMap from buffer + * @param {Uint8Array} invoke_def_buffer + * @returns {InvokeDefinitionMap} + */ + static load(invoke_def_buffer: Uint8Array): InvokeDefinitionMap; + /** + * Initializing method + * @param {Array.} character_category_definition Array of CharacterClass + */ + init(character_category_definition: CharacterClass[]): void; + /** + * Get class information by class ID + * @param {number} class_id + * @returns {CharacterClass} + */ + getCharacterClass(class_id: number): CharacterClass; + /** + * For building character definition dictionary + * @param 
{string} class_name character + * @returns {number} class_id + */ + lookup(class_name: string): number | null; + /** + * Transform from map to binary buffer + * @returns {Uint8Array} + */ + toBuffer(): Uint8Array; +} +export default InvokeDefinitionMap; diff --git a/dist/dict/InvokeDefinitionMap.js b/dist/dict/InvokeDefinitionMap.js new file mode 100644 index 0000000..dcb9f5a --- /dev/null +++ b/dist/dict/InvokeDefinitionMap.js @@ -0,0 +1,113 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import ByteBuffer from "../util/ByteBuffer.js"; +import CharacterClass from "./CharacterClass.js"; +class InvokeDefinitionMap { + /** + * InvokeDefinitionMap represents invoke definition a part of char.def + * @constructor + */ + constructor() { + Object.defineProperty(this, "map", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "lookup_table", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.map = []; + this.lookup_table = {}; // Just for building dictionary + } + /** + * Load InvokeDefinitionMap from buffer + * @param {Uint8Array} invoke_def_buffer + * @returns {InvokeDefinitionMap} + */ + static load(invoke_def_buffer) { + var invoke_def = new InvokeDefinitionMap(); + var character_category_definition = []; + var buffer = new ByteBuffer(invoke_def_buffer); + while (buffer.position + 1 < buffer.size()) { + var class_id = character_category_definition.length; + var is_always_invoke = buffer.get(); + var is_grouping = buffer.get(); + var max_length = buffer.getInt(); + var class_name = buffer.getString(); + character_category_definition.push(new CharacterClass(class_id, class_name, is_always_invoke, is_grouping, max_length)); + } + invoke_def.init(character_category_definition); + return invoke_def; + } + /** + * Initializing method + * @param {Array.} character_category_definition Array of CharacterClass + */ + init(character_category_definition) { + if (character_category_definition == null) { + return; + } + for (var i = 0; i < character_category_definition.length; i++) { + var character_class = character_category_definition[i]; + this.map[i] = character_class; + this.lookup_table[character_class.class_name] = i; + } + } + /** + * Get class information by class ID + * @param {number} class_id + * @returns {CharacterClass} + */ + getCharacterClass(class_id) { + return this.map[class_id]; + } + /** + * For building character definition 
dictionary + * @param {string} class_name character + * @returns {number} class_id + */ + lookup(class_name) { + const class_id = this.lookup_table[class_name]; + if (class_id == null) { + return null; + } + return class_id; + } + /** + * Transform from map to binary buffer + * @returns {Uint8Array} + */ + toBuffer() { + var buffer = new ByteBuffer(); + for (var i = 0; i < this.map.length; i++) { + var char_class = this.map[i]; + buffer.put(Number(char_class.is_always_invoke)); + buffer.put(Number(char_class.is_grouping)); + buffer.putInt(char_class.max_length); + buffer.putString(char_class.class_name); + } + buffer.shrink(); + return buffer.buffer; + } +} +export default InvokeDefinitionMap; +//# sourceMappingURL=InvokeDefinitionMap.js.map \ No newline at end of file diff --git a/dist/dict/InvokeDefinitionMap.js.map b/dist/dict/InvokeDefinitionMap.js.map new file mode 100644 index 0000000..6a53c4b --- /dev/null +++ b/dist/dict/InvokeDefinitionMap.js.map @@ -0,0 +1 @@ +{"version":3,"file":"InvokeDefinitionMap.js","sourceRoot":"","sources":["../../src/dict/InvokeDefinitionMap.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,UAAU,MAAM,uBAAuB,CAAC;AAC/C,OAAO,cAAc,MAAM,qBAAqB,CAAC;AAEjD,MAAM,mBAAmB;IAIvB;;;OAGG;IACH;QAPA;;;;;WAAsB;QACtB;;;;;WAAwC;QAOtC,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;QACd,IAAI,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,+BAA+B;IACzD,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,IAAI,CAAC,iBAA6B;QACvC,IAAI,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC3C,IAAI,6BAA6B,GAAG,EAAE,CAAC;QAEvC,IAAI,MAAM,GAAG,IAAI,UAAU,CAAC,iBAAiB,CAAC,CAAC;QAC/C,OAAO,MAAM,CAAC,QAAQ,GAAG,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3C,IAAI,QAAQ,GAAW,6BAA6B,CAAC,MAAM,CAAC;YAC5D,IAAI,gBAAgB,GAAG,MAAM,CAAC,GAAG,EAAE,CAAC;YACpC,IAAI,WAAW,GAAG,MAAM,CAAC,GAAG,EAAE,CAAC;YAC/B,IAAI,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;YACjC,IAAI,UAAU,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;YACpC,6BAA6B,CAAC,IAAI,CAChC,IAAI,cAAc,CAChB,QAAQ,EACR,UAAU,EACV,gBAAgB,EAChB,WAAW,EACX,UAAU,CACX,CACF,CAAC;QACJ,CAAC;QAED,UAAU,CAAC,IAAI,CA
AC,6BAA6B,CAAC,CAAC;QAE/C,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;OAGG;IACH,IAAI,CAAC,6BAA+C;QAClD,IAAI,6BAA6B,IAAI,IAAI,EAAE,CAAC;YAC1C,OAAO;QACT,CAAC;QACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,6BAA6B,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9D,IAAI,eAAe,GAAG,6BAA6B,CAAC,CAAC,CAAC,CAAC;YACvD,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC;YAC9B,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,iBAAiB,CAAC,QAAgB;QAChC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,UAAkB;QACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC/C,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YACrB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;;OAGG;IACH,QAAQ;QACN,IAAI,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAChD,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;YAC3C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;YACrC,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;QAC1C,CAAC;QACD,MAAM,CAAC,MAAM,EAAE,CAAC;QAChB,OAAO,MAAM,CAAC,MAAM,CAAC;IACvB,CAAC;CACF;AAED,eAAe,mBAAmB,CAAC"} \ No newline at end of file diff --git a/dist/dict/TokenInfoDictionary.d.ts b/dist/dict/TokenInfoDictionary.d.ts new file mode 100644 index 0000000..e9e4a35 --- /dev/null +++ b/dist/dict/TokenInfoDictionary.d.ts @@ -0,0 +1,29 @@ +import ByteBuffer from "../util/ByteBuffer.js"; +declare class TokenInfoDictionary { + dictionary: ByteBuffer; + target_map: { + [key: string]: number[]; + }; + pos_buffer: ByteBuffer; + /** + * TokenInfoDictionary + * @constructor + */ + constructor(); + buildDictionary(entries: string[][]): { + [key: number]: string; + }; + put(left_id: number, right_id: number, word_cost: number, surface_form: string, feature: string): number; + addMapping(source: number, target: number): void; + targetMapToBuffer(): 
Uint8Array; + loadDictionary(array_buffer: Uint8Array): this; + loadPosVector(array_buffer: Uint8Array): this; + loadTargetMap(array_buffer: Uint8Array): this; + /** + * Look up features in the dictionary + * @param {string} token_info_id_str Word ID to look up + * @returns {string} Features string concatenated by "," + */ + getFeatures(token_info_id_str: string): string; +} +export default TokenInfoDictionary; diff --git a/dist/dict/TokenInfoDictionary.js b/dist/dict/TokenInfoDictionary.js new file mode 100644 index 0000000..637d667 --- /dev/null +++ b/dist/dict/TokenInfoDictionary.js @@ -0,0 +1,151 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Token storage backed by two ByteBuffers and a lookup map.
 *
 * - `dictionary` receives, per token, three shorts (left id, right id, word
 *   cost) followed by one int: the token's position inside `pos_buffer`.
 * - `pos_buffer` receives one string per token: "<surface form>,<features>".
 * - `target_map` maps a trie id (of a surface form) to the list of token info
 *   ids registered for it.
 */
class TokenInfoDictionary {
  /**
   * TokenInfoDictionary
   * @constructor
   */
  constructor() {
    // NOTE(review): the numeric argument is presumably ByteBuffer's initial
    // capacity in bytes — confirm against ByteBuffer.
    this.dictionary = new ByteBuffer(10 * 1024 * 1024);
    this.target_map = {}; // trie_id (of surface form) -> token_info_id (of token)
    this.pos_buffer = new ByteBuffer(10 * 1024 * 1024);
  }

  // left_id right_id word_cost ...
  // ^ this position is token_info_id
  /**
   * Write every entry into the dictionary and return where each one landed.
   *
   * @param {Array<Array>} entries Rows of the form
   *   [surface form, left id, right id, word cost, ...features].
   *   Rows with fewer than four items are skipped.
   * @returns {Object<string, string>} Map of token_info_id -> surface form.
   */
  buildDictionary(entries) {
    // Used as a hash map: token_info_id -> surface_form
    const dictionary_entries = {};

    for (const entry of entries) {
      if (entry.length < 4) {
        continue;
      }

      const surface_form = entry[0].toString();
      const left_id = Number(entry[1]);
      const right_id = Number(entry[2]);
      const word_cost = Number(entry[3]);
      const feature = entry.slice(4).join(","); // TODO Optimize

      // Assertion: a malformed row is reported but still written, exactly as before.
      if (
        !Number.isFinite(left_id) ||
        !Number.isFinite(right_id) ||
        !Number.isFinite(word_cost)
      ) {
        console.log(entry);
      }

      const token_info_id = this.put(left_id, right_id, word_cost, surface_form, feature);
      dictionary_entries[token_info_id] = surface_form;
    }

    // Remove last unused area
    this.dictionary.shrink();
    this.pos_buffer.shrink();

    return dictionary_entries;
  }

  /**
   * Append one token to `dictionary` and its text to `pos_buffer`.
   *
   * @param {number} left_id
   * @param {number} right_id
   * @param {number} word_cost
   * @param {string} surface_form
   * @param {string} feature Comma-joined feature string.
   * @returns {number} The `dictionary` position before the write; this is the
   *   token_info_id of the new token.
   */
  put(left_id, right_id, word_cost, surface_form, feature) {
    const token_info_id = this.dictionary.position;
    const pos_id = this.pos_buffer.position;

    this.dictionary.putShort(left_id);
    this.dictionary.putShort(right_id);
    this.dictionary.putShort(word_cost);
    this.dictionary.putInt(pos_id);
    this.pos_buffer.putString(`${surface_form},${feature}`);

    return token_info_id;
  }

  /**
   * Register `target` under `source` in `target_map`, creating the list on
   * first use.
   *
   * @param {number|string} source Map key (trie id).
   * @param {number} target Token info id to append.
   */
  addMapping(source, target) {
    const mapping = this.target_map[source] ?? [];
    mapping.push(target);
    this.target_map[source] = mapping;
  }

  /**
   * Serialize `target_map` as a sequence of ints:
   * key count, then for each key: key, value count, values...
   *
   * The count and the entries come from the same own-key snapshot, so they
   * cannot disagree.
   *
   * @returns {*} Whatever `ByteBuffer#shrink` returns (the shrunk storage).
   */
  targetMapToBuffer() {
    const buffer = new ByteBuffer();
    const mappings = Object.entries(this.target_map);

    buffer.putInt(mappings.length);
    for (const [key, values] of mappings) {
      buffer.putInt(Number.parseInt(key, 10));
      buffer.putInt(values.length);
      for (const value of values) {
        buffer.putInt(value);
      }
    }

    return buffer.shrink(); // Shrink-ed Typed Array
  }

  /**
   * Replace `dictionary` with a ByteBuffer over the given data (from tid.dat).
   *
   * @param {Uint8Array} array_buffer
   * @returns {this}
   */
  loadDictionary(array_buffer) {
    this.dictionary = new ByteBuffer(array_buffer);
    return this;
  }

  /**
   * Replace `pos_buffer` with a ByteBuffer over the given data (from tid_pos.dat).
   *
   * @param {Uint8Array} array_buffer
   * @returns {this}
   */
  loadPosVector(array_buffer) {
    this.pos_buffer = new ByteBuffer(array_buffer);
    return this;
  }

  /**
   * Rebuild `target_map` from serialized data (from tid_map.dat).
   *
   * The leading key count is read and discarded; records are consumed until
   * the read position reaches the end of the underlying storage.
   *
   * @param {Uint8Array} array_buffer
   * @returns {this}
   */
  loadTargetMap(array_buffer) {
    const buffer = new ByteBuffer(array_buffer);
    buffer.position = 0;
    this.target_map = {};
    buffer.readInt(); // map_keys_size

    while (buffer.buffer.length > buffer.position) {
      const key = buffer.readInt();
      const map_values_size = buffer.readInt();
      for (let i = 0; i < map_values_size; i++) {
        const value = buffer.readInt();
        this.addMapping(key, value);
      }
    }

    return this;
  }

  /**
   * Look up features in the dictionary
   * @param {string} token_info_id_str Word ID to look up (decimal)
   * @returns {string} Features string concatenated by ",",
   *   or "" when the id is not a number
   */
  getFeatures(token_info_id_str) {
    const token_info_id = Number.parseInt(token_info_id_str, 10);
    if (Number.isNaN(token_info_id)) {
      // TODO throw error
      return "";
    }

    // Skip the three shorts written by put() to reach the pos_id int.
    // NOTE(review): the +6 assumes putShort writes 2 bytes — confirm in ByteBuffer.
    const pos_id = this.dictionary.getInt(token_info_id + 6);
    return this.pos_buffer.getString(pos_id);
  }
}
No newline at end of file diff --git a/dist/dict/TokenInfoDictionary.js.map b/dist/dict/TokenInfoDictionary.js.map new file mode 100644 index 0000000..42a7431 --- /dev/null +++ b/dist/dict/TokenInfoDictionary.js.map @@ -0,0 +1 @@ +{"version":3,"file":"TokenInfoDictionary.js","sourceRoot":"","sources":["../../src/dict/TokenInfoDictionary.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,UAAU,MAAM,uBAAuB,CAAC;AAE/C,MAAM,mBAAmB;IAKvB;;;OAGG;IACH;QARA;;;;;WAAuB;QACvB;;;;;WAAwC;QACxC;;;;;WAAuB;QAOrB,IAAI,CAAC,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC,wDAAwD;QAC9E,IAAI,CAAC,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;IACrD,CAAC;IAED,iCAAiC;IACjC,mCAAmC;IACnC,eAAe,CAAC,OAAmB;QACjC,IAAI,kBAAkB,GAA8B,EAAE,CAAC,CAAC,mFAAmF;QAE3I,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,IAAI,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAEvB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,SAAS;YACX,CAAC;YAED,IAAI,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACvC,IAAI,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAChC,IAAI,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;YAExD,YAAY;YACZ,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YAED,IAAI,aAAa,GAAG,IAAI,CAAC,GAAG,CAC1B,OAAO,EACP,QAAQ,EACR,SAAS,EACT,YAAY,EACZ,OAAO,CACR,CAAC;YACF,kBAAkB,CAAC,aAAa,CAAC,GAAG,YAAY,CAAC;QACnD,CAAC;QAED,0BAA0B;QAC1B,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QAEzB,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IAED,GAAG,CACD,OAAe,EACf,QAAgB,EAChB,SAAiB,EACjB,YAAoB,EACpB,OAAe;QAEf,IAAI,aAAa,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAC7C,IAAI,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAEtC,IAAI,CAAC,UAAU,CAAC,
QAAQ,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACpC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,YAAY,GAAG,GAAG,GAAG,OAAO,CAAC,CAAC;QAExD,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,UAAU,CAAC,MAAc,EAAE,MAAc;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAC9C,yBAAyB;QACzB,kBAAkB;QAClB,IAAI;QACJ,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAErB,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC;IACpC,CAAC;IAED,iBAAiB;QACf,IAAI,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAC7B,KAAK,IAAI,GAAG,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YAChC,IAAI,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ;YAC3C,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC;YACpC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,wBAAwB;IAClD,CAAC;IAED,eAAe;IACf,cAAc,CAAC,YAAwB;QACrC,IAAI,CAAC,UAAU,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;QAC/C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mBAAmB;IACnB,aAAa,CAAC,YAAwB;QACpC,IAAI,CAAC,UAAU,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;QAC/C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mBAAmB;IACnB,aAAa,CAAC,YAAwB;QACpC,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;QAC5C,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;QACpB,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;QACrB,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,gBAAgB;QAClC,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC9C,oDAAoD;YACpD,WAAW;YACX,IAAI;YACJ,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;YAC7B,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;YACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC/B,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QACD,OAAO,IA
AI,CAAC;IACd,CAAC;IAED;;;;OAIG;IACH,WAAW,CAAC,iBAAyB;QACnC,IAAI,aAAa,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAChD,IAAI,KAAK,CAAC,aAAa,CAAC,EAAE,CAAC;YACzB,mBAAmB;YACnB,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;QACvD,OAAO,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,eAAe,mBAAmB,CAAC"} \ No newline at end of file diff --git a/dist/dict/UnknownDictionary.d.ts b/dist/dict/UnknownDictionary.d.ts new file mode 100644 index 0000000..1dc989e --- /dev/null +++ b/dist/dict/UnknownDictionary.d.ts @@ -0,0 +1,21 @@ +import TokenInfoDictionary from "./TokenInfoDictionary.js"; +import CharacterDefinition from "./CharacterDefinition.js"; +import ByteBuffer from "../util/ByteBuffer.js"; +declare class UnknownDictionary extends TokenInfoDictionary { + dictionary: ByteBuffer; + target_map: { + [key: string]: number[]; + }; + pos_buffer: ByteBuffer; + character_definition: CharacterDefinition | null; + /** + * UnknownDictionary + * @constructor + */ + constructor(); + characterDefinition(character_definition: CharacterDefinition): this; + lookup(ch: string): import("./CharacterClass.js").default | undefined; + lookupCompatibleCategory(ch: string): import("./CharacterClass.js").default[] | undefined; + loadUnknownDictionaries(unk_buffer: Uint8Array, unk_pos_buffer: Uint8Array, unk_map_buffer: Uint8Array, cat_map_buffer: Uint8Array, compat_cat_map_buffer: Uint32Array, invoke_def_buffer: Uint8Array): void; +} +export default UnknownDictionary; diff --git a/dist/dict/UnknownDictionary.js b/dist/dict/UnknownDictionary.js new file mode 100644 index 0000000..79e5eaf --- /dev/null +++ b/dist/dict/UnknownDictionary.js @@ -0,0 +1,78 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * Dictionary for unknown words: a TokenInfoDictionary that additionally
 * carries a CharacterDefinition used to classify characters.
 */
class UnknownDictionary extends TokenInfoDictionary {
  /**
   * UnknownDictionary
   * @constructor
   */
  constructor() {
    super();
    // Replace whatever the base constructor set up with this class's own
    // fresh storage, then start without a character definition.
    this.dictionary = new ByteBuffer(10 * 1024 * 1024);
    this.target_map = {}; // class_id (of CharacterClass) -> token_info_id (of unknown class)
    this.pos_buffer = new ByteBuffer(10 * 1024 * 1024);
    this.character_definition = null;
  }

  /**
   * Attach a character definition.
   * @param {CharacterDefinition} character_definition
   * @returns {this}
   */
  characterDefinition(character_definition) {
    this.character_definition = character_definition;
    return this;
  }

  /**
   * Delegate to the attached definition's `lookup`.
   * @param {string} ch
   * @returns {*} The definition's result, or undefined when none is attached.
   */
  lookup(ch) {
    const definition = this.character_definition;
    if (definition == null) {
      return undefined;
    }
    return definition.lookup(ch);
  }

  /**
   * Delegate to the attached definition's `lookupCompatibleCategory`.
   * @param {string} ch
   * @returns {*} The definition's result, or undefined when none is attached.
   */
  lookupCompatibleCategory(ch) {
    const definition = this.character_definition;
    if (definition == null) {
      return undefined;
    }
    return definition.lookupCompatibleCategory(ch);
  }

  /**
   * Load all unknown-word data in one call: the three inherited buffers
   * first, then the character definition built by `CharacterDefinition.load`.
   */
  loadUnknownDictionaries(
    unk_buffer,
    unk_pos_buffer,
    unk_map_buffer,
    cat_map_buffer,
    compat_cat_map_buffer,
    invoke_def_buffer
  ) {
    this.loadDictionary(unk_buffer);
    this.loadPosVector(unk_pos_buffer);
    this.loadTargetMap(unk_map_buffer);

    const definition = CharacterDefinition.load(
      cat_map_buffer,
      compat_cat_map_buffer,
      invoke_def_buffer
    );
    this.character_definition = definition;
  }
}
file diff --git a/dist/dict/builder/CharacterDefinitionBuilder.d.ts b/dist/dict/builder/CharacterDefinitionBuilder.d.ts new file mode 100644 index 0000000..9b40112 --- /dev/null +++ b/dist/dict/builder/CharacterDefinitionBuilder.d.ts @@ -0,0 +1,15 @@ +import CharacterClass from "../CharacterClass.js"; +import CharacterDefinition from "../CharacterDefinition.js"; +declare class CharacterDefinitionBuilder { + char_def: CharacterDefinition; + character_category_definition: CharacterClass[]; + category_mapping: CategoryMapping[]; + /** + * CharacterDefinitionBuilder + * @constructor + */ + constructor(); + putLine(line: string): void; + build(): CharacterDefinition; +} +export default CharacterDefinitionBuilder; diff --git a/dist/dict/builder/CharacterDefinitionBuilder.js b/dist/dict/builder/CharacterDefinitionBuilder.js new file mode 100644 index 0000000..b4317df --- /dev/null +++ b/dist/dict/builder/CharacterDefinitionBuilder.js @@ -0,0 +1,82 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Category definition line: "<NAME> <digit> <digit> <integer>".
// The last field accepts several digits so a value such as 12 is not cut to 1.
const CATEGORY_DEF_PATTERN = /^(\w+)\s+(\d)\s+(\d)\s+(\d+)/;
// Single code point mapping: "0xHHHH CATEGORY [CATEGORY...]".
const CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/;
// Code point range mapping: "0xHHHH..0xHHHH CATEGORY [CATEGORY...]".
const RANGE_CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})\.\.(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/;

/**
 * Collects character-definition lines one at a time and assembles a
 * CharacterDefinition from them.
 */
class CharacterDefinitionBuilder {
  /**
   * CharacterDefinitionBuilder
   * @constructor
   */
  constructor() {
    this.char_def = new CharacterDefinition();
    this.char_def.invoke_definition_map = new InvokeDefinitionMap();
    this.character_category_definition = [];
    this.category_mapping = [];
  }

  /**
   * Classify one line and record what it defines. Lines matching none of the
   * three patterns (comments, blanks, ...) are ignored.
   *
   * @param {string} line
   */
  putLine(line) {
    const parsed_category_def = CATEGORY_DEF_PATTERN.exec(line);
    if (parsed_category_def != null) {
      // The class id is the index the new definition will occupy.
      const class_id = this.character_category_definition.length;
      const char_class = CharacterDefinition.parseCharCategory(class_id, parsed_category_def);
      if (char_class != null) {
        this.character_category_definition.push(char_class);
      }
      return;
    }

    const parsed_category_mapping = CATEGORY_MAPPING_PATTERN.exec(line);
    if (parsed_category_mapping != null) {
      this.category_mapping.push(
        CharacterDefinition.parseCategoryMapping(parsed_category_mapping)
      );
      // A single-code-point line can never also be a range line
      // (whitespace vs ".." right after the first code point).
      return;
    }

    const parsed_range_category_mapping = RANGE_CATEGORY_MAPPING_PATTERN.exec(line);
    if (parsed_range_category_mapping != null) {
      this.category_mapping.push(
        CharacterDefinition.parseRangeCategoryMapping(parsed_range_category_mapping)
      );
    }
  }

  /**
   * Feed the collected definitions and mappings into the CharacterDefinition.
   *
   * @returns {CharacterDefinition} The same instance created by the constructor.
   */
  build() {
    // TODO If DEFAULT category does not exist, throw error
    this.char_def.invoke_definition_map?.init(this.character_category_definition);
    this.char_def.initCategoryMappings(this.category_mapping);
    return this.char_def;
  }
}
BAAmB,CAAC,yBAAyB,CAC/D,6BAA6B,CAC9B,CAAC;YACF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,KAAK;QACH,uDAAuD;QACvD,IAAI,CAAC,QAAQ,CAAC,qBAAqB,EAAE,IAAI,CACvC,IAAI,CAAC,6BAA6B,CACnC,CAAC;QACF,IAAI,CAAC,QAAQ,CAAC,oBAAoB,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC1D,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;CACF;AAED,eAAe,0BAA0B,CAAC"} \ No newline at end of file diff --git a/dist/dict/builder/ConnectionCostsBuilder.d.ts b/dist/dict/builder/ConnectionCostsBuilder.d.ts new file mode 100644 index 0000000..639664b --- /dev/null +++ b/dist/dict/builder/ConnectionCostsBuilder.d.ts @@ -0,0 +1,13 @@ +import ConnectionCosts from "../ConnectionCosts.js"; +declare class ConnectionCostsBuilder { + lines: number; + connection_cost: ConnectionCosts | null; + /** + * Builder class for constructing ConnectionCosts object + * @constructor + */ + constructor(); + putLine(line: string): void; + build(): ConnectionCosts | null; +} +export default ConnectionCostsBuilder; diff --git a/dist/dict/builder/ConnectionCostsBuilder.js b/dist/dict/builder/ConnectionCostsBuilder.js new file mode 100644 index 0000000..c891336 --- /dev/null +++ b/dist/dict/builder/ConnectionCostsBuilder.js @@ -0,0 +1,75 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Builds a ConnectionCosts object from the lines of a "matrix.def" file.
 *
 * The first line fed to `putLine` carries the two dimensions; each later line
 * carries "<forward id> <backward id> <cost>".
 */
class ConnectionCostsBuilder {
  /**
   * Builder class for constructing ConnectionCosts object
   * @constructor
   */
  constructor() {
    this.lines = 0; // number of lines accepted so far
    this.connection_cost = null; // created when the header line arrives
  }

  /**
   * Consume one line of "matrix.def".
   *
   * Body lines that do not split into exactly three space-separated fields
   * are ignored without being counted.
   *
   * @param {string} line
   * @throws {Error} "Parse error of matrix.def" when the header dimensions are
   *   missing, non-numeric or negative, or when a body line has non-numeric
   *   fields or ids outside the declared dimensions.
   */
  putLine(line) {
    if (this.lines === 0 || this.connection_cost === null) {
      const [forward_text, backward_text] = line.split(" ");
      const forward_dimension = Number.parseInt(forward_text, 10);
      const backward_dimension = Number.parseInt(backward_text, 10);

      // NaN never compares below zero, so it has to be rejected explicitly.
      if (
        !Number.isFinite(forward_dimension) ||
        !Number.isFinite(backward_dimension) ||
        forward_dimension < 0 ||
        backward_dimension < 0
      ) {
        throw new Error("Parse error of matrix.def");
      }

      this.connection_cost = new ConnectionCosts(forward_dimension, backward_dimension);
      this.lines++;
      return;
    }

    const costs = line.split(" ");
    if (costs.length !== 3) {
      return;
    }

    const forward_id = Number.parseInt(costs[0], 10);
    const backward_id = Number.parseInt(costs[1], 10);
    const cost = Number.parseInt(costs[2], 10);

    if (
      !Number.isFinite(forward_id) ||
      !Number.isFinite(backward_id) ||
      !Number.isFinite(cost) ||
      forward_id < 0 ||
      backward_id < 0 ||
      this.connection_cost.forward_dimension <= forward_id ||
      this.connection_cost.backward_dimension <= backward_id
    ) {
      throw new Error("Parse error of matrix.def");
    }

    this.connection_cost.put(forward_id, backward_id, cost);
    this.lines++;
  }

  /**
   * @returns {ConnectionCosts|null} The object built so far, or null when no
   *   header line has been consumed yet.
   */
  build() {
    return this.connection_cost;
  }
}
+{"version":3,"file":"ConnectionCostsBuilder.js","sourceRoot":"","sources":["../../../src/dict/builder/ConnectionCostsBuilder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,eAAe,MAAM,uBAAuB,CAAC;AAEpD,MAAM,sBAAsB;IAI1B;;;OAGG;IACH;QAPA;;;;;WAAc;QACd;;;;;WAAwC;QAOtC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC;QACf,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;IAC9B,CAAC;IAED,OAAO,CAAC,IAAY;QAClB,IAAI,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI,EAAE,CAAC;YACtD,IAAI,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,iBAAiB,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAChD,IAAI,kBAAkB,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAEjD,IAAI,iBAAiB,GAAG,CAAC,IAAI,kBAAkB,GAAG,CAAC,EAAE,CAAC;gBACpD,MAAM,2BAA2B,CAAC;YACpC,CAAC;YAED,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CACxC,iBAAiB,EACjB,kBAAkB,CACnB,CAAC;YACF,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO;QACT,CAAC;QAED,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAE5B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;QACT,CAAC;QAED,IAAI,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,IAAI,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,IAAI,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAE9B,IACE,UAAU,GAAG,CAAC;YACd,WAAW,GAAG,CAAC;YACf,CAAC,QAAQ,CAAC,UAAU,CAAC;YACrB,CAAC,QAAQ,CAAC,WAAW,CAAC;YACtB,IAAI,CAAC,eAAe,CAAC,iBAAiB,IAAI,UAAU;YACpD,IAAI,CAAC,eAAe,CAAC,kBAAkB,IAAI,WAAW,EACtD,CAAC;YACD,MAAM,2BAA2B,CAAC;QACpC,CAAC;QAED,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC;QACxD,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,KAAK;QACH,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;CACF;AAED,eAAe,sBAAsB,CAAC"} \ No newline at end of file diff --git a/dist/dict/builder/CreateDatFiles.d.ts b/dist/dict/builder/CreateDatFiles.d.ts new file mode 100644 index 0000000..cb0ff5c --- /dev/null +++ b/dist/dict/builder/CreateDatFiles.d.ts @@ -0,0 +1 @@ +export {}; diff --git a/dist/dict/builder/CreateDatFiles.js b/dist/dict/builder/CreateDatFiles.js new file mode 100644 index 0000000..6246248 --- /dev/null +++ 
// Directory that receives the generated *.dat files.
const outDir = "dict-uncompressed/";

/**
 * Read the four seed dictionary sources into a dictionary builder, then hand
 * the pending reads to `buildBinaryDictionaries`, which writes the .dat files.
 *
 * @returns {Promise<void>}
 */
const createDatFiles = async () => {
  // Idempotent: does not fail when the directory already exists.
  mkdirSync(outDir, { recursive: true });

  const dic = new IPADic();
  const builder = kuromoji.dictionaryBuilder();

  // Run one seed reader, feeding each line to `handleLine`, then report completion.
  const readSeed = async (read, handleLine, doneMessage) => {
    await read((line) => {
      handleLine(line);
    });
    console.log(doneMessage);
  };

  // All four reads are started here, in this order, and awaited together later.
  const pending = [
    // Build token info dictionary
    readSeed(
      (onLine) => dic.readTokenInfo(onLine),
      (line) => builder.addTokenInfoDictionary(line),
      "Finishied to read token info dics"
    ),
    // Build connection costs matrix
    readSeed(
      (onLine) => dic.readMatrixDef(onLine),
      (line) => builder.putCostMatrixLine(line),
      "Finishied to read matrix.def"
    ),
    // Build unknown dictionary
    readSeed(
      (onLine) => dic.readUnkDef(onLine),
      (line) => builder.putUnkDefLine(line),
      "Finishied to read unk.def"
    ),
    // Build character definition dictionary
    readSeed(
      (onLine) => dic.readCharDef(onLine),
      (line) => builder.putCharDefLine(line),
      "Finishied to read char.def"
    ),
  ];

  await buildBinaryDictionaries(pending, builder);
};

/**
 * To node.js Buffer
 *
 * Copies the bytes of a typed array (or ArrayBuffer) into a new Buffer.
 * For a view, only the bytes the view covers are copied, not the whole
 * underlying ArrayBuffer.
 *
 * @param typed Typed array / ArrayBuffer, or a falsy value
 * @returns {Buffer} A new Buffer; empty when `typed` is falsy
 */
const toBuffer = (typed) => {
  if (!typed) {
    return Buffer.alloc(0);
  }
  if (ArrayBuffer.isView(typed)) {
    const bytes = new Uint8Array(typed.buffer, typed.byteOffset, typed.byteLength);
    return Buffer.from(bytes); // Buffer.from(Uint8Array) copies
  }
  // Not a view: use its `buffer` when it has one, otherwise treat the value
  // itself as the byte storage (e.g. a plain ArrayBuffer).
  return Buffer.from(new Uint8Array(typed.buffer ?? typed));
};

/**
 * Wait for every seed read, build the dictionaries, and write each binary
 * part to its .dat file under `outDir`.
 *
 * @param {Promise[]} promises Pending seed reads
 * @param builder Dictionary builder that received the seed lines
 * @returns {Promise<void>}
 */
const buildBinaryDictionaries = async (promises, builder) => {
  // Build kuromoji.js binary dictionary
  await Promise.all(promises);
  console.log("Finishied to read all seed dictionary files");
  console.log("Building binary dictionary ...");
  const dic = builder.build();

  // Every buffer is converted before the first file is written, so a failed
  // conversion does not leave a partial set of files behind.
  const files = [
    ["base.dat", toBuffer(dic.trie.bc.getBaseBuffer())],
    ["check.dat", toBuffer(dic.trie.bc.getCheckBuffer())],
    ["tid.dat", toBuffer(dic.token_info_dictionary.dictionary.buffer)],
    ["tid_pos.dat", toBuffer(dic.token_info_dictionary.pos_buffer.buffer)],
    ["tid_map.dat", toBuffer(dic.token_info_dictionary.targetMapToBuffer())],
    ["cc.dat", toBuffer(dic.connection_costs.buffer)],
    ["unk.dat", toBuffer(dic.unknown_dictionary.dictionary.buffer)],
    ["unk_pos.dat", toBuffer(dic.unknown_dictionary.pos_buffer.buffer)],
    ["unk_map.dat", toBuffer(dic.unknown_dictionary.targetMapToBuffer())],
    [
      "unk_char.dat",
      toBuffer(dic.unknown_dictionary.character_definition?.character_category_map),
    ],
    [
      "unk_compat.dat",
      toBuffer(dic.unknown_dictionary.character_definition?.compatible_category_map),
    ],
    [
      "unk_invoke.dat",
      toBuffer(
        dic.unknown_dictionary.character_definition?.invoke_definition_map?.toBuffer()
      ),
    ],
  ];

  for (const [file_name, contents] of files) {
    writeFileSync(pathJoin([outDir, file_name]), contents);
  }
};
a/dist/dict/builder/CreateDatFiles.js.map b/dist/dict/builder/CreateDatFiles.js.map new file mode 100644 index 0000000..611116b --- /dev/null +++ b/dist/dict/builder/CreateDatFiles.js.map @@ -0,0 +1 @@ +{"version":3,"file":"CreateDatFiles.js","sourceRoot":"","sources":["../../../src/dict/builder/CreateDatFiles.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC/D,OAAO,MAAM,MAAM,mBAAmB,CAAC;AACvC,OAAO,QAAQ,MAAM,mBAAmB,CAAC;AAEzC,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAElD,MAAM,MAAM,GAAG,oBAAoB,CAAC;AAEpC,MAAM,cAAc,GAAG,KAAK,IAAI,EAAE;IAChC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QACxB,SAAS,CAAC,MAAM,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,MAAM,EAAE,CAAC;IACzB,MAAM,OAAO,GAAG,QAAQ,CAAC,iBAAiB,EAAE,CAAC;IAE7C,8BAA8B;IAC9B,MAAM,SAAS,GAAG,KAAK,IAAI,EAAE;QAC3B,MAAM,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,EAAE,EAAE;YAC/B,OAAO,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;IACnD,CAAC,CAAC;IAEF,gCAAgC;IAChC,MAAM,SAAS,GAAG,KAAK,IAAI,EAAE;QAC3B,MAAM,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,EAAE,EAAE;YAC/B,OAAO,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAClC,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;IAC9C,CAAC,CAAC;IAEF,2BAA2B;IAC3B,MAAM,MAAM,GAAG,KAAK,IAAI,EAAE;QACxB,MAAM,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,EAAE;YAC5B,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC,CAAC;IAEF,wCAAwC;IACxC,MAAM,OAAO,GAAG,KAAK,IAAI,EAAE;QACzB,MAAM,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,EAAE;YAC7B,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC5C,CAAC,CAAC;IAEF,MAAM,uBAAuB,CAC3B,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAC/C,OAAO,CACR,CAAC;AACJ,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,QAAQ,GAAG,CACf,KAA2D,EACnD,EAAE;IACV,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,sBAAsB;IACtB,aAAa;IACb,IAAI,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;IACtB,IAAI,MAAM,GAAG,IAAI,MAAM,CAAC,E
AAE,CAAC,UAAU,CAAC,CAAC;IACvC,IAAI,IAAI,GAAG,IAAI,UAAU,CAAC,EAAE,CAAC,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC;QACvC,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,uBAAuB,GAAG,KAAK,EACnC,QAAyB,EACzB,OAA0B,EAC1B,EAAE;IACF,sCAAsC;IACtC,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;IAC3D,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;IAC9C,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAE5B,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAC1D,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,cAAc,EAAE,CAAC,CAAC;IAC5D,MAAM,iBAAiB,GAAG,QAAQ,CAChC,GAAG,CAAC,qBAAqB,CAAC,UAAU,CAAC,MAAM,CAC5C,CAAC;IACF,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,qBAAqB,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;IAC7E,MAAM,cAAc,GAAG,QAAQ,CAC7B,GAAG,CAAC,qBAAqB,CAAC,iBAAiB,EAAE,CAC9C,CAAC;IACF,MAAM,uBAAuB,GAAG,QAAQ,CAAC,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;IACtE,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,kBAAkB,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;IACtE,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,kBAAkB,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;IAC1E,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,kBAAkB,CAAC,iBAAiB,EAAE,CAAC,CAAC;IAC5E,MAAM,eAAe,GAAG,QAAQ,CAC9B,GAAG,CAAC,kBAAkB,CAAC,oBAAoB,EAAE,sBAAsB,CACpE,CAAC;IACF,MAAM,sBAAsB,GAAG,QAAQ,CACrC,GAAG,CAAC,kBAAkB,CAAC,oBAAoB,EAAE,uBAAuB,CACrE,CAAC;IACF,MAAM,4BAA4B,GAAG,QAAQ,CAC3C,GAAG,CAAC,kBAAkB,CAAC,oBAAoB,EAAE,qBAAqB,EAAE,QAAQ,EAAE,CAC/E,CAAC;IAEF,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAC3D,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;IAC7D,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC;IAChE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IACjE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IACjE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,uBAAuB,CAAC,CAAC;IACrE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,EA
AE,UAAU,CAAC,CAAC;IACzD,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IACjE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IACjE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;IACnE,aAAa,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC;IAC5E,aAAa,CACX,QAAQ,CAAC,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC,EACpC,4BAA4B,CAC7B,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,cAAc,EAAE,CAAC"} \ No newline at end of file diff --git a/dist/dict/builder/DictionaryBuilder.d.ts b/dist/dict/builder/DictionaryBuilder.d.ts new file mode 100644 index 0000000..a25a545 --- /dev/null +++ b/dist/dict/builder/DictionaryBuilder.d.ts @@ -0,0 +1,54 @@ +import DynamicDictionaries from "../DynamicDictionaries.js"; +import TokenInfoDictionary from "../TokenInfoDictionary.js"; +import ConnectionCostsBuilder from "./ConnectionCostsBuilder.js"; +import CharacterDefinitionBuilder from "./CharacterDefinitionBuilder.js"; +import UnknownDictionary from "../UnknownDictionary.js"; +declare class DictionaryBuilder { + tid_entries: string[][]; + unk_entries: string[][]; + cc_builder: ConnectionCostsBuilder; + cd_builder: CharacterDefinitionBuilder; + /** + * Build dictionaries (token info, connection costs) + * + * Generates from matrix.def + * cc.dat: Connection costs + * + * Generates from *.csv + * dat.dat: Double array + * tid.dat: Token info dictionary + * tid_map.dat: targetMap + * tid_pos.dat: posList (part of speech) + */ + constructor(); + addTokenInfoDictionary(line: string): this; + /** + * Put one line of "matrix.def" file for building ConnectionCosts object + * @param {string} line is a line of "matrix.def" + */ + putCostMatrixLine(line: string): this; + putCharDefLine(line: string): this; + /** + * Put one line of "unk.def" file for building UnknownDictionary object + * @param {string} line is a line of "unk.def" + */ + putUnkDefLine(line: string): this; + build(): DynamicDictionaries; + /** + * Build TokenInfoDictionary + * + * 
@returns {{trie: *, token_info_dictionary: *}} + */ + buildTokenInfoDictionary(): { + trie: import("doublearray.ts/dist/doubleArrayClass.js").default; + token_info_dictionary: TokenInfoDictionary; + }; + buildUnknownDictionary(): UnknownDictionary; + /** + * Build double array trie + * + * @returns {DoubleArray} Double-Array trie + */ + buildDoubleArray(): import("doublearray.ts/dist/doubleArrayClass.js").default; +} +export default DictionaryBuilder; diff --git a/dist/dict/builder/DictionaryBuilder.js b/dist/dict/builder/DictionaryBuilder.js new file mode 100644 index 0000000..3b0226d --- /dev/null +++ b/dist/dict/builder/DictionaryBuilder.js @@ -0,0 +1,160 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import doublearray from "doublearray.ts"; +import DynamicDictionaries from "../DynamicDictionaries.js"; +import TokenInfoDictionary from "../TokenInfoDictionary.js"; +import ConnectionCostsBuilder from "./ConnectionCostsBuilder.js"; +import CharacterDefinitionBuilder from "./CharacterDefinitionBuilder.js"; +import UnknownDictionary from "../UnknownDictionary.js"; +class DictionaryBuilder { + /** + * Build dictionaries (token info, connection costs) + * + * Generates from matrix.def + * cc.dat: Connection costs + * + * Generates from *.csv + * dat.dat: Double array + * tid.dat: Token info dictionary + * tid_map.dat: targetMap + * tid_pos.dat: posList (part of speech) + */ + constructor() { + Object.defineProperty(this, "tid_entries", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "unk_entries", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "cc_builder", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "cd_builder", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + // Array of entries, each entry in Mecab form + // (0: surface form, 1: left id, 2: right id, 3: word cost, 4: part of speech id, 5-: other features) + this.tid_entries = []; + this.unk_entries = []; + this.cc_builder = new ConnectionCostsBuilder(); + this.cd_builder = new CharacterDefinitionBuilder(); + } + addTokenInfoDictionary(line) { + var new_entry = line.split(","); + this.tid_entries.push(new_entry); + return this; + } + /** + * Put one line of "matrix.def" file for building ConnectionCosts object + * @param {string} line is a line of "matrix.def" + */ + putCostMatrixLine(line) { + this.cc_builder.putLine(line); + return this; + } + putCharDefLine(line) { + this.cd_builder.putLine(line); + return this; + } + /** + * Put one line of "unk.def" file 
for building UnknownDictionary object + * @param {string} line is a line of "unk.def" + */ + putUnkDefLine(line) { + this.unk_entries.push(line.split(",")); + return this; + } + build() { + var dictionaries = this.buildTokenInfoDictionary(); + var unknown_dictionary = this.buildUnknownDictionary(); + return new DynamicDictionaries(dictionaries.trie, dictionaries.token_info_dictionary, this.cc_builder.build(), unknown_dictionary); + } + /** + * Build TokenInfoDictionary + * + * @returns {{trie: *, token_info_dictionary: *}} + */ + buildTokenInfoDictionary() { + var token_info_dictionary = new TokenInfoDictionary(); + // using as hashmap, string -> string (word_id -> surface_form) to build dictionary + var dictionary_entries = token_info_dictionary.buildDictionary(this.tid_entries); + var trie = this.buildDoubleArray(); + for (let entry in dictionary_entries) { + let token_info_id = parseInt(entry); + var surface_form = dictionary_entries[token_info_id]; + var trie_id = trie.lookup(surface_form.toString()); + // Assertion + // if (trie_id < 0) { + // console.log("Not Found:" + surface_form); + // } + token_info_dictionary.addMapping(trie_id, token_info_id); + } + return { + trie: trie, + token_info_dictionary: token_info_dictionary, + }; + } + buildUnknownDictionary() { + var unk_dictionary = new UnknownDictionary(); + // using as hashmap, string -> string (word_id -> surface_form) to build dictionary + var dictionary_entries = unk_dictionary.buildDictionary(this.unk_entries); + var char_def = this.cd_builder.build(); // Create CharacterDefinition + unk_dictionary.characterDefinition(char_def); + for (let entry in dictionary_entries) { + const token_info_id = Number(entry); + var class_name = dictionary_entries[token_info_id]; + var class_id = char_def.invoke_definition_map?.lookup(class_name); + // Assertion + // if (trie_id < 0) { + // console.log("Not Found:" + surface_form); + // } + if (class_id == null) + continue; + unk_dictionary.addMapping(class_id, 
token_info_id); + } + return unk_dictionary; + } + /** + * Build double array trie + * + * @returns {DoubleArray} Double-Array trie + */ + buildDoubleArray() { + var trie_id = 0; + var words = this.tid_entries.map((entry) => { + var surface_form = entry[0]; + return { k: surface_form.toString(), v: trie_id++ }; + }); + var builder = doublearray.builder(1024 * 1024); + return builder.build(words); + } +} +export default DictionaryBuilder; +//# sourceMappingURL=DictionaryBuilder.js.map \ No newline at end of file diff --git a/dist/dict/builder/DictionaryBuilder.js.map b/dist/dict/builder/DictionaryBuilder.js.map new file mode 100644 index 0000000..4b14efd --- /dev/null +++ b/dist/dict/builder/DictionaryBuilder.js.map @@ -0,0 +1 @@ +{"version":3,"file":"DictionaryBuilder.js","sourceRoot":"","sources":["../../../src/dict/builder/DictionaryBuilder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,WAAW,MAAM,gBAAgB,CAAC;AACzC,OAAO,mBAAmB,MAAM,2BAA2B,CAAC;AAC5D,OAAO,mBAAmB,MAAM,2BAA2B,CAAC;AAC5D,OAAO,sBAAsB,MAAM,6BAA6B,CAAC;AACjE,OAAO,0BAA0B,MAAM,iCAAiC,CAAC;AACzE,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAGxD,MAAM,iBAAiB;IAMrB;;;;;;;;;;;OAWG;IACH;QAjBA;;;;;WAAwB;QACxB;;;;;WAAwB;QACxB;;;;;WAAmC;QACnC;;;;;WAAuC;QAerC,6CAA6C;QAC7C,qGAAqG;QACrG,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;QACtB,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;QACtB,IAAI,CAAC,UAAU,GAAG,IAAI,sBAAsB,EAAE,CAAC;QAC/C,IAAI,CAAC,UAAU,GAAG,IAAI,0BAA0B,EAAE,CAAC;IACrD,CAAC;IAED,sBAAsB,CAAC,IAAY;QACjC,IAAI,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjC,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,iBAAiB,CAAC,IAAY;QAC5B,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC9B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,cAAc,CAAC,IAAY;QACzB,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC9B,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,aAAa,CAAC,IAAY;QACxB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK;QACH,IAAI,YAAY,GAAG,IAAI,CAAC,w
BAAwB,EAAE,CAAC;QACnD,IAAI,kBAAkB,GAAG,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAEvD,OAAO,IAAI,mBAAmB,CAC5B,YAAY,CAAC,IAAI,EACjB,YAAY,CAAC,qBAAqB,EAClC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,EACvB,kBAAkB,CACnB,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,wBAAwB;QACtB,IAAI,qBAAqB,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAEtD,mFAAmF;QACnF,IAAI,kBAAkB,GAAG,qBAAqB,CAAC,eAAe,CAC5D,IAAI,CAAC,WAAW,CACjB,CAAC;QAEF,IAAI,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAEnC,KAAK,IAAI,KAAK,IAAI,kBAAkB,EAAE,CAAC;YACrC,IAAI,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,YAAY,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC;YACrD,IAAI,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEnD,YAAY;YACZ,qBAAqB;YACrB,gDAAgD;YAChD,IAAI;YAEJ,qBAAqB,CAAC,UAAU,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QAC3D,CAAC;QAED,OAAO;YACL,IAAI,EAAE,IAAI;YACV,qBAAqB,EAAE,qBAAqB;SAC7C,CAAC;IACJ,CAAC;IAED,sBAAsB;QACpB,IAAI,cAAc,GAAG,IAAI,iBAAiB,EAAE,CAAC;QAE7C,mFAAmF;QACnF,IAAI,kBAAkB,GAAG,cAAc,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAE1E,IAAI,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,CAAC,6BAA6B;QAErE,cAAc,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAE7C,KAAK,IAAI,KAAK,IAAI,kBAAkB,EAAE,CAAC;YACrC,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,UAAU,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC;YACnD,IAAI,QAAQ,GAAG,QAAQ,CAAC,qBAAqB,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;YAElE,YAAY;YACZ,qBAAqB;YACrB,gDAAgD;YAChD,IAAI;YAEJ,IAAI,QAAQ,IAAI,IAAI;gBAAE,SAAS;YAC/B,cAAc,CAAC,UAAU,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACrD,CAAC;QAED,OAAO,cAAc,CAAC;IACxB,CAAC;IAED;;;;OAIG;IACH,gBAAgB;QACd,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,KAAK,GAAU,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;YAChD,IAAI,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC5B,OAAO,EAAE,CAAC,EAAE,YAAY,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,IAAI,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAC/C,OAAO,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;CACF;AAED,eAAe,iBAAiB,CAAC"} \ No newline at end of file diff --git a/dist/kuromoji.d.ts b/dist/kuromoji.d.ts new file mode 100644 index 0000000..da049a5 --- /dev/null +++ 
b/dist/kuromoji.d.ts @@ -0,0 +1,8 @@ +import { TokenizerBuilderOption } from "./TokenizerBuilder.js"; +import TokenizerBuilder from "./TokenizerBuilder.js"; +import DictionaryBuilder from "./dict/builder/DictionaryBuilder.js"; +declare const kuromoji: { + builder: (option?: TokenizerBuilderOption) => TokenizerBuilder; + dictionaryBuilder: () => DictionaryBuilder; +}; +export default kuromoji; diff --git a/dist/kuromoji.js b/dist/kuromoji.js new file mode 100644 index 0000000..170d03d --- /dev/null +++ b/dist/kuromoji.js @@ -0,0 +1,30 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import TokenizerBuilder from "./TokenizerBuilder.js"; +import DictionaryBuilder from "./dict/builder/DictionaryBuilder.js"; +// Public methods +const kuromoji = { + builder: (option = {}) => { + return new TokenizerBuilder(option); + }, + dictionaryBuilder: () => { + return new DictionaryBuilder(); + }, +}; +export default kuromoji; +//# sourceMappingURL=kuromoji.js.map \ No newline at end of file diff --git a/dist/kuromoji.js.map b/dist/kuromoji.js.map new file mode 100644 index 0000000..13dd875 --- /dev/null +++ b/dist/kuromoji.js.map @@ -0,0 +1 @@ +{"version":3,"file":"kuromoji.js","sourceRoot":"","sources":["../src/kuromoji.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAIb,OAAO,gBAAgB,MAAM,uBAAuB,CAAC;AACrD,OAAO,iBAAiB,MAAM,qCAAqC,CAAC;AAEpE,iBAAiB;AACjB,MAAM,QAAQ,GAAG;IACf,OAAO,EAAE,CAAC,SAAiC,EAAE,EAAE,EAAE;QAC/C,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IACD,iBAAiB,EAAE,GAAG,EAAE;QACtB,OAAO,IAAI,iBAAiB,EAAE,CAAC;IACjC,CAAC;CACF,CAAC;AAEF,eAAe,QAAQ,CAAC"} \ No newline at end of file diff --git a/dist/loader/BrowserDictionaryLoader.d.ts b/dist/loader/BrowserDictionaryLoader.d.ts new file mode 100644 index 0000000..674bdfe --- /dev/null +++ b/dist/loader/BrowserDictionaryLoader.d.ts @@ -0,0 +1,23 @@ +import DictionaryLoader from "./DictionaryLoader.js"; +/** + * Callback + * @callback BrowserDictionaryLoader~onLoad + * @param {Object} err Error object + * @param {Uint8Array} buffer Loaded buffer + */ +export type BrowserDictionaryLoaderOnLoad = (err: string | ProgressEvent | null, buffer: ArrayBufferLike | null) => void; +declare class BrowserDictionaryLoader extends DictionaryLoader { + /** + * BrowserDictionaryLoader inherits DictionaryLoader, using jQuery XHR for download + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path: string); + /** + * Utility function to load gzipped dictionary + * @param {string} url Dictionary URL + * @param {BrowserDictionaryLoader~onLoad} 
callback Callback function + */ + loadArrayBuffer(url: string, callback: BrowserDictionaryLoaderOnLoad): Promise; +} +export default BrowserDictionaryLoader; diff --git a/dist/loader/BrowserDictionaryLoader.js b/dist/loader/BrowserDictionaryLoader.js new file mode 100644 index 0000000..ba668ab --- /dev/null +++ b/dist/loader/BrowserDictionaryLoader.js @@ -0,0 +1,67 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import { Inflate } from "pako"; +import DictionaryLoader from "./DictionaryLoader.js"; +class BrowserDictionaryLoader extends DictionaryLoader { + /** + * BrowserDictionaryLoader inherits DictionaryLoader, using jQuery XHR for download + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path) { + super(dic_path); + } + /** + * Utility function to load gzipped dictionary + * @param {string} url Dictionary URL + * @param {BrowserDictionaryLoader~onLoad} callback Callback function + */ + async loadArrayBuffer(url, callback) { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.open("GET", url, true); + xhr.responseType = "arraybuffer"; + xhr.onload = function () { + if (this.status > 0 && this.status !== 200) { + callback(xhr.statusText, null); + resolve(); + return; + } + const arraybuffer = new Uint8Array(this.response); + const inflate = new Inflate(); + inflate.push(arraybuffer, true); + if (inflate.err) { + reject(new Error(inflate.err.toString() + ": " + inflate.msg)); + } + const decompressed = inflate.result; + const typed_array = decompressed instanceof Uint8Array + ? 
decompressed + : new TextEncoder().encode(decompressed); + callback(null, typed_array.buffer); + resolve(); + }; + xhr.onerror = function (err) { + callback(err, null); + resolve(); + }; + xhr.send(); + }); + } +} +export default BrowserDictionaryLoader; +//# sourceMappingURL=BrowserDictionaryLoader.js.map \ No newline at end of file diff --git a/dist/loader/BrowserDictionaryLoader.js.map b/dist/loader/BrowserDictionaryLoader.js.map new file mode 100644 index 0000000..98becfc --- /dev/null +++ b/dist/loader/BrowserDictionaryLoader.js.map @@ -0,0 +1 @@ +{"version":3,"file":"BrowserDictionaryLoader.js","sourceRoot":"","sources":["../../src/loader/BrowserDictionaryLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,gBAAgB,MAAM,uBAAuB,CAAC;AAarD,MAAM,uBAAwB,SAAQ,gBAAgB;IACpD;;;;OAIG;IACH,YAAY,QAAgB;QAC1B,KAAK,CAAC,QAAQ,CAAC,CAAC;IAClB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,eAAe,CAAC,GAAW,EAAE,QAAuC;QACxE,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,MAAM,GAAG,GAAG,IAAI,cAAc,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;YAC3B,GAAG,CAAC,YAAY,GAAG,aAAa,CAAC;YACjC,GAAG,CAAC,MAAM,GAAG;gBACX,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;oBAC3C,QAAQ,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;oBAC/B,OAAO,EAAE,CAAC;oBACV,OAAO;gBACT,CAAC;gBACD,MAAM,WAAW,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,QAAuB,CAAC,CAAC;gBAEjE,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;gBAC9B,OAAO,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;gBAChC,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;gBACjE,CAAC;gBACD,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;gBACpC,MAAM,WAAW,GACf,YAAY,YAAY,UAAU;oBAChC,CAAC,CAAC,YAAY;oBACd,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;gBAC7C,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;gBACnC,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;YACF,GAAG,CAAC,OAAO,GAAG,UAAU,GAAG;gBACzB,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC
,CAAC;gBACpB,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;YACF,GAAG,CAAC,IAAI,EAAE,CAAC;QACb,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,eAAe,uBAAuB,CAAC"} \ No newline at end of file diff --git a/dist/loader/DictionaryLoader.d.ts b/dist/loader/DictionaryLoader.d.ts new file mode 100644 index 0000000..5cf5d8e --- /dev/null +++ b/dist/loader/DictionaryLoader.d.ts @@ -0,0 +1,26 @@ +import DynamicDictionaries from "../dict/DynamicDictionaries.js"; +/** + * Callback + * @callback DictionaryLoader~onLoad + * @param {Object} err Error object + * @param {DynamicDictionaries} dic Loaded dictionary + */ +export type DictionaryLoaderOnLoad = (err: Object | null, dic: DynamicDictionaries) => void; +export type LoadArrayBufferCallback = (err: Object | null, buffer?: ArrayBufferLike | null) => void; +declare class DictionaryLoader { + dic: DynamicDictionaries; + dic_path: string; + /** + * DictionaryLoader base constructor + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path: string); + loadArrayBuffer(file: string, callback: LoadArrayBufferCallback): Promise; + /** + * Load dictionary files + * @param {DictionaryLoader~onLoad} load_callback Callback function called after loaded + */ + load(load_callback: DictionaryLoaderOnLoad): Promise; +} +export default DictionaryLoader; diff --git a/dist/loader/DictionaryLoader.js b/dist/loader/DictionaryLoader.js new file mode 100644 index 0000000..3bb642a --- /dev/null +++ b/dist/loader/DictionaryLoader.js @@ -0,0 +1,186 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// import { join } from "path"; +import { pathJoin } from "../util/PathJoin.js"; +import DynamicDictionaries from "../dict/DynamicDictionaries.js"; +import { isNotContainUndefined } from "../util/TypeGuard.js"; +class DictionaryLoader { + /** + * DictionaryLoader base constructor + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path) { + Object.defineProperty(this, "dic", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "dic_path", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.dic = new DynamicDictionaries(); + this.dic_path = dic_path; + } + // @ts-ignore + async loadArrayBuffer(file, callback) { + throw new Error("DictionaryLoader#loadArrayBuffer should be overwrite"); + } + /** + * Load dictionary files + * @param {DictionaryLoader~onLoad} load_callback Callback function called after loaded + */ + async load(load_callback) { + const dic = this.dic; + const dic_path = this.dic_path; + const loadArrayBuffer = this.loadArrayBuffer; + let prepared_callback_errs = []; + const prepareCallback = (err) => { + prepared_callback_errs.push(err); + }; + const trie = async () => { + const whenErr = (err, buffers) => { + if (err || buffers === undefined) { + prepareCallback(err); + return; + } + if (!isNotContainUndefined(buffers)) { + return prepareCallback(err); + } + const base_buffer = new Int32Array(buffers[0]); + const check_buffer = new Int32Array(buffers[1]); + dic.loadTrie(base_buffer, check_buffer); 
+ prepareCallback(null); + }; + // const loadFunc: Promise[] = []; + const buffers = await Promise.all(["base.dat.tgz", "check.dat.tgz"].map(async (filename) => { + let result; + await loadArrayBuffer(pathJoin([dic_path, filename]), (err, buffer) => { + if (err || buffer === undefined || buffer == null) { + return whenErr(err); + } + result = buffer; + }); + return result; + })); + whenErr(null, buffers); + // await Promise.all(loadFunc); + }; + const takeDictionaryInfo = async () => { + // const buffers: ArrayBufferLike[] = []; + const whenErr = (err, buffers) => { + if (err || buffers === undefined || buffers === null) { + return prepareCallback(err); + } + if (!isNotContainUndefined(buffers)) { + return prepareCallback(err); + } + const token_info_buffer = new Uint8Array(buffers[0]); + const pos_buffer = new Uint8Array(buffers[1]); + const target_map_buffer = new Uint8Array(buffers[2]); + dic.loadTokenInfoDictionaries(token_info_buffer, pos_buffer, target_map_buffer); + prepareCallback(null); + }; + const buffers = await Promise.all(["tid.dat.tgz", "tid_pos.dat.tgz", "tid_map.dat.tgz"].map(async (filename) => { + let result; + await loadArrayBuffer(pathJoin([dic_path, filename]), (err, buffer) => { + if (err || buffer === undefined || buffer == null) { + return whenErr(err); + } + result = buffer; + }); + return result; + })); + whenErr(null, buffers); + }; + const connectionCostMatrix = async () => { + await loadArrayBuffer(pathJoin([dic_path, "cc.dat.tgz"]), (err, buffer) => { + if (err) { + return prepareCallback(err); + } + let cc_buffer; + if (buffer === null || buffer === undefined) { + cc_buffer = new Int16Array(0); + } + else { + cc_buffer = new Int16Array(buffer, 0, Math.floor(buffer.byteLength / 2)); + } + dic.loadConnectionCosts(cc_buffer); + prepareCallback(null); + }); + }; + const unknownDictionaries = async () => { + // const buffers: ArrayBufferLike[] = []; + const whenErr = (err, buffers) => { + if (err || !buffers) { + return 
prepareCallback(err); + } + if (!isNotContainUndefined(buffers)) { + return prepareCallback(err); + } + const unk_buffer = new Uint8Array(buffers[0]); + const unk_pos_buffer = new Uint8Array(buffers[1]); + const unk_map_buffer = new Uint8Array(buffers[2]); + const cat_map_buffer = new Uint8Array(buffers[3]); + const compat_cat_map_buffer = new Uint32Array(buffers[4]); + const invoke_def_buffer = new Uint8Array(buffers[5]); + dic.loadUnknownDictionaries(unk_buffer, unk_pos_buffer, unk_map_buffer, cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer); + // dic.loadUnknownDictionaries(char_buffer, unk_buffer); + prepareCallback(null); + }; + const buffers = await Promise.all([ + "unk.dat.tgz", + "unk_pos.dat.tgz", + "unk_map.dat.tgz", + "unk_char.dat.tgz", + "unk_compat.dat.tgz", + "unk_invoke.dat.tgz", + ].map(async (filename) => { + let result; + await loadArrayBuffer(pathJoin([dic_path, filename]), (err, buffer) => { + if (err || buffer === undefined || buffer === null) { + return whenErr(err); + } + result = buffer; + }); + return result; + })); + whenErr(null, buffers); + }; + await Promise.all([ + trie(), + takeDictionaryInfo(), + connectionCostMatrix(), + unknownDictionaries(), + ]).catch((error) => { + prepareCallback(error); + }); + if (prepared_callback_errs.length > 0) { + // 元々はエラーが複数発生しても、全てのエラー情報を返さずにどれか1つを返すような仕様だったらしい + // 以下の処理で正しいかはわからないけれど、とりあえず1個だけ返すようにする + let errIndex = prepared_callback_errs.findIndex((e) => e !== null); + errIndex = errIndex == -1 ? 
0 : errIndex; + load_callback(prepared_callback_errs[errIndex], dic); + } + } +} +export default DictionaryLoader; +//# sourceMappingURL=DictionaryLoader.js.map \ No newline at end of file diff --git a/dist/loader/DictionaryLoader.js.map b/dist/loader/DictionaryLoader.js.map new file mode 100644 index 0000000..6624a3f --- /dev/null +++ b/dist/loader/DictionaryLoader.js.map @@ -0,0 +1 @@ +{"version":3,"file":"DictionaryLoader.js","sourceRoot":"","sources":["../../src/loader/DictionaryLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,+BAA+B;AAC/B,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAC/C,OAAO,mBAAmB,MAAM,gCAAgC,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAqB7D,MAAM,gBAAgB;IAIpB;;;;OAIG;IACH,YAAY,QAAgB;QAR5B;;;;;WAAyB;QACzB;;;;;WAAiB;QAQf,IAAI,CAAC,GAAG,GAAG,IAAI,mBAAmB,EAAE,CAAC;QACrC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED,aAAa;IACb,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,QAAiC;QACnE,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,IAAI,CAAC,aAAqC;QAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;QACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC/B,MAAM,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC;QAE7C,IAAI,sBAAsB,GAAsB,EAAE,CAAC;QAEnD,MAAM,eAAe,GAAG,CAAC,GAAkB,EAAQ,EAAE;YACnD,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,CAAC,CAAC;QAEF,MAAM,IAAI,GAAG,KAAK,IAAmB,EAAE;YACrC,MAAM,OAAO,GAAG,CACd,GAAkB,EAClB,OAAyC,EACzC,EAAE;gBACF,IAAI,GAAG,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBACjC,eAAe,CAAC,GAAG,CAAC,CAAC;oBACrB,OAAO;gBACT,CAAC;gBACD,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,CAAC;oBACpC,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,MAAM,WAAW,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/C,MAAM,YAAY,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEhD,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;gBACxC,eAAe,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC,CAAC;YAEF,wCAAwC;YACxC,MAAM,OAAO,GAAoC,MAAM,OAAO,CAAC,GAAG,CAChE,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;gBACvD,IAAI,MAAmC,CAAC;gBACxC,MAAM,eAAe,CACnB,QAAQ,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAC9
B,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;oBACd,IAAI,GAAG,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC;wBAClD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC;oBACD,MAAM,GAAG,MAAM,CAAC;gBAClB,CAAC,CACF,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC,CAAC,CACH,CAAC;YACF,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACvB,+BAA+B;QACjC,CAAC,CAAC;QAEF,MAAM,kBAAkB,GAAG,KAAK,IAAmB,EAAE;YACnD,yCAAyC;YAEzC,MAAM,OAAO,GAAG,CACd,GAAkB,EAClB,OAAgD,EAChD,EAAE;gBACF,IAAI,GAAG,IAAI,OAAO,KAAK,SAAS,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,CAAC;oBACpC,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,MAAM,iBAAiB,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,MAAM,iBAAiB,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAErD,GAAG,CAAC,yBAAyB,CAC3B,iBAAiB,EACjB,UAAU,EACV,iBAAiB,CAClB,CAAC;gBACF,eAAe,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC,CAAC;YAEF,MAAM,OAAO,GAAoC,MAAM,OAAO,CAAC,GAAG,CAChE,CAAC,aAAa,EAAE,iBAAiB,EAAE,iBAAiB,CAAC,CAAC,GAAG,CACvD,KAAK,EAAE,QAAQ,EAAwC,EAAE;gBACvD,IAAI,MAAmC,CAAC;gBACxC,MAAM,eAAe,CACnB,QAAQ,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAC9B,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;oBACd,IAAI,GAAG,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC;wBAClD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC;oBACD,MAAM,GAAG,MAAM,CAAC;gBAClB,CAAC,CACF,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC,CACF,CACF,CAAC;YACF,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzB,CAAC,CAAC;QAEF,MAAM,oBAAoB,GAAG,KAAK,IAAmB,EAAE;YACrD,MAAM,eAAe,CACnB,QAAQ,CAAC,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC,EAClC,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;gBACd,IAAI,GAAG,EAAE,CAAC;oBACR,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,IAAI,SAAqB,CAAC;gBAC1B,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;oBAC5C,SAAS,GAAG,IAAI,UAAU,CAAC,CAAC,CAAC,CAAC;gBAChC,CAAC;qBAAM,CAAC;oBACN,SAAS,GAAG,IAAI,UAAU,CACxB,MAAM,EACN,CAAC,EACD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,CAAC,CAClC,CAAC;gBACJ,CAAC;gBACD,GAAG,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;gBACnC,
eAAe,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC,CACF,CAAC;QACJ,CAAC,CAAC;QAEF,MAAM,mBAAmB,GAAG,KAAK,IAAmB,EAAE;YACpD,yCAAyC;YAEzC,MAAM,OAAO,GAAG,CACd,GAAkB,EAClB,OAAgD,EAChD,EAAE;gBACF,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;oBACpB,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,CAAC;oBACpC,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,MAAM,cAAc,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAClD,MAAM,cAAc,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAClD,MAAM,cAAc,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAClD,MAAM,qBAAqB,GAAG,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1D,MAAM,iBAAiB,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBAErD,GAAG,CAAC,uBAAuB,CACzB,UAAU,EACV,cAAc,EACd,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,iBAAiB,CAClB,CAAC;gBACF,wDAAwD;gBACxD,eAAe,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC,CAAC;YAEF,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B;gBACE,aAAa;gBACb,iBAAiB;gBACjB,iBAAiB;gBACjB,kBAAkB;gBAClB,oBAAoB;gBACpB,oBAAoB;aACrB,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;gBACvB,IAAI,MAAmC,CAAC;gBACxC,MAAM,eAAe,CACnB,QAAQ,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAC9B,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;oBACd,IAAI,GAAG,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;wBACnD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC;oBACD,MAAM,GAAG,MAAM,CAAC;gBAClB,CAAC,CACF,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC,CAAC,CACH,CAAC;YACF,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzB,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC;YAChB,IAAI,EAAE;YACN,kBAAkB,EAAE;YACpB,oBAAoB,EAAE;YACtB,mBAAmB,EAAE;SACtB,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACjB,eAAe,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC,CAAC,CAAC;QAEH,IAAI,sBAAsB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,kDAAkD;YAClD,uCAAuC;YACvC,IAAI,QAAQ,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;YACnE,QAAQ,GAAG,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;YACzC,aAAa,CAAC,sBAAsB,CAAC,QAAQ,CAAC,EAAE,GAAG,CAAC,CAAC;QACvD,CAAC;IACH,C
AAC;CACF;AAED,eAAe,gBAAgB,CAAC"} \ No newline at end of file diff --git a/dist/loader/NodeDictionaryLoader.d.ts b/dist/loader/NodeDictionaryLoader.d.ts new file mode 100644 index 0000000..12eab45 --- /dev/null +++ b/dist/loader/NodeDictionaryLoader.d.ts @@ -0,0 +1,22 @@ +import DictionaryLoader from "./DictionaryLoader.js"; +/** + * @callback NodeDictionaryLoader~onLoad + * @param {Object} err Error object + * @param {Uint8Array} buffer Loaded buffer + */ +export type NodeDictionaryLoaderOnLoad = (err: Error | null, buffer?: ArrayBufferLike | null) => void; +declare class NodeDictionaryLoader extends DictionaryLoader { + /** + * NodeDictionaryLoader inherits DictionaryLoader + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path: string); + /** + * Utility function + * @param {string} file Dictionary file path + * @param {NodeDictionaryLoader~onLoad} callback Callback function + */ + loadArrayBuffer(file: string, callback: NodeDictionaryLoaderOnLoad): Promise; +} +export default NodeDictionaryLoader; diff --git a/dist/loader/NodeDictionaryLoader.js b/dist/loader/NodeDictionaryLoader.js new file mode 100644 index 0000000..b2969e4 --- /dev/null +++ b/dist/loader/NodeDictionaryLoader.js @@ -0,0 +1,62 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import fs from "node:fs"; +import { Inflate } from "pako"; +import DictionaryLoader from "./DictionaryLoader.js"; +class NodeDictionaryLoader extends DictionaryLoader { + /** + * NodeDictionaryLoader inherits DictionaryLoader + * @param {string} dic_path Dictionary path + * @constructor + */ + constructor(dic_path) { + super(dic_path); + } + /** + * Utility function + * @param {string} file Dictionary file path + * @param {NodeDictionaryLoader~onLoad} callback Callback function + */ + async loadArrayBuffer(file, callback) { + return new Promise((resolve) => { + // ここでfile(ファイルパス)からファイルを読み込んでいる + fs.readFile(file, (err, buffer) => { + if (err) { + callback(err); + resolve(); + return; + } + const inflate = new Inflate(); + inflate.push(buffer, true); + if (inflate.err) { + callback(new Error(inflate.err.toString())); + resolve(); + return; + } + const decompressed = inflate.result; + const typed_array = decompressed instanceof Uint8Array + ? decompressed + : new TextEncoder().encode(decompressed); + callback(null, typed_array.buffer); + resolve(); + }); + }); + } +} +export default NodeDictionaryLoader; +//# sourceMappingURL=NodeDictionaryLoader.js.map \ No newline at end of file diff --git a/dist/loader/NodeDictionaryLoader.js.map b/dist/loader/NodeDictionaryLoader.js.map new file mode 100644 index 0000000..bee6307 --- /dev/null +++ b/dist/loader/NodeDictionaryLoader.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"NodeDictionaryLoader.js","sourceRoot":"","sources":["../../src/loader/NodeDictionaryLoader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,gBAAgB,MAAM,uBAAuB,CAAC;AAYrD,MAAM,oBAAqB,SAAQ,gBAAgB;IACjD;;;;OAIG;IACH,YAAY,QAAgB;QAC1B,KAAK,CAAC,QAAQ,CAAC,CAAC;IAClB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,QAAoC;QACtE,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YACnC,gCAAgC;YAChC,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;gBAChC,IAAI,GAAG,EAAE,CAAC;oBACR,QAAQ,CAAC,GAAG,CAAC,CAAC;oBACd,OAAO,EAAE,CAAC;oBACV,OAAO;gBACT,CAAC;gBAED,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;gBAC9B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBAC3B,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;oBAChB,QAAQ,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;oBAC5C,OAAO,EAAE,CAAC;oBACV,OAAO;gBACT,CAAC;gBACD,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;gBACpC,MAAM,WAAW,GACf,YAAY,YAAY,UAAU;oBAChC,CAAC,CAAC,YAAY;oBACd,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;gBAC7C,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;gBACnC,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,eAAe,oBAAoB,CAAC"} \ No newline at end of file diff --git a/dist/util/ByteBuffer.d.ts b/dist/util/ByteBuffer.d.ts new file mode 100644 index 0000000..5349529 --- /dev/null +++ b/dist/util/ByteBuffer.d.ts @@ -0,0 +1,23 @@ +declare class ByteBuffer { + buffer: Uint8Array; + position: number; + /** + * Utilities to manipulate byte sequence + * @param {(number|Uint8Array)} arg Initial size of this buffer (number), or buffer to set (Uint8Array) + * @constructor + */ + constructor(arg?: number | Uint8Array); + size(): number; + reallocate(): void; + shrink(): Uint8Array; + put(b: number): void; + get(index?: number): number; + putShort(num: number): void; + getShort(index?: number): number; + putInt(num: number): void; + getInt(index?: number): number; + readInt(): number; + 
putString(str: string): void; + getString(index?: number): string; +} +export default ByteBuffer; diff --git a/dist/util/ByteBuffer.js b/dist/util/ByteBuffer.js new file mode 100644 index 0000000..f0c855a --- /dev/null +++ b/dist/util/ByteBuffer.js @@ -0,0 +1,287 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Convert String (UTF-16) to UTF-8 ArrayBuffer + * + * @param {String} str UTF-16 string to convert + * @return {Uint8Array} Byte sequence encoded by UTF-8 + */ +const stringToUtf8Bytes = (str) => { + // Max size of 1 character is 4 bytes + const bytes = new Uint8Array(str.length * 4); + let i = 0, j = 0; + while (i < str.length) { + let unicode_code; + const utf16_code = str.charCodeAt(i++); + if (utf16_code >= 0xd800 && utf16_code <= 0xdbff) { + // surrogate pair + const upper = utf16_code; // high surrogate + const lower = str.charCodeAt(i++); // low surrogate + if (lower >= 0xdc00 && lower <= 0xdfff) { + unicode_code = + (upper - 0xd800) * (1 << 10) + (1 << 16) + (lower - 0xdc00); + } + else { + // malformed surrogate pair + return null; + } + } + else { + // not surrogate code + unicode_code = utf16_code; + } + if (unicode_code < 0x80) { + // 1-byte + bytes[j++] = unicode_code; + } + else if (unicode_code < 1 << 11) { + // 2-byte + bytes[j++] = (unicode_code >>> 6) | 0xc0; + bytes[j++] = (unicode_code & 0x3f) | 0x80; + } + else if 
(unicode_code < 1 << 16) { + // 3-byte + bytes[j++] = (unicode_code >>> 12) | 0xe0; + bytes[j++] = ((unicode_code >> 6) & 0x3f) | 0x80; + bytes[j++] = (unicode_code & 0x3f) | 0x80; + } + else if (unicode_code < 1 << 21) { + // 4-byte + bytes[j++] = (unicode_code >>> 18) | 0xf0; + bytes[j++] = ((unicode_code >> 12) & 0x3f) | 0x80; + bytes[j++] = ((unicode_code >> 6) & 0x3f) | 0x80; + bytes[j++] = (unicode_code & 0x3f) | 0x80; + } + else { + // malformed UCS4 code + } + } + return bytes.subarray(0, j); +}; +/** + * Convert UTF-8 ArrayBuffer to String (UTF-16) + * + * @param {Array} bytes UTF-8 byte sequence to convert + * @return {String} String encoded by UTF-16 + */ +const utf8BytesToString = (bytes) => { + let str = ""; + let code, b1, b2, b3, b4, upper, lower; + let i = 0; + while (i < bytes.length) { + b1 = bytes[i++]; + if (b1 < 0x80) { + // 1 byte + code = b1; + } + else if (b1 >> 5 === 0x06) { + // 2 bytes + b2 = bytes[i++]; + code = ((b1 & 0x1f) << 6) | (b2 & 0x3f); + } + else if (b1 >> 4 === 0x0e) { + // 3 bytes + b2 = bytes[i++]; + b3 = bytes[i++]; + code = ((b1 & 0x0f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f); + } + else { + // 4 bytes + b2 = bytes[i++]; + b3 = bytes[i++]; + b4 = bytes[i++]; + code = + ((b1 & 0x07) << 18) | + ((b2 & 0x3f) << 12) | + ((b3 & 0x3f) << 6) | + (b4 & 0x3f); + } + if (code < 0x10000) { + str += String.fromCharCode(code); + } + else { + // surrogate pair + code -= 0x10000; + upper = 0xd800 | (code >> 10); + lower = 0xdc00 | (code & 0x3ff); + str += String.fromCharCode(upper, lower); + } + } + return str; +}; +class ByteBuffer { + /** + * Utilities to manipulate byte sequence + * @param {(number|Uint8Array)} arg Initial size of this buffer (number), or buffer to set (Uint8Array) + * @constructor + */ + constructor(arg) { + Object.defineProperty(this, "buffer", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "position", { + enumerable: true, + configurable: true, + 
writable: true, + value: void 0 + }); + let initial_size; + if (arg == null) { + initial_size = 1024 * 1024; + } + else if (typeof arg === "number") { + initial_size = arg; + } + else if (arg instanceof Uint8Array) { + this.buffer = arg; + this.position = 0; // Overwrite + return; + } + else { + // typeof arg -> String + throw (typeof arg + " is invalid parameter type for ByteBuffer constructor"); + } + // arg is null or number + this.buffer = new Uint8Array(initial_size); + this.position = 0; + } + size() { + return this.buffer.length; + } + reallocate() { + const new_array = new Uint8Array(this.buffer.length * 2); + new_array.set(this.buffer); + this.buffer = new_array; + } + shrink() { + this.buffer = this.buffer.subarray(0, this.position); + return this.buffer; + } + put(b) { + if (this.buffer.length < this.position + 1) { + this.reallocate(); + } + this.buffer[this.position++] = b; + } + get(index) { + if (index == null) { + index = this.position; + this.position += 1; + } + if (this.buffer.length < index + 1) { + return 0; + } + return this.buffer[index]; + } + // Write short to buffer by little endian + putShort(num) { + if (0xffff < num) { + throw num + " is over short value"; + } + const lower = 0x00ff & num; + const upper = (0xff00 & num) >> 8; + this.put(lower); + this.put(upper); + } + // Read short from buffer by little endian + getShort(index) { + if (index == null) { + index = this.position; + this.position += 2; + } + if (this.buffer.length < index + 2) { + return 0; + } + const lower = this.buffer[index]; + const upper = this.buffer[index + 1]; + let value = (upper << 8) + lower; + if (value & 0x8000) { + value = -((value - 1) ^ 0xffff); + } + return value; + } + // Write integer to buffer by little endian + putInt(num) { + if (0xffffffff < num) { + throw num + " is over integer value"; + } + const b0 = 0x000000ff & num; + const b1 = (0x0000ff00 & num) >> 8; + const b2 = (0x00ff0000 & num) >> 16; + const b3 = (0xff000000 & num) >> 24; + 
this.put(b0); + this.put(b1); + this.put(b2); + this.put(b3); + } + // Read integer from buffer by little endian + getInt(index) { + if (index == null) { + index = this.position; + this.position += 4; + } + if (this.buffer.length < index + 4) { + return 0; + } + const b0 = this.buffer[index]; + const b1 = this.buffer[index + 1]; + const b2 = this.buffer[index + 2]; + const b3 = this.buffer[index + 3]; + return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0; + } + readInt() { + const pos = this.position; + this.position += 4; + return this.getInt(pos); + } + putString(str) { + const bytes = stringToUtf8Bytes(str); + if (bytes === null) + return; + for (let i = 0; i < bytes.length; i++) { + this.put(bytes[i]); + } + // put null character as terminal character + this.put(0); + } + getString(index) { + const buf = []; + let ch; + if (index == null) { + index = this.position; + } + while (true) { + if (this.buffer.length < index + 1) { + break; + } + ch = this.get(index++); + if (ch === 0) { + break; + } + else { + buf.push(ch); + } + } + this.position = index; + return utf8BytesToString(buf); + } +} +export default ByteBuffer; +//# sourceMappingURL=ByteBuffer.js.map \ No newline at end of file diff --git a/dist/util/ByteBuffer.js.map b/dist/util/ByteBuffer.js.map new file mode 100644 index 0000000..e376fcd --- /dev/null +++ b/dist/util/ByteBuffer.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"ByteBuffer.js","sourceRoot":"","sources":["../../src/util/ByteBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH;;;;;GAKG;AACH,MAAM,iBAAiB,GAAG,CAAC,GAAW,EAAqB,EAAE;IAC3D,qCAAqC;IACrC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE7C,IAAI,CAAC,GAAG,CAAC,EACP,CAAC,GAAG,CAAC,CAAC;IAER,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,IAAI,YAAoB,CAAC;QAEzB,MAAM,UAAU,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC;QACvC,IAAI,UAAU,IAAI,MAAM,IAAI,UAAU,IAAI,MAAM,EAAE,CAAC;YACjD,iBAAiB;YACjB,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,iBAAiB;YAC3C,MAAM,KAAK,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB;YAEnD,IAAI,KAAK,IAAI,MAAM,IAAI,KAAK,IAAI,MAAM,EAAE,CAAC;gBACvC,YAAY;oBACV,CAAC,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC;YAChE,CAAC;iBAAM,CAAC;gBACN,2BAA2B;gBAC3B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;aAAM,CAAC;YACN,qBAAqB;YACrB,YAAY,GAAG,UAAU,CAAC;QAC5B,CAAC;QAED,IAAI,YAAY,GAAG,IAAI,EAAE,CAAC;YACxB,SAAS;YACT,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC;QAC5B,CAAC;aAAM,IAAI,YAAY,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,SAAS;YACT,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,GAAG,IAAI,CAAC;YACzC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;QAC5C,CAAC;aAAM,IAAI,YAAY,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,SAAS;YACT,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC;YAC1C,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YACjD,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;QAC5C,CAAC;aAAM,IAAI,YAAY,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,SAAS;YACT,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC;YAC1C,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,YAAY,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YAClD,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YACjD,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG
,IAAI,CAAC;QAC5C,CAAC;aAAM,CAAC;YACN,sBAAsB;QACxB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAC9B,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,iBAAiB,GAAG,CAAC,KAAY,EAAU,EAAE;IACjD,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC;IACvC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACxB,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QAEhB,IAAI,EAAE,GAAG,IAAI,EAAE,CAAC;YACd,SAAS;YACT,IAAI,GAAG,EAAE,CAAC;QACZ,CAAC;aAAM,IAAI,EAAE,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5B,UAAU;YACV,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;QAC1C,CAAC;aAAM,IAAI,EAAE,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5B,UAAU;YACV,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;QAChE,CAAC;aAAM,CAAC;YACN,UAAU;YACV,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,EAAE,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChB,IAAI;gBACF,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;oBACnB,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;oBACnB,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;oBAClB,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;QAChB,CAAC;QAED,IAAI,IAAI,GAAG,OAAO,EAAE,CAAC;YACnB,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC;aAAM,CAAC;YACN,iBAAiB;YACjB,IAAI,IAAI,OAAO,CAAC;YAChB,KAAK,GAAG,MAAM,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,MAAM,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;YAChC,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEF,MAAM,UAAU;IAId;;;;OAIG;IACH,YAAY,GAAyB;QARrC;;;;;WAAmB;QACnB;;;;;WAAiB;QAQf,IAAI,YAAY,CAAC;QACjB,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;YAChB,YAAY,GAAG,IAAI,GAAG,IAAI,CAAC;QAC7B,CAAC;aAAM,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;YACnC,YAAY,GAAG,GA
AG,CAAC;QACrB,CAAC;aAAM,IAAI,GAAG,YAAY,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;YAClB,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,YAAY;YAC/B,OAAO;QACT,CAAC;aAAM,CAAC;YACN,uBAAuB;YACvB,MAAM,CACJ,OAAO,GAAG,GAAG,uDAAuD,CACrE,CAAC;QACJ,CAAC;QACD,wBAAwB;QACxB,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,CAAC;QAC3C,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;IACpB,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED,UAAU;QACR,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzD,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;IAC1B,CAAC;IAED,MAAM;QACJ,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,GAAG,CAAC,CAAS;QACX,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC;YAC3C,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,CAAC;IACnC,CAAC;IAED,GAAG,CAAC,KAAc;QAChB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;YAClB,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC;YACtB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,CAAC;QACX,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED,yCAAyC;IACzC,QAAQ,CAAC,GAAW;QAClB,IAAI,MAAM,GAAG,GAAG,EAAE,CAAC;YACjB,MAAM,GAAG,GAAG,sBAAsB,CAAC;QACrC,CAAC;QACD,MAAM,KAAK,GAAG,MAAM,GAAG,GAAG,CAAC;QAC3B,MAAM,KAAK,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAClB,CAAC;IAED,0CAA0C;IAC1C,QAAQ,CAAC,KAAc;QACrB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;YAClB,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC;YACtB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,CAAC;QACX,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACrC,IAAI,KAAK,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC;QACjC,IAAI,KAAK
,GAAG,MAAM,EAAE,CAAC;YACnB,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;QAClC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,2CAA2C;IAC3C,MAAM,CAAC,GAAW;QAChB,IAAI,UAAU,GAAG,GAAG,EAAE,CAAC;YACrB,MAAM,GAAG,GAAG,wBAAwB,CAAC;QACvC,CAAC;QACD,MAAM,EAAE,GAAG,UAAU,GAAG,GAAG,CAAC;QAC5B,MAAM,EAAE,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,EAAE,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACpC,MAAM,EAAE,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACpC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACf,CAAC;IAED,4CAA4C;IAC5C,MAAM,CAAC,KAAc;QACnB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;YAClB,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC;YACtB,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QACrB,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,CAAC;QACX,CAAC;QACD,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC9B,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAElC,OAAO,CAAC,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;IAClD,CAAC;IAED,OAAO;QACL,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC1B,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QACnB,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,SAAS,CAAC,GAAW;QACnB,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,KAAK,KAAK,IAAI;YAAE,OAAO;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;QACD,2CAA2C;QAC3C,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACd,CAAC;IAED,SAAS,CAAC,KAAc;QACtB,MAAM,GAAG,GAAG,EAAE,CAAC;QACf,IAAI,EAAE,CAAC;QACP,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;YAClB,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC;QACxB,CAAC;QACD,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;gBACnC,MAAM;YACR,CAAC;YACD,EAAE
,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;YACvB,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;gBACb,MAAM;YACR,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,CAAC;QACH,CAAC;QACD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QACtB,OAAO,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;CACF;AAED,eAAe,UAAU,CAAC"} \ No newline at end of file diff --git a/dist/util/IpadicFormatter.d.ts b/dist/util/IpadicFormatter.d.ts new file mode 100644 index 0000000..e3b730b --- /dev/null +++ b/dist/util/IpadicFormatter.d.ts @@ -0,0 +1,26 @@ +import { ViterbiNodeType } from "../viterbi/ViterbiNode.js"; +export interface IpadicFormatterToken { + word_id: number; + word_type: ViterbiNodeType; + word_position: number; + surface_form: string | Uint8Array; + pos: string; + pos_detail_1: string; + pos_detail_2: string; + pos_detail_3: string; + conjugated_type: string; + conjugated_form: string; + basic_form: string; + reading?: string; + pronunciation?: string; +} +declare class IpadicFormatter { + /** + * Mappings between IPADIC dictionary features and tokenized results + * @constructor + */ + constructor(); + formatEntry(word_id: number, position: number, type: ViterbiNodeType, features: string[]): IpadicFormatterToken; + formatUnknownEntry(word_id: number, position: number, type: ViterbiNodeType, features: string[], surface_form: string | Uint8Array): IpadicFormatterToken; +} +export default IpadicFormatter; diff --git a/dist/util/IpadicFormatter.js b/dist/util/IpadicFormatter.js new file mode 100644 index 0000000..60aadb5 --- /dev/null +++ b/dist/util/IpadicFormatter.js @@ -0,0 +1,62 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +class IpadicFormatter { + /** + * Mappings between IPADIC dictionary features and tokenized results + * @constructor + */ + constructor() { } + formatEntry(word_id, position, type, features) { + let token = { + word_id, + word_type: type, + word_position: position, + surface_form: features[0], + pos: features[1], + pos_detail_1: features[2], + pos_detail_2: features[3], + pos_detail_3: features[4], + conjugated_type: features[5], + conjugated_form: features[6], + basic_form: features[7], + reading: features[8], + pronunciation: features[9], + }; + return token; + } + formatUnknownEntry(word_id, position, type, features, surface_form) { + let token = { + word_id, + word_type: type, + word_position: position, + surface_form: surface_form, + pos: features[1], + pos_detail_1: features[2], + pos_detail_2: features[3], + pos_detail_3: features[4], + conjugated_type: features[5], + conjugated_form: features[6], + basic_form: features[7], + // reading: features[8], + // pronunciation: features[9], + }; + return token; + } +} +export default IpadicFormatter; +//# sourceMappingURL=IpadicFormatter.js.map \ No newline at end of file diff --git a/dist/util/IpadicFormatter.js.map b/dist/util/IpadicFormatter.js.map new file mode 100644 index 0000000..e074ff3 --- /dev/null +++ b/dist/util/IpadicFormatter.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"IpadicFormatter.js","sourceRoot":"","sources":["../../src/util/IpadicFormatter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAoBb,MAAM,eAAe;IACnB;;;OAGG;IACH,gBAAe,CAAC;IAEhB,WAAW,CACT,OAAe,EACf,QAAgB,EAChB,IAAqB,EACrB,QAAkB;QAElB,IAAI,KAAK,GAAyB;YAChC,OAAO;YACP,SAAS,EAAE,IAAI;YACf,aAAa,EAAE,QAAQ;YAEvB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC;YAChB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,eAAe,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC5B,eAAe,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC5B,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;YACvB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;YACpB,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;SAC3B,CAAC;QAEF,OAAO,KAAK,CAAC;IACf,CAAC;IAED,kBAAkB,CAChB,OAAe,EACf,QAAgB,EAChB,IAAqB,EACrB,QAAkB,EAClB,YAAiC;QAEjC,IAAI,KAAK,GAAyB;YAChC,OAAO;YACP,SAAS,EAAE,IAAI;YACf,aAAa,EAAE,QAAQ;YAEvB,YAAY,EAAE,YAAY;YAC1B,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC;YAChB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,YAAY,EAAE,QAAQ,CAAC,CAAC,CAAC;YACzB,eAAe,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC5B,eAAe,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC5B,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;YACvB,wBAAwB;YACxB,8BAA8B;SAC/B,CAAC;QACF,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAED,eAAe,eAAe,CAAC"} \ No newline at end of file diff --git a/dist/util/PathJoin.d.ts b/dist/util/PathJoin.d.ts new file mode 100644 index 0000000..b68347f --- /dev/null +++ b/dist/util/PathJoin.d.ts @@ -0,0 +1 @@ +export declare const pathJoin: (pathes: string[]) => string; diff --git a/dist/util/PathJoin.js b/dist/util/PathJoin.js new file mode 100644 index 0000000..43aade4 --- /dev/null +++ b/dist/util/PathJoin.js @@ -0,0 +1,16 @@ +export const pathJoin = (pathes) => { + let result = ""; + for (const path of pathes) { + if (path.startsWith("/") && result.endsWith("/")) { + result += path.slice(1); + } + else if (path.startsWith("/") || result.endsWith("/") || result == "") { + result += path; + } + else { + result += "/" + path; + } + } + return result; 
+}; +//# sourceMappingURL=PathJoin.js.map \ No newline at end of file diff --git a/dist/util/PathJoin.js.map b/dist/util/PathJoin.js.map new file mode 100644 index 0000000..c162fd1 --- /dev/null +++ b/dist/util/PathJoin.js.map @@ -0,0 +1 @@ +{"version":3,"file":"PathJoin.js","sourceRoot":"","sources":["../../src/util/PathJoin.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,MAAgB,EAAE,EAAE;IAC3C,IAAI,MAAM,GAAW,EAAE,CAAC;IACxB,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1B,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;YACxE,MAAM,IAAI,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC"} \ No newline at end of file diff --git a/dist/util/SurrogateAwareString.d.ts b/dist/util/SurrogateAwareString.d.ts new file mode 100644 index 0000000..467e661 --- /dev/null +++ b/dist/util/SurrogateAwareString.d.ts @@ -0,0 +1,17 @@ +declare class SurrogateAwareString { + str: string; + index_mapping: number[]; + length: number; + /** + * String wrapper for UTF-16 surrogate pair (4 bytes) + * @param {string} str String to wrap + * @constructor + */ + constructor(str: string); + slice(index: number): string; + charAt(index: number): string; + charCodeAt(index: number): number; + toString(): string; + static isSurrogatePair(ch: string): boolean; +} +export default SurrogateAwareString; diff --git a/dist/util/SurrogateAwareString.js b/dist/util/SurrogateAwareString.js new file mode 100644 index 0000000..2e78a54 --- /dev/null +++ b/dist/util/SurrogateAwareString.js @@ -0,0 +1,104 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +class SurrogateAwareString { + /** + * String wrapper for UTF-16 surrogate pair (4 bytes) + * @param {string} str String to wrap + * @constructor + */ + constructor(str) { + Object.defineProperty(this, "str", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "index_mapping", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "length", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.str = str; + this.index_mapping = []; + for (var pos = 0; pos < str.length; pos++) { + var ch = str.charAt(pos); + this.index_mapping.push(pos); + if (SurrogateAwareString.isSurrogatePair(ch)) { + pos++; + } + } + // Surrogate aware length + this.length = this.index_mapping.length; + } + slice(index) { + if (this.index_mapping.length <= index) { + return ""; + } + var surrogate_aware_index = this.index_mapping[index]; + return this.str.slice(surrogate_aware_index); + } + charAt(index) { + if (this.str.length <= index) { + return ""; + } + var surrogate_aware_start_index = this.index_mapping[index]; + var surrogate_aware_end_index = this.index_mapping[index + 1]; + if (surrogate_aware_end_index == null) { + return this.str.slice(surrogate_aware_start_index); + } + return this.str.slice(surrogate_aware_start_index, surrogate_aware_end_index); + } + charCodeAt(index) { + if (this.index_mapping.length <= index) { + return NaN; + } + var surrogate_aware_index = this.index_mapping[index]; 
+ var upper = this.str.charCodeAt(surrogate_aware_index); + var lower; + if (upper >= 0xD800 && upper <= 0xDBFF && + surrogate_aware_index < this.str.length) { + lower = this.str.charCodeAt(surrogate_aware_index + 1); + if (lower >= 0xDC00 && lower <= 0xDFFF) { + return (upper - 0xD800) * 0x400 + lower - 0xDC00 + 0x10000; + } + } + return upper; + } + toString() { + return this.str; + } + static isSurrogatePair(ch) { + var utf16_code = ch.charCodeAt(0); + if (utf16_code >= 0xD800 && utf16_code <= 0xDBFF) { + // surrogate pair + return true; + } + else { + return false; + } + } +} +export default SurrogateAwareString; +//# sourceMappingURL=SurrogateAwareString.js.map \ No newline at end of file diff --git a/dist/util/SurrogateAwareString.js.map b/dist/util/SurrogateAwareString.js.map new file mode 100644 index 0000000..58e8000 --- /dev/null +++ b/dist/util/SurrogateAwareString.js.map @@ -0,0 +1 @@ +{"version":3,"file":"SurrogateAwareString.js","sourceRoot":"","sources":["../../src/util/SurrogateAwareString.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,MAAM,oBAAoB;IAKxB;;;;OAIG;IACH,YAAY,GAAW;QATvB;;;;;WAAY;QACZ;;;;;WAAwB;QACxB;;;;;WAAe;QAQb,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;QACf,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC;QAExB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAC1C,IAAI,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACzB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7B,IAAI,oBAAoB,CAAC,eAAe,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7C,GAAG,EAAE,CAAC;YACR,CAAC;QACH,CAAC;QACD,yBAAyB;QACzB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;IAC1C,CAAC;IAED,KAAK,CAAC,KAAa;QACjB,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YACvC,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,qBAAqB,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACtD,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,CAAC,KAAa;QAClB,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,2BAA2B,GAAG,IAAI,CAAC,aAAa,CAAC,KA
AK,CAAC,CAAC;QAC5D,IAAI,yBAAyB,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAE9D,IAAI,yBAAyB,IAAI,IAAI,EAAE,CAAC;YACtC,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CACnB,2BAA2B,EAC3B,yBAAyB,CAC1B,CAAC;IACJ,CAAC;IAED,UAAU,CAAC,KAAa;QACtB,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YACvC,OAAO,GAAG,CAAC;QACb,CAAC;QACD,IAAI,qBAAqB,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACtD,IAAI,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,qBAAqB,CAAC,CAAC;QACvD,IAAI,KAAK,CAAC;QACV,IACE,KAAK,IAAI,MAAM,IAAI,KAAK,IAAI,MAAM;YAClC,qBAAqB,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EACvC,CAAC;YACD,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,qBAAqB,GAAG,CAAC,CAAC,CAAC;YACvD,IAAI,KAAK,IAAI,MAAM,IAAI,KAAK,IAAI,MAAM,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,OAAO,CAAC;YAC7D,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,GAAG,CAAC;IAClB,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,EAAU;QAC/B,IAAI,UAAU,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,UAAU,IAAI,MAAM,IAAI,UAAU,IAAI,MAAM,EAAE,CAAC;YACjD,iBAAiB;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;aAAM,CAAC;YACN,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;CACF;AAED,eAAe,oBAAoB,CAAC"} \ No newline at end of file diff --git a/dist/util/TypeGuard.d.ts b/dist/util/TypeGuard.d.ts new file mode 100644 index 0000000..6eb0756 --- /dev/null +++ b/dist/util/TypeGuard.d.ts @@ -0,0 +1 @@ +export declare const isNotContainUndefined: (ary: (T | undefined)[]) => ary is T[]; diff --git a/dist/util/TypeGuard.js b/dist/util/TypeGuard.js new file mode 100644 index 0000000..4acf69a --- /dev/null +++ b/dist/util/TypeGuard.js @@ -0,0 +1,8 @@ +export const isNotContainUndefined = (ary) => { + for (const a of ary) { + if (a === undefined) + return false; + } + return true; +}; +//# sourceMappingURL=TypeGuard.js.map \ No newline at end of file diff --git a/dist/util/TypeGuard.js.map b/dist/util/TypeGuard.js.map new file mode 100644 index 0000000..905ea3c --- /dev/null +++ 
b/dist/util/TypeGuard.js.map @@ -0,0 +1 @@ +{"version":3,"file":"TypeGuard.js","sourceRoot":"","sources":["../../src/util/TypeGuard.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,qBAAqB,GAAG,CACnC,GAAsB,EACV,EAAE;IACd,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,KAAK,SAAS;YAAE,OAAO,KAAK,CAAC;IACpC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC,CAAC"} \ No newline at end of file diff --git a/dist/viterbi/ViterbiBuilder.d.ts b/dist/viterbi/ViterbiBuilder.d.ts new file mode 100644 index 0000000..c15bf43 --- /dev/null +++ b/dist/viterbi/ViterbiBuilder.d.ts @@ -0,0 +1,23 @@ +import ViterbiLattice from "./ViterbiLattice.js"; +import DynamicDictionaries from "../dict/DynamicDictionaries.js"; +import DoubleArray from "doublearray.ts/dist/doubleArrayClass.js"; +import TokenInfoDictionary from "../dict/TokenInfoDictionary.js"; +import UnknownDictionary from "../dict/UnknownDictionary.js"; +declare class ViterbiBuilder { + trie: DoubleArray; + token_info_dictionary: TokenInfoDictionary; + unknown_dictionary: UnknownDictionary; + /** + * ViterbiBuilder builds word lattice (ViterbiLattice) + * @param {DynamicDictionaries} dic dictionary + * @constructor + */ + constructor(dic: DynamicDictionaries); + /** + * Build word lattice + * @param {string} sentence_str Input text + * @returns {ViterbiLattice} Word lattice + */ + build(sentence_str: string): ViterbiLattice; +} +export default ViterbiBuilder; diff --git a/dist/viterbi/ViterbiBuilder.js b/dist/viterbi/ViterbiBuilder.js new file mode 100644 index 0000000..928ffb1 --- /dev/null +++ b/dist/viterbi/ViterbiBuilder.js @@ -0,0 +1,121 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +import ViterbiNode from "./ViterbiNode.js"; +import ViterbiLattice from "./ViterbiLattice.js"; +import SurrogateAwareString from "../util/SurrogateAwareString.js"; +class ViterbiBuilder { + /** + * ViterbiBuilder builds word lattice (ViterbiLattice) + * @param {DynamicDictionaries} dic dictionary + * @constructor + */ + constructor(dic) { + Object.defineProperty(this, "trie", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "token_info_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "unknown_dictionary", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.trie = dic.trie; + this.token_info_dictionary = dic.token_info_dictionary; + this.unknown_dictionary = dic.unknown_dictionary; + } + /** + * Build word lattice + * @param {string} sentence_str Input text + * @returns {ViterbiLattice} Word lattice + */ + build(sentence_str) { + const lattice = new ViterbiLattice(); + const sentence = new SurrogateAwareString(sentence_str); + let trie_id, left_id, right_id, word_cost; + for (let pos = 0; pos < sentence.length; pos++) { + const tail = sentence.slice(pos); + const vocabulary = this.trie.commonPrefixSearch(tail); + for (let n = 0; n < vocabulary.length; n++) { + // Words in dictionary do not have surrogate pair (only UCS2 set) + trie_id = vocabulary[n].v; + let key = vocabulary[n].k; + if (key === null || key === undefined) + continue; + if (trie_id == 
null) + continue; + const token_info_ids = this.token_info_dictionary.target_map[trie_id]; + for (let i = 0; i < token_info_ids.length; i++) { + // FIXME parseInt要らない説 + const token_info_id = parseInt(token_info_ids[i].toString()); + left_id = + this.token_info_dictionary.dictionary.getShort(token_info_id); + right_id = this.token_info_dictionary.dictionary.getShort(token_info_id + 2); + word_cost = this.token_info_dictionary.dictionary.getShort(token_info_id + 4); + // node_name, cost, start_index, length, type, left_id, right_id, surface_form + lattice.append(new ViterbiNode(token_info_id, word_cost, pos + 1, key.length, "KNOWN", left_id, right_id, key)); + } + } + // Unknown word processing + const surrogate_aware_tail = new SurrogateAwareString(tail); + const head_char = new SurrogateAwareString(surrogate_aware_tail.charAt(0)); + const head_char_class = this.unknown_dictionary.lookup(head_char.toString()); + if (head_char_class === undefined) + continue; + if (vocabulary == null || + vocabulary.length === 0 || + head_char_class.is_always_invoke === 1) { + // Process unknown word + let key = head_char; + if (head_char_class.is_grouping === 1 && + 1 < surrogate_aware_tail.length) { + for (var k = 1; k < surrogate_aware_tail.length; k++) { + const next_char = surrogate_aware_tail.charAt(k); + const next_char_class = this.unknown_dictionary.lookup(next_char); + if (head_char_class.class_name !== next_char_class?.class_name) { + break; + } + key = key.toString() + next_char; + } + } + const unk_ids = this.unknown_dictionary.target_map[head_char_class.class_id]; + for (let j = 0; j < unk_ids.length; j++) { + // FIXME parseInt要らない説 + const unk_id = parseInt(unk_ids[j].toString()); + left_id = this.unknown_dictionary.dictionary.getShort(unk_id); + right_id = this.unknown_dictionary.dictionary.getShort(unk_id + 2); + word_cost = this.unknown_dictionary.dictionary.getShort(unk_id + 4); + // node_name, cost, start_index, length, type, left_id, right_id, surface_form + 
lattice.append(new ViterbiNode(unk_id, word_cost, pos + 1, key.length, "UNKNOWN", left_id, right_id, key.toString())); + } + } + } + lattice.appendEos(); + return lattice; + } +} +export default ViterbiBuilder; +//# sourceMappingURL=ViterbiBuilder.js.map \ No newline at end of file diff --git a/dist/viterbi/ViterbiBuilder.js.map b/dist/viterbi/ViterbiBuilder.js.map new file mode 100644 index 0000000..de11746 --- /dev/null +++ b/dist/viterbi/ViterbiBuilder.js.map @@ -0,0 +1 @@ +{"version":3,"file":"ViterbiBuilder.js","sourceRoot":"","sources":["../../src/viterbi/ViterbiBuilder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,WAAW,MAAM,kBAAkB,CAAC;AAC3C,OAAO,cAAc,MAAM,qBAAqB,CAAC;AACjD,OAAO,oBAAoB,MAAM,iCAAiC,CAAC;AAMnE,MAAM,cAAc;IAKlB;;;;OAIG;IACH,YAAY,GAAwB;QATpC;;;;;WAAkB;QAClB;;;;;WAA2C;QAC3C;;;;;WAAsC;QAQpC,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QACrB,IAAI,CAAC,qBAAqB,GAAG,GAAG,CAAC,qBAAqB,CAAC;QACvD,IAAI,CAAC,kBAAkB,GAAG,GAAG,CAAC,kBAAkB,CAAC;IACnD,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,YAAoB;QACxB,MAAM,OAAO,GAAG,IAAI,cAAc,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,IAAI,oBAAoB,CAAC,YAAY,CAAC,CAAC;QAExD,IAAI,OAA2B,EAC7B,OAAe,EACf,QAAgB,EAChB,SAAiB,CAAC;QACpB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,QAAQ,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACjC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;YACtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,iEAAiE;gBACjE,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,SAAS;oBAAE,SAAS;gBAEhD,IAAI,OAAO,IAAI,IAAI;oBAAE,SAAS;gBAE9B,MAAM,cAAc,GAAG,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;gBACtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/C,sBAAsB;oBACtB,MAAM,aAAa,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAE7D,OAAO;wBACL,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,QAA
Q,CAAC,aAAa,CAAC,CAAC;oBAChE,QAAQ,GAAG,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,QAAQ,CACvD,aAAa,GAAG,CAAC,CAClB,CAAC;oBACF,SAAS,GAAG,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,QAAQ,CACxD,aAAa,GAAG,CAAC,CAClB,CAAC;oBAEF,8EAA8E;oBAC9E,OAAO,CAAC,MAAM,CACZ,IAAI,WAAW,CACb,aAAa,EACb,SAAS,EACT,GAAG,GAAG,CAAC,EACP,GAAG,CAAC,MAAM,EACV,OAAO,EACP,OAAO,EACP,QAAQ,EACR,GAAG,CACJ,CACF,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,0BAA0B;YAC1B,MAAM,oBAAoB,GAAG,IAAI,oBAAoB,CAAC,IAAI,CAAC,CAAC;YAC5D,MAAM,SAAS,GAAG,IAAI,oBAAoB,CACxC,oBAAoB,CAAC,MAAM,CAAC,CAAC,CAAC,CAC/B,CAAC;YACF,MAAM,eAAe,GAAG,IAAI,CAAC,kBAAkB,CAAC,MAAM,CACpD,SAAS,CAAC,QAAQ,EAAE,CACrB,CAAC;YACF,IAAI,eAAe,KAAK,SAAS;gBAAE,SAAS;YAC5C,IACE,UAAU,IAAI,IAAI;gBAClB,UAAU,CAAC,MAAM,KAAK,CAAC;gBACvB,eAAe,CAAC,gBAAgB,KAAK,CAAC,EACtC,CAAC;gBACD,uBAAuB;gBACvB,IAAI,GAAG,GAAkC,SAAS,CAAC;gBACnD,IACE,eAAe,CAAC,WAAW,KAAK,CAAC;oBACjC,CAAC,GAAG,oBAAoB,CAAC,MAAM,EAC/B,CAAC;oBACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,oBAAoB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBACrD,MAAM,SAAS,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBACjD,MAAM,eAAe,GAAG,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;wBAClE,IAAI,eAAe,CAAC,UAAU,KAAK,eAAe,EAAE,UAAU,EAAE,CAAC;4BAC/D,MAAM;wBACR,CAAC;wBACD,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,GAAG,SAAS,CAAC;oBACnC,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GACX,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;gBAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACxC,sBAAsB;oBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAE/C,OAAO,GAAG,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;oBAC9D,QAAQ,GAAG,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACnE,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAEpE,8EAA8E;oBAC9E,OAAO,CAAC,MAAM,CACZ,IAAI,WAAW,CACb,MAAM,EACN,SAAS,EACT,GAAG,GAAG,CAAC,EACP,GAAG,CAAC,MAAM,EACV,SAAS,EACT,OAAO,EACP,QAAQ,EACR,GAAG,CAAC,QAAQ,EAAE,CACf,CACF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,CAAC,SAAS,EAAE,CAAC;QAEpB,OAAO,
OAAO,CAAC;IACjB,CAAC;CACF;AAED,eAAe,cAAc,CAAC"} \ No newline at end of file diff --git a/dist/viterbi/ViterbiLattice.d.ts b/dist/viterbi/ViterbiLattice.d.ts new file mode 100644 index 0000000..b4483a3 --- /dev/null +++ b/dist/viterbi/ViterbiLattice.d.ts @@ -0,0 +1,20 @@ +import ViterbiNode from "./ViterbiNode.js"; +declare class ViterbiLattice { + nodes_end_at: ViterbiNode[][]; + eos_pos: number; + /** + * ViterbiLattice is a lattice in Viterbi algorithm + * @constructor + */ + constructor(); + /** + * Append node to ViterbiLattice + * @param {ViterbiNode} node + */ + append(node: ViterbiNode): void; + /** + * Set ends with EOS (End of Statement) + */ + appendEos(): void; +} +export default ViterbiLattice; diff --git a/dist/viterbi/ViterbiLattice.js b/dist/viterbi/ViterbiLattice.js new file mode 100644 index 0000000..813b1d1 --- /dev/null +++ b/dist/viterbi/ViterbiLattice.js @@ -0,0 +1,69 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +"use strict"; +import ViterbiNode from "./ViterbiNode.js"; +class ViterbiLattice { + /** + * ViterbiLattice is a lattice in Viterbi algorithm + * @constructor + */ + constructor() { + Object.defineProperty(this, "nodes_end_at", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "eos_pos", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.nodes_end_at = []; + this.nodes_end_at[0] = [new ViterbiNode(-1, 0, 0, 0, "BOS", 0, 0, "")]; + this.eos_pos = 1; + } + /** + * Append node to ViterbiLattice + * @param {ViterbiNode} node + */ + append(node) { + var last_pos = node.start_pos + node.length - 1; + if (this.eos_pos < last_pos) { + this.eos_pos = last_pos; + } + var prev_nodes = this.nodes_end_at[last_pos]; + if (prev_nodes == null) { + prev_nodes = []; + } + prev_nodes.push(node); + this.nodes_end_at[last_pos] = prev_nodes; + } + /** + * Set ends with EOS (End of Statement) + */ + appendEos() { + var last_index = this.nodes_end_at.length; + this.eos_pos++; + this.nodes_end_at[last_index] = [ + new ViterbiNode(-1, 0, this.eos_pos, 0, "EOS", 0, 0, ""), + ]; + } +} +export default ViterbiLattice; +//# sourceMappingURL=ViterbiLattice.js.map \ No newline at end of file diff --git a/dist/viterbi/ViterbiLattice.js.map b/dist/viterbi/ViterbiLattice.js.map new file mode 100644 index 0000000..6105b24 --- /dev/null +++ b/dist/viterbi/ViterbiLattice.js.map @@ -0,0 +1 @@ 
+{"version":3,"file":"ViterbiLattice.js","sourceRoot":"","sources":["../../src/viterbi/ViterbiLattice.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAEb,OAAO,WAAW,MAAM,kBAAkB,CAAC;AAE3C,MAAM,cAAc;IAIlB;;;OAGG;IACH;QAPA;;;;;WAA8B;QAC9B;;;;;WAAgB;QAOd,IAAI,CAAC,YAAY,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QACvE,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC;IACnB,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,IAAiB;QACtB,IAAI,QAAQ,GAAG,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,OAAO,GAAG,QAAQ,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC;QAC1B,CAAC;QAED,IAAI,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;YACvB,UAAU,GAAG,EAAE,CAAC;QAClB,CAAC;QACD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEtB,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,UAAU,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,SAAS;QACP,IAAI,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;QAC1C,IAAI,CAAC,OAAO,EAAE,CAAC;QACf,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,GAAG;YAC9B,IAAI,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;SACzD,CAAC;IACJ,CAAC;CACF;AAED,eAAe,cAAc,CAAC"} \ No newline at end of file diff --git a/dist/viterbi/ViterbiNode.d.ts b/dist/viterbi/ViterbiNode.d.ts new file mode 100644 index 0000000..a50229a --- /dev/null +++ b/dist/viterbi/ViterbiNode.d.ts @@ -0,0 +1,27 @@ +export type ViterbiNodeType = "KNOWN" | "UNKNOWN" | "BOS" | "EOS"; +declare class ViterbiNode { + name: number; + cost: number; + start_pos: number; + length: number; + left_id: number; + right_id: number; + prev: null | ViterbiNode; + surface_form: string | Uint8Array; + shortest_cost: number; + type: ViterbiNodeType; + /** + * ViterbiNode is a node of ViterbiLattice + * @param {number} node_name Word ID + * @param {number} node_cost Word cost to generate + * @param {number} start_pos Start position from 1 + * @param {number} length Word 
length + * @param {string} type Node type (KNOWN, UNKNOWN, BOS, EOS, ...) + * @param {number} left_id Left context ID + * @param {number} right_id Right context ID + * @param {string} surface_form Surface form of this word + * @constructor + */ + constructor(node_name: number, node_cost: number, start_pos: number, length: number, type: ViterbiNodeType, left_id: number, right_id: number, surface_form: string | Uint8Array); +} +export default ViterbiNode; diff --git a/dist/viterbi/ViterbiNode.js b/dist/viterbi/ViterbiNode.js new file mode 100644 index 0000000..2d92f04 --- /dev/null +++ b/dist/viterbi/ViterbiNode.js @@ -0,0 +1,110 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +"use strict"; +class ViterbiNode { + /** + * ViterbiNode is a node of ViterbiLattice + * @param {number} node_name Word ID + * @param {number} node_cost Word cost to generate + * @param {number} start_pos Start position from 1 + * @param {number} length Word length + * @param {string} type Node type (KNOWN, UNKNOWN, BOS, EOS, ...) 
+ * @param {number} left_id Left context ID + * @param {number} right_id Right context ID + * @param {string} surface_form Surface form of this word + * @constructor + */ + constructor(node_name, node_cost, start_pos, length, type, left_id, right_id, surface_form) { + Object.defineProperty(this, "name", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "cost", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "start_pos", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "length", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "left_id", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "right_id", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "prev", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "surface_form", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "shortest_cost", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "type", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.name = node_name; + this.cost = node_cost; + this.start_pos = start_pos; + this.length = length; + this.left_id = left_id; + this.right_id = right_id; + this.prev = null; + this.surface_form = surface_form; + if (type === "BOS") { + this.shortest_cost = 0; + } + else { + this.shortest_cost = Number.MAX_VALUE; + } + this.type = type; + } +} +export default ViterbiNode; +//# sourceMappingURL=ViterbiNode.js.map \ No newline at end of file diff --git 
a/dist/viterbi/ViterbiNode.js.map b/dist/viterbi/ViterbiNode.js.map new file mode 100644 index 0000000..b6ec531 --- /dev/null +++ b/dist/viterbi/ViterbiNode.js.map @@ -0,0 +1 @@ +{"version":3,"file":"ViterbiNode.js","sourceRoot":"","sources":["../../src/viterbi/ViterbiNode.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,YAAY,CAAC;AAIb,MAAM,WAAW;IAYf;;;;;;;;;;;OAWG;IACH,YACE,SAAiB,EACjB,SAAiB,EACjB,SAAiB,EACjB,MAAc,EACd,IAAqB,EACrB,OAAe,EACf,QAAgB,EAChB,YAAiC;QA/BnC;;;;;WAAa;QACb;;;;;WAAa;QACb;;;;;WAAkB;QAClB;;;;;WAAe;QACf;;;;;WAAgB;QAChB;;;;;WAAiB;QACjB;;;;;WAAyB;QACzB;;;;;WAAkC;QAClC;;;;;WAAsB;QACtB;;;;;WAAsB;QAwBpB,IAAI,CAAC,IAAI,GAAG,SAAS,CAAC;QACtB,IAAI,CAAC,IAAI,GAAG,SAAS,CAAC;QACtB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC;QACzB,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,SAAS,CAAC;QACxC,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;CACF;AAED,eAAe,WAAW,CAAC"} \ No newline at end of file diff --git a/dist/viterbi/ViterbiSearcher.d.ts b/dist/viterbi/ViterbiSearcher.d.ts new file mode 100644 index 0000000..a95b6de --- /dev/null +++ b/dist/viterbi/ViterbiSearcher.d.ts @@ -0,0 +1,21 @@ +import ConnectionCosts from "../dict/ConnectionCosts.js"; +import ViterbiLattice from "./ViterbiLattice.js"; +import ViterbiNode from "./ViterbiNode.js"; +declare class ViterbiSearcher { + connection_costs: ConnectionCosts; + /** + * ViterbiSearcher is for searching best Viterbi path + * @param {ConnectionCosts} connection_costs Connection costs matrix + * @constructor + */ + constructor(connection_costs: ConnectionCosts); + /** + * Search best path by forward-backward algorithm + * @param {ViterbiLattice} lattice Viterbi lattice to search + * @returns {Array} Shortest path + */ + search(lattice: ViterbiLattice): 
ViterbiNode[]; + forward(lattice: ViterbiLattice): ViterbiLattice; + backward(lattice: ViterbiLattice): ViterbiNode[]; +} +export default ViterbiSearcher; diff --git a/dist/viterbi/ViterbiSearcher.js b/dist/viterbi/ViterbiSearcher.js new file mode 100644 index 0000000..c961d2a --- /dev/null +++ b/dist/viterbi/ViterbiSearcher.js @@ -0,0 +1,97 @@ +/* + * Copyright 2014 Takuya Asano + * Copyright 2010-2014 Atilika Inc. and contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +class ViterbiSearcher { + /** + * ViterbiSearcher is for searching best Viterbi path + * @param {ConnectionCosts} connection_costs Connection costs matrix + * @constructor + */ + constructor(connection_costs) { + Object.defineProperty(this, "connection_costs", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.connection_costs = connection_costs; + } + /** + * Search best path by forward-backward algorithm + * @param {ViterbiLattice} lattice Viterbi lattice to search + * @returns {Array} Shortest path + */ + search(lattice) { + lattice = this.forward(lattice); + return this.backward(lattice); + } + forward(lattice) { + for (let i = 1; i <= lattice.eos_pos; i++) { + const nodes = lattice.nodes_end_at[i]; + if (nodes === undefined) { + continue; + } + for (const node of nodes) { + let cost = Number.MAX_VALUE; + let shortest_prev_node = null; + const index = node.start_pos - 1; + if (!(index in lattice.nodes_end_at)) { + // TODO process unknown words (repair word lattice) + continue; + } + const prev_nodes = lattice.nodes_end_at[index]; + for (const prev_node of prev_nodes) { + let edge_cost; + if (node.left_id == null || prev_node.right_id == null) { + // TODO assert + console.log("Left or right is null"); + edge_cost = 0; + } + else { + edge_cost = this.connection_costs.get(prev_node.right_id, node.left_id); + } + const _cost = prev_node.shortest_cost + edge_cost + node.cost; + if (_cost < cost) { + shortest_prev_node = prev_node; + cost = _cost; + } + } + node.prev = shortest_prev_node; + node.shortest_cost = cost; + } + } + return lattice; + } + backward(lattice) { + const shortest_path = []; + const eos = lattice.nodes_end_at[lattice.nodes_end_at.length - 1][0]; + let node_back = eos.prev; + if (node_back == null) { + return []; + } + while (node_back.type !== "BOS") { + shortest_path.push(node_back); + if (node_back.prev == null) { + // TODO Failed to back. Process unknown words? 
+ return []; + } + node_back = node_back.prev; + } + return shortest_path.reverse(); + } +} +export default ViterbiSearcher; +//# sourceMappingURL=ViterbiSearcher.js.map \ No newline at end of file diff --git a/dist/viterbi/ViterbiSearcher.js.map b/dist/viterbi/ViterbiSearcher.js.map new file mode 100644 index 0000000..75f9ce5 --- /dev/null +++ b/dist/viterbi/ViterbiSearcher.js.map @@ -0,0 +1 @@ +{"version":3,"file":"ViterbiSearcher.js","sourceRoot":"","sources":["../../src/viterbi/ViterbiSearcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAMH,MAAM,eAAe;IAGnB;;;;OAIG;IACH,YAAY,gBAAiC;QAP7C;;;;;WAAkC;QAQhC,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;IAC3C,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,OAAuB;QAC5B,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAChC,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,CAAC,OAAuB;QAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACtC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,IAAI,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC;gBAC5B,IAAI,kBAAkB,GAAuB,IAAI,CAAC;gBAElD,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC;gBACjC,IAAI,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;oBACrC,mDAAmD;oBACnD,SAAS;gBACX,CAAC;gBACD,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;gBAC/C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;oBACnC,IAAI,SAAS,CAAC;oBACd,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,IAAI,SAAS,CAAC,QAAQ,IAAI,IAAI,EAAE,CAAC;wBACvD,cAAc;wBACd,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;wBACrC,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;yBAAM,CAAC;wBACN,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CACnC,SAAS,CAAC,QAAQ,EAClB,IAAI,CAAC,OAAO,CACb,CAAC;oBACJ,CAAC;oBAED,MAAM,KAAK,GAAG,SAAS,CAAC,aAAa,GAAG,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC;oBAC9D,IAAI,KAAK,GAAG,IAAI,EAAE,CAAC;wBACjB,kBAAkB,GAAG,SAAS,CAAC;wBAC/B,IAAI,GAAG,KAAK,CAAC;oBACf,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;gBAC/B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;YAC5B,CAAC;QACH,CAAC;QACD,OAAO,OA
AO,CAAC;IACjB,CAAC;IAED,QAAQ,CAAC,OAAuB;QAC9B,MAAM,aAAa,GAAG,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAErE,IAAI,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC;QACzB,IAAI,SAAS,IAAI,IAAI,EAAE,CAAC;YACtB,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,OAAO,SAAS,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YAChC,aAAa,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,SAAS,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;gBAC3B,8CAA8C;gBAC9C,OAAO,EAAE,CAAC;YACZ,CAAC;YACD,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC;QAC7B,CAAC;QAED,OAAO,aAAa,CAAC,OAAO,EAAE,CAAC;IACjC,CAAC;CACF;AAED,eAAe,eAAe,CAAC"} \ No newline at end of file