diff --git a/package-lock.json b/package-lock.json index 5e3c7da0..8b07066a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pashto-inflector", - "version": "7.2.1", + "version": "7.2.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pashto-inflector", - "version": "7.2.1", + "version": "7.2.2", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 6df9c6c4..6ac3f078 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pashto-inflector", - "version": "7.2.1", + "version": "7.2.2", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/components/package-lock.json b/src/components/package-lock.json index f28fb7a5..3bdc5679 100644 --- a/src/components/package-lock.json +++ b/src/components/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lingdocs/ps-react", - "version": "7.2.1", + "version": "7.2.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@lingdocs/ps-react", - "version": "7.2.1", + "version": "7.2.2", "license": "MIT", "dependencies": { "@formkit/auto-animate": "^1.0.0-beta.3", diff --git a/src/components/package.json b/src/components/package.json index 34fbc7de..1ad779f8 100644 --- a/src/components/package.json +++ b/src/components/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/ps-react", - "version": "7.2.1", + "version": "7.2.2", "description": "Pashto inflector library module with React components", "main": "dist/components/library.js", "module": "dist/components/library.js", diff --git a/src/lib/package.json b/src/lib/package.json index 2e22717a..0886048c 100644 --- a/src/lib/package.json +++ b/src/lib/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/inflect", - "version": "7.2.1", + "version": "7.2.2", "description": "Pashto inflector library", "main": "dist/index.js", "types": "dist/lib/library.d.ts", diff --git a/src/lib/src/inflections-and-vocative.ts b/src/lib/src/inflections-and-vocative.ts new file mode 100644 index 00000000..3b26e2e5 --- /dev/null +++ b/src/lib/src/inflections-and-vocative.ts @@ -0,0 +1,596 @@ +import * as T from "../../types"; +import { makePsString } from "./accent-and-ps-utils"; +import { + accentIsOnEnd, + accentOnNFromEnd, + countSyllables, + removeAccents, +} from "./accent-helpers"; +import { applyPsString, mapGen } from "./fp-ps"; +import { getInflectionPattern } from "./inflection-pattern"; +import { + endsInConsonant, + endsInTob, + hasShwaEnding, + mapPsString, + endsWith, +} from "./p-text-helpers"; +import { removeDuplicates } from "./phrase-building/vp-tools"; +import { + isAdjOrUnisexNounEntry, + isAnimNounEntry, + isFemNounEntry, + isInflectableEntry, + isMascNounEntry, + isNounEntry, + isNumberEntry, +} from "./type-predicates"; + +type Plurals = + | { + plural?: T.PluralInflections; + arabicPlural?: T.PluralInflections; + } + | undefined; + +const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; +const endingInHayOrAynRegex = /[^ا][هع]$/; + +export function getInfsAndVocative( + entry: T.DictionaryEntryNoFVars, + plurals: Plurals +): + | { + inflections?: T.Inflections; + vocative?: T.PluralInflections; + } + | false { + if (!isInflectableEntry(entry)) { + return false; + } + // @ts-ignore + const e: T.InflectableEntry = entry as T.InflectableEntry; + const pattern = getInflectionPattern(e); + if ( + pattern === 0 && + isFemNounEntry(e) && + isAnimNounEntry(e) && + endsInConsonant(e) + ) { + return { + vocative: vocFemAnimException({ + e, + plurals: genderPlural("fem", plurals), + }), + }; + } + const gender: T.Gender | "unisex" = + isAdjOrUnisexNounEntry(e) || isNumberEntry(e) + ? "unisex" + : isMascNounEntry(e) + ? "masc" + : "fem"; + if (pattern === 0) { + return false; + } + if (pattern === 6) { + return pattern6({ e, plurals: genderPlural("fem", plurals) }); + } + const funcs = patternFuncs[pattern]; + const masc = + gender === "unisex" || gender === "masc" + ? funcs.masc({ e, plurals: genderPlural("masc", plurals) }) + : undefined; + const fem = + gender === "unisex" || gender === "fem" + ? funcs.fem({ e, plurals: genderPlural("fem", plurals) }) + : undefined; + return aggregateInfsAndVoc(masc, fem); +} + +type PatternInput = { + e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry; + plurals: T.PsString[]; +}; + +type InflectionsAndVocative = { + inflections: T.InflectionSet; + vocative: T.PluralInflectionSet; +}; + +const patternFuncs: Record< + 1 | 2 | 3 | 4 | 5, + Record InflectionsAndVocative> +> = { + 1: { + masc: vocPattern1Masc, + fem: vocPattern1Fem, + }, + 2: { + masc: vocPattern2Masc, + fem: vocPattern2Fem, + }, + 3: { + masc: vocPattern3Masc, + fem: vocPattern3Fem, + }, + 4: { + masc: vocPattern4Masc, + fem: vocPattern4Fem, + }, + 5: { + masc: vocPattern5Masc, + fem: vocPattern5Fem, + }, +}; + +function addPlurals( + e: T.ArrayOneOrMore, + plurals: T.PsString[] +): T.ArrayOneOrMore { + if (!plurals) { + return e; + } + return removeDuplicates([...e, ...plurals]) as T.ArrayOneOrMore; +} + +function pattern6({ e, plurals }: PatternInput): { + inflections: T.Inflections; + vocative: T.PluralInflections; +} { + const base = removeAccents({ p: e.p.slice(0, -1), f: e.f.slice(0, -2) }); + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ۍ`, f: `${base.f}úy` }], + [ + { p: `${base.p}یو`, f: `${base.f}úyo` }, + { p: `${base.p}و`, f: `${base.f}ó` }, + ], + ]; + return { + inflections: { + fem: inflections, + }, + vocative: { + fem: [inflections[0], addPlurals(inflections[2], plurals)], + }, + }; +} + +function vocFemAnimException({ + e, + plurals, +}: PatternInput): T.PluralInflections { + if (!e.ppp || !e.ppf) { + throw new Error( + "plural missing for feminine animate exception noun " + e.p + ); + } + // TODO: HANDLE BETTER WITH PLURALS! + const plurBase = mapPsString( + (x) => x.slice(0, -1), + makePsString(e.ppp, e.ppf) + ); + const base = + countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f); + return { + fem: [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], plurals), + ], + }; +} + +function vocPattern1Masc({ e, plurals }: PatternInput): InflectionsAndVocative { + if (isNounEntry(e) && endsInTob(e)) { + const base = mapPsString((x) => x.slice(0, -3), e); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}تبو`, f: `${base.f}tábo` }, + ]; + return { + inflections: [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}تابه`, f: `${base.f}taabú` }], + second, + ], + vocative: [[{ p: `${e.p}ه`, f: `${e.f}a` }], addPlurals(second, plurals)], + }; + } + const shwaEnding = hasShwaEnding(e); + const base = mapGen( + (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), + mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) + ); + if (shwaEnding && e.f.endsWith("ú")) { + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}ó` }, + ]; + return { + inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second], + vocative: [ + [{ p: `${base.p}ه`, f: `${base.f}á` }], + addPlurals(second, plurals), + ], + }; + } + // TODO: shouldn't this be accent-sensitive? + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + return { + inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second], + vocative: [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + addPlurals(second, plurals), + ], + }; +} + +// TODO this is HUGELY repetitive refactor this! +function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative { + const shwaEnding = hasShwaEnding(e); + const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding; + const base = mapGen( + (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), + hasFemEnding + ? mapPsString((x) => x.slice(0, -1), e) + : makePsString(e.p, e.f) + ); + if ( + endsWith( + [ + { p: "ع", f: "a" }, + { p: "ع", f: "a'" }, + ], + e + ) && + !["ا", "ی", "ې"].includes(e.p.at(-2) || "") + ) { + const base = applyPsString( + { + f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1), + }, + e + ); + if (accentIsOnEnd(e)) { + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}ó` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ې`, f: `${base.f}é` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ې`, f: `${base.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + if ( + endsWith([{ p: "ح", f: "a" }], e) && + !["ا", "ی", "ې"].includes(e.p.at(-2) || "") + ) { + const base = applyPsString( + { + f: (f) => f.slice(0, -1), + }, + e + ); + if (accentIsOnEnd(e)) { + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}ó` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ې`, f: `${base.f}é` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ې`, f: `${base.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + if (hasFemEnding && accentIsOnEnd(e)) { + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}ó` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: `${base.p}ه`, f: `${base.f}á` }], + [{ p: `${base.p}ې`, f: `${base.f}é` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + if (isFemNounEntry(e) && endsInConsonant(e)) { + const baseForInf = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; + const second: T.ArrayOneOrMore = [ + { p: `${baseForInf.p}و`, f: `${baseForInf.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [{ p: `${baseForInf.p}ې`, f: `${baseForInf.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; + } + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + [{ p: `${base.p}ې`, f: `${base.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; +} + +function vocPattern2Masc({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2)); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}یو`, f: `${base.f}iyo` }, + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + return { + inflections: [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ي`, f: `${base.f}ee` }], + second, + ], + vocative: [ + [{ p: `${base.p}یه`, f: `${base.f}iya` }], + addPlurals(second, plurals), + ], + }; +} + +function vocPattern2Fem({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString( + e.p.slice(0, -1), + e.f.slice(0, e.f.endsWith("ay") ? -2 : -1) + ); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}یو`, f: `${base.f}iyo` }, + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${base.p}ې`, f: `${base.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[0], addPlurals(second, plurals)], + }; +} + +function vocPattern3Masc({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString( + e.p.slice(0, -1), + // shouldn't be accents here but remove just to be sure + removeAccents(e.f.slice(0, -2)) + ); + const baseSyls = countSyllables(base); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}یو`, f: `${base.f}úyo` }, + { p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` }, + ]; + return { + inflections: [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ي`, f: `${base.f}${baseSyls ? "ée" : "ee"}` }], + second, + ], + vocative: [ + [{ p: `${base.p}یه`, f: `${base.f}úya` }], + addPlurals(second, plurals), + ], + }; +} + +function vocPattern3Fem({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString( + e.p.slice(0, -1), + // shouldn't be accents here but remove just to be sure + removeAccents(e.f.slice(0, -2)) + ); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}یو`, f: `${base.f}úyo` }, + { p: `${base.p}و`, f: `${base.f}ó` }, + ]; + const plain: T.ArrayOneOrMore = [ + { p: `${base.p}ۍ`, f: `${base.f}úy` }, + ]; + return { + inflections: [plain, plain, second], + vocative: [plain, addPlurals(second, plurals)], + }; +} + +function vocPattern4Masc({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; + const firstInf = accentOnNFromEnd( + makePsString(e.infap || "", e.infaf || ""), + 0 + ); + const secondBase = makePsString(e.infbp || "", e.infbf || ""); + const second: T.ArrayOneOrMore = [ + { p: `${secondBase.p}و`, f: `${secondBase.f}ó` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: e.p, f: e.f }], + [firstInf], + second, + ]; + if (endsInConsonant(e)) { + return { + inflections, + vocative: [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + addPlurals(second, plurals), + ], + }; + } + // TODO: is this even possible? + if (hasShwaEnding(e)) { + return { + inflections, + vocative: [ + [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }], + addPlurals(second, plurals), + ], + }; + } + // exception for مېلمه, کوربه + return { + inflections, + vocative: [[{ p: e.p, f: e.f }], second], + }; +} + +function vocPattern4Fem({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString(e.infbp || "", e.infbf || ""); + const second = addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals); + const inflections: T.InflectionSet = [ + [{ p: `${base.p}ه`, f: `${base.f}á` }], + [{ p: `${base.p}ې`, f: `${base.f}é` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], second], + }; +} + +function vocPattern5Masc({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString(e.infbp || "", e.infbf || ""); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + return { + inflections: [ + [{ p: e.p, f: e.f }], + [{ p: `${base.p}ه`, f: `${base.f}u` }], + second, + ], + vocative: [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + addPlurals(second, plurals), + ], + }; +} + +function vocPattern5Fem({ e, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString(e.infbp || "", e.infbf || ""); + const second: T.ArrayOneOrMore = [ + { p: `${base.p}و`, f: `${base.f}o` }, + ]; + const inflections: T.InflectionSet = [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + [{ p: `${base.p}ې`, f: `${base.f}e` }], + second, + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(second, plurals)], + }; +} + +function aggregateInfsAndVoc( + masc: InflectionsAndVocative | undefined, + fem: InflectionsAndVocative | undefined +): { + inflections?: T.Inflections; + vocative?: T.PluralInflections; +} { + if (masc && fem) { + return { + inflections: { + masc: masc.inflections, + fem: fem.inflections, + }, + vocative: { + masc: masc.vocative, + fem: fem.vocative, + }, + }; + } + if (masc) { + return { + inflections: { + masc: masc.inflections, + }, + vocative: { + masc: masc.vocative, + }, + }; + } + if (fem) { + return { + inflections: { + fem: fem.inflections, + }, + vocative: { + fem: fem.vocative, + }, + }; + } + return { inflections: undefined, vocative: undefined }; +} + +function genderPlural(gender: T.Gender, plurals: Plurals): T.PsString[] { + if (!plurals) return []; + if (gender === "masc") { + return [ + ...(plurals.plural && "masc" in plurals.plural + ? plurals.plural.masc[1] + : []), + ...(plurals.arabicPlural && "masc" in plurals.arabicPlural + ? plurals.arabicPlural.masc[1] + : []), + ]; + } else { + return [ + ...(plurals.plural && "fem" in plurals.plural + ? plurals.plural.fem[1] + : []), + ...(plurals.arabicPlural && "fem" in plurals.arabicPlural + ? plurals.arabicPlural.fem[1] + : []), + ]; + } +} diff --git a/src/lib/src/pashto-inflector.test.ts b/src/lib/src/pashto-inflector.test.ts index 74d77b1d..72771c3d 100644 --- a/src/lib/src/pashto-inflector.test.ts +++ b/src/lib/src/pashto-inflector.test.ts @@ -1822,6 +1822,16 @@ const nouns: { ], ], }, + vocative: { + fem: [ + [{ p: "دوستي", f: "dostee" }], + [ + { p: "دوستیو", f: "dostúyo" }, + { p: "دوستو", f: "dostó" }, + { p: "دوستیانو", f: "dostiyáano" }, + ], + ], + }, }, }, // Feminine regular ending in ۍ diff --git a/src/lib/src/pashto-inflector.ts b/src/lib/src/pashto-inflector.ts index 62070833..913c425a 100644 --- a/src/lib/src/pashto-inflector.ts +++ b/src/lib/src/pashto-inflector.ts @@ -6,7 +6,6 @@ * */ -import { pashtoConsonants } from "./pashto-consonants"; import { concatInflections, splitDoubleWord, @@ -20,30 +19,16 @@ import { endsWith, concatPlurals, hasShwaEnding, - mapPsString, - endsInTob, } from "./p-text-helpers"; import { makePsString, removeFVarients } from "./accent-and-ps-utils"; import { - accentFSylsOnNFromEnd, accentOnNFromEnd, countSyllables, - hasAccents, removeAccents, - splitUpSyllables, } from "./accent-helpers"; import * as T from "../../types"; -import { applyPsString, fmapSingleOrLengthOpts } from "./fp-ps"; -import { getVocatives } from "./vocatives"; -import { - isAdjectiveEntry, - isNumberEntry, - isPattern1Entry, -} from "./type-predicates"; - -const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; -const endingInHayOrAynRegex = /[^ا][هع]$/; -// const endingInAlefRegex = /اع?$/; +import { getInfsAndVocative } from "./inflections-and-vocative"; +import { fmapSingleOrLengthOpts } from "./fp-ps"; export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { // If it's a noun/adj, inflect accordingly @@ -74,78 +59,16 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { if (w.c && w.c.includes("pl.")) { return handlePluralNounOrAdj(w); } - if ( - w.c && - (isAdjectiveEntry(word) || w.c.includes("unisex") || isNumberEntry(word)) - ) { - return handleUnisexWord(w); - } - if (w.c && w.c.includes("n. m.")) { - return handleMascNoun(w); - } - if (w.c && w.c.includes("n. f.")) { - return handleFemNoun(w); - } - // It's not a noun/adj - return false; -} -// LEVEL 2 FUNCTIONS -function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { - // Get last letter of Pashto and last two letters of phonetics - // TODO: !!! Handle weird endings / symbols ' etc. - const pEnd = word.p.slice(-1); - const plurals = makePlural(word); - const vocative = getVocatives(word, plurals); - if (word.noInf) { - return !plurals ? false : { ...plurals }; - } - if (word.infap && word.infaf && word.infbp && word.infbf) { - return { - inflections: inflectIrregularUnisex(word.p, word.f, [ - { p: word.infap, f: word.infaf }, - { p: word.infbp, f: word.infbf }, - ]), - vocative, - ...plurals, - }; - } - if (pEnd === "ی" && word.f.slice(-2) === "ay") { - return { - inflections: inflectRegularYayUnisex(word.p, word.f), - vocative, - ...plurals, - }; - } - if (pEnd === "ه" && word.g.slice(-1) === "u") { - return { - inflections: inflectRegularShwaEndingUnisex(word.p, word.f), - vocative, - ...plurals, - }; - } - if (pEnd === "ی" && word.f.slice(-2) === "áy") { - return { - inflections: inflectEmphasizedYayUnisex(word.p, word.f), - vocative, - ...plurals, - }; - } - if ( - pashtoConsonants.includes(pEnd) || - word.p.slice(-2) === "وی" || - word.p.slice(-2) === "ای" || - word.f.slice(-1) === "w" || - (word.p.slice(-1) === "ه" && word.f.slice(-1) === "h") - ) { - return { - inflections: inflectConsonantEndingUnisex(word.p, word.f), - vocative, - ...plurals, - }; + const plurals = makePlural(w); + const infAndVoc = getInfsAndVocative(w, plurals); + if (!infAndVoc && !plurals) { + return false; } - if (plurals) return plurals; - return false; + return { + ...plurals, + ...infAndVoc, + }; } function handlePluralNounOrAdj(w: T.DictionaryEntryNoFVars): T.InflectorOutput { @@ -158,158 +81,7 @@ function handlePluralNounOrAdj(w: T.DictionaryEntryNoFVars): T.InflectorOutput { return { ...plurals }; } -function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput { - // Get last letter of Pashto and last two letters of phonetics - // TODO: !!! Handle weird endings / symbols ' etc. - const plurals = makePlural(w); - const vocative = getVocatives(w, plurals); - if (w.noInf) { - return !plurals ? false : { ...plurals }; - } - const pEnd = w.p.slice(-1); - const fEnd = w.f.slice(-2); - if (w.infap && w.infaf && w.infbp && w.infbf) { - return { - inflections: inflectIrregularMasc(w.p, w.f, [ - { p: w.infap, f: w.infaf }, - { p: w.infbp, f: w.infbf }, - ]), - vocative, - ...plurals, - }; - } - if (endsInTob(w)) { - return { inflections: inflectTobMasc(w.p, w.f), vocative, ...plurals }; - } - // TODO: stopgap before refactoring - // @ts-ignore - if (isPattern1Entry(w)) { - return { - inflections: { - masc: inflectPattern1Masc( - // @ts-ignore - makePsString(w.p, w.f) - ), - }, - vocative, - ...plurals, - }; - } - if ( - pEnd === "ی" && - (fEnd === "áy" || (fEnd === "ay" && countSyllables(w) === 1)) - ) { - const inflections = inflectRegularEmphasizedYayMasc(w.p, w.f); - return { - inflections, - vocative, - ...plurals, - }; - } - if (pEnd === "ی" && fEnd === "ay") { - return { - inflections: inflectRegularYayMasc(w.p, w.f), - vocative, - ...plurals, - }; - } - return plurals ? { ...plurals } : false; -} - -function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { - // Get first of comma seperated phonetics entries - /* istanbul ignore next */ // will always have word.c at this point - const c = word.c || ""; - const animate = c.includes("anim."); - const pEnd = word.p.slice(-1); - const plurals = makePlural(word); - const vocative = getVocatives(word, plurals); - if (word.noInf) { - return !plurals ? false : { ...plurals }; - } - - if (endingInHayOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) { - return { - inflections: inflectRegularAFem(word.p, word.f), - vocative, - ...plurals, - }; - } - if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) { - return { - vocative, - inflections: inflectRegularAWithHimPEnding(word.p, word.f), - ...plurals, - }; - } - // TODO: better reusable function to check if something ends with a consonant - if ( - (pashtoConsonants.includes(pEnd) || word.f.slice(-1) === "w") && - !animate - ) { - return { - vocative, - inflections: inflectRegularInanMissingAFem(word.p, word.f), - ...plurals, - }; - } - if (pEnd === "ي" && !animate) { - return { - inflections: inflectRegularInanEeFem(word.p, word.f), - vocative, - ...plurals, - }; - } - if (pEnd === "ۍ") { - return { - inflections: inflectRegularUyFem(word.p, word.f), - vocative, - ...plurals, - }; - } - // if (endingInAlefRegex.test(word.p)) { - // return { inflections: inflectRegularAaFem(word.p, f) }; - // } - return plurals || vocative - ? { - ...(plurals ? plurals : {}), - ...(vocative - ? { - vocative, - } - : {}), - } - : false; -} - -// LEVEL 3 FUNCTIONS -function inflectIrregularUnisex( - p: string, - f: string, - inflections: Array<{ p: string; f: string }> -): T.Inflections { - const inf1 = removeAccents(inflections[1]); - const inf0 = removeAccents(inflections[0]); - const inf0fSyls = splitUpSyllables(inf0.f).length; - return { - masc: [ - [{ p, f }], - [ - { - p: inflections[0].p, - f: `${inf0.f.slice(0, -1)}${inf0fSyls === 1 ? "u" : "ú"}`, - }, - ], - [{ p: `${inf1.p}و`, f: `${inf1.f}${inf0fSyls === 1 ? "o" : "ó"}` }], - ], - fem: [ - [{ p: `${inf1.p}ه`, f: `${inf1.f}${inf0fSyls === 1 ? "a" : "á"}` }], - [{ p: `${inf1.p}ې`, f: `${inf1.f}${inf0fSyls === 1 ? "e" : "é"}` }], - [{ p: `${inf1.p}و`, f: `${inf1.f}${inf0fSyls === 1 ? "o" : "ó"}` }], - ], - }; -} - +// TODO: REMOVE THIS export function inflectRegularYayUnisex( p: string, f: string @@ -336,6 +108,7 @@ export function inflectRegularYayUnisex( }; } +// TODO: REMOVE THIS export function inflectRegularShwaEndingUnisex( pr: string, fr: string @@ -358,214 +131,6 @@ export function inflectRegularShwaEndingUnisex( }; } -function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections { - const baseP = p.slice(0, -1); - const baseF = f.slice(0, -2); - return { - masc: [ - [{ p, f }], - [{ p: `${baseP}ي`, f: `${baseF}ée` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}ó` }, - ], - ], - fem: [ - [{ p: `${baseP}ۍ`, f: `${baseF}úy` }], - [{ p: `${baseP}ۍ`, f: `${baseF}úy` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}ó` }, - ], - ], - }; -} - -function inflectPattern1Masc(e: T.PsString): T.InflectionSet { - const shwaEnding = hasShwaEnding(e); - const base = applyPsString( - { - f: (x) => (countSyllables(e) === 1 ? accentFSylsOnNFromEnd(x, 0) : x), - }, - mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) - ); - if (e.f.endsWith("ú")) { - return [[e], [e], [{ p: `${base.p}و`, f: `${base.f}ó` }]]; - } - return [[e], [e], [{ p: `${base.p}و`, f: `${base.f}o` }]]; -} - -function inflectConsonantEndingUnisex( - p: string, - f: string -): T.UnisexInflections { - const fSyls = splitUpSyllables(removeAccents(f)); - const iBase = - fSyls.length === 1 - ? makePsString(p, accentFSylsOnNFromEnd(fSyls, 0)) - : makePsString(p, f); - return { - masc: [[{ p, f }], [{ p, f }], [{ p: `${iBase.p}و`, f: `${iBase.f}o` }]], - fem: [ - [{ p: `${iBase.p}ه`, f: `${iBase.f}a` }], - [{ p: `${iBase.p}ې`, f: `${iBase.f}e` }], - [{ p: `${iBase.p}و`, f: `${iBase.f}o` }], - ], - }; -} - -function inflectRegularYayMasc(p: string, f: string): T.Inflections { - const baseP = p.slice(0, -1); - const baseF = f.slice(0, -2); - return { - masc: [ - [{ p, f }], - [{ p: `${baseP}ي`, f: `${baseF}ee` }], - [ - { p: `${baseP}یو`, f: `${baseF}iyo` }, - { p: `${baseP}و`, f: `${baseF}o` }, - ], - ], - }; -} - -function inflectTobMasc(p: string, f: string): T.Inflections { - const base = removeAccents( - mapPsString((x) => x.slice(0, -3), makePsString(p, f)) - ); - return { - masc: [ - [{ p, f }], - [{ p: `${base.p}تابه`, f: `${base.f}taabú` }], - [{ p: `${base.p}تبو`, f: `${base.f}tábo` }], - ], - }; -} - -function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections { - const baseP = p.slice(0, -1); - const baseF = f.slice(0, -2); - - if (countSyllables(makePsString(p, f)) === 1) { - return { - masc: [ - [{ p, f }], - [{ p: `${baseP}ي`, f: `${baseF}ee` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}o` }, - ], - ], - }; - } - return { - masc: [ - [{ p, f }], - [{ p: `${baseP}ي`, f: `${baseF}ée` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}ó` }, - ], - ], - }; -} - -function inflectIrregularMasc( - p: string, - f: string, - inflections: Array<{ p: string; f: string }> -): T.Inflections { - let inf0f = removeAccents(inflections[0].f); - const inf0syls = splitUpSyllables(f).length; - const inf1f = removeAccents(inflections[1].f); - return { - masc: [ - [{ p, f }], - [ - { - p: inflections[0].p, - f: `${inf0f.slice(0, -1)}${inf0syls === 1 ? "u" : "ú"}`, - }, - ], - [ - { - p: `${inflections[1].p}و`, - f: `${inf1f}${inf0syls === 1 ? "o" : "ó"}`, - }, - ], - ], - }; -} - -function inflectRegularAFem(p: string, f: string): T.Inflections { - const withoutTrailingComma = ["'", "’"].includes(f.slice(-1)) - ? f.slice(0, -1) - : f; - const accentLast = hasAccents(withoutTrailingComma.slice(-1)); - const baseF = withoutTrailingComma.slice(0, -1); - const baseP = p.slice(-1) === "ع" ? p : p.slice(0, -1); - return { - fem: [ - [{ p, f }], - [{ p: `${baseP}ې`, f: `${baseF}${accentLast ? "é" : "e"}` }], - [{ p: `${baseP}و`, f: `${baseF}${accentLast ? "ó" : "o"}` }], - ], - }; -} - -function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections { - const baseF = f.slice(0, -1); - return { - fem: [ - [{ p, f }], - [{ p: `${p}ې`, f: `${baseF}e` }], - [{ p: `${p}و`, f: `${baseF}o` }], - ], - }; -} - -function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections { - const fBase = - splitUpSyllables(f).length === 1 ? accentFSylsOnNFromEnd(f, 0) : f; - return { - fem: [ - [{ p, f }], - [{ p: `${p}ې`, f: `${fBase}e` }], - [{ p: `${p}و`, f: `${fBase}o` }], - ], - }; -} - -function inflectRegularInanEeFem(p: string, f: string): T.Inflections { - const baseP = p.slice(0, -1); - const baseF = f.slice(0, -2); - return { - fem: [ - [{ p, f }], - [{ p: `${baseP}ۍ`, f: `${baseF}úy` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}ó` }, - ], - ], - }; -} - -function inflectRegularUyFem(p: string, f: string): T.Inflections { - const baseP = p.slice(0, -1); - const baseF = removeAccents(f.slice(0, -2)); - return { - fem: [ - [{ p, f: `${baseF}úy` }], - [{ p, f: `${baseF}úy` }], - [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}ó` }, - ], - ], - }; -} - function makePashtoPlural( word: T.DictionaryEntryNoFVars ): T.PluralInflections | undefined { diff --git a/src/lib/src/type-predicates.ts b/src/lib/src/type-predicates.ts index 5df4d27f..57ff8cce 100644 --- a/src/lib/src/type-predicates.ts +++ b/src/lib/src/type-predicates.ts @@ -98,7 +98,7 @@ export function isMascNounEntry(e: T.InflectableEntry): e is T.MascNounEntry { return !!e.c && e.c.includes("n. m."); } -export function isFemNounEntry(e: T.InflectableEntry): e is T.FemNounEntry { +export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry { return !!e.c && e.c.includes("n. f."); } diff --git a/src/lib/src/vocatives.ts b/src/lib/src/vocatives.ts deleted file mode 100644 index 489e34e0..00000000 --- a/src/lib/src/vocatives.ts +++ /dev/null @@ -1,371 +0,0 @@ -import * as T from "../../types"; -import { makePsString } from "./accent-and-ps-utils"; -import { - accentIsOnEnd, - accentOnNFromEnd, - countSyllables, - removeAccents, -} from "./accent-helpers"; -import { applyPsString, mapGen } from "./fp-ps"; -import { getInflectionPattern } from "./inflection-pattern"; -import { - endsInConsonant, - endsInTob, - hasShwaEnding, - mapPsString, - endsWith, -} from "./p-text-helpers"; -import { removeDuplicates } from "./phrase-building/vp-tools"; -import { - isAdjOrUnisexNounEntry, - isAnimNounEntry, - isFemNounEntry, - isInflectableEntry, - isMascNounEntry, - isNounEntry, - isNumberEntry, -} from "./type-predicates"; - -type Plurals = - | { - plural?: T.PluralInflections; - arabicPlural?: T.PluralInflections; - } - | undefined; - -export function getVocatives( - entry: T.DictionaryEntryNoFVars, - plurals: Plurals -): T.PluralInflections | undefined { - if (!isInflectableEntry(entry)) { - return undefined; - } - // @ts-ignore - const e: T.InflectableEntry = entry as T.InflectableEntry; - const pattern = getInflectionPattern(e); - if ( - pattern === 0 && - isFemNounEntry(e) && - isAnimNounEntry(e) && - endsInConsonant(e) - ) { - return vocFemAnimException({ e, plurals: genderPlural("fem", plurals) }); - } - const gender: T.Gender | "unisex" = - isAdjOrUnisexNounEntry(e) || isNumberEntry(e) - ? "unisex" - : isMascNounEntry(e) - ? "masc" - : "fem"; - if (pattern === 0 || pattern === 6) { - return undefined; - } - const funcs = patternFuncs[pattern]; - if (gender === "masc") { - return { - masc: funcs.masc({ e, plurals: genderPlural("masc", plurals) }), - }; - } - if (gender === "fem") { - return { - fem: funcs.fem({ e, plurals: genderPlural("fem", plurals) }), - }; - } - if (gender === "unisex") { - return { - masc: funcs.masc({ e, plurals: genderPlural("masc", plurals) }), - fem: funcs.fem({ e, plurals: genderPlural("fem", plurals) }), - }; - } -} - -type PatternInput = { - e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry; - plurals: T.PsString[]; -}; - -const patternFuncs: Record< - 1 | 2 | 3 | 4 | 5, - Record T.PluralInflectionSet> -> = { - 1: { - masc: vocPattern1Masc, - fem: vocPattern1Fem, - }, - 2: { - masc: vocPattern2Masc, - fem: vocPattern2Fem, - }, - 3: { - masc: vocPattern3Masc, - fem: vocPattern3Fem, - }, - 4: { - masc: vocPattern4Masc, - fem: vocPattern4Fem, - }, - 5: { - masc: vocPattern5Masc, - fem: vocPattern5Fem, - }, -}; - -function addPlurals( - e: T.ArrayOneOrMore, - plurals: T.PsString[] -): T.ArrayOneOrMore { - if (!plurals) { - return e; - } - return removeDuplicates([...e, ...plurals]) as T.ArrayOneOrMore; -} - -function vocFemAnimException({ - e, - plurals, -}: PatternInput): T.PluralInflections { - if (!e.ppp || !e.ppf) { - throw new Error( - "plural missing for feminine animate exception noun " + e.p - ); - } - // TODO: HANDLE BETTER WITH PLURALS! - const plurBase = mapPsString( - (x) => x.slice(0, -1), - makePsString(e.ppp, e.ppf) - ); - const base = - countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f); - return { - fem: [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], plurals), - ], - }; -} - -function vocPattern1Masc({ e, plurals }: PatternInput): T.PluralInflectionSet { - if (isNounEntry(e) && endsInTob(e)) { - const base = mapPsString((x) => x.slice(0, -3), e); - return [ - [{ p: `${e.p}ه`, f: `${e.f}a` }], - addPlurals([{ p: `${base.p}تبو`, f: `${base.f}tábo` }], plurals), - ]; - } - const shwaEnding = hasShwaEnding(e); - const base = mapGen( - (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), - mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) - ); - if (shwaEnding && e.f.endsWith("ú")) { - return [ - [{ p: `${base.p}ه`, f: `${base.f}á` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals), - ]; - } - return [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; -} - -function vocPattern1Fem({ e, plurals }: PatternInput): T.PluralInflectionSet { - const shwaEnding = hasShwaEnding(e); - const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding; - const base = mapGen( - (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), - hasFemEnding - ? mapPsString((x) => x.slice(0, -1), e) - : makePsString(e.p, e.f) - ); - if ( - endsWith( - [ - { p: "ع", f: "a" }, - { p: "ع", f: "a'" }, - ], - e - ) - ) { - const base = applyPsString( - { - f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1), - }, - e - ); - if (accentIsOnEnd(e)) { - return [ - [{ p: `${base.p}ې`, f: `${base.f}é` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals), - ]; - } - return [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; - } - if (endsWith([{ p: "ح", f: "a" }], e)) { - const base = applyPsString( - { - f: (f) => f.slice(0, -1), - }, - e - ); - if (accentIsOnEnd(e)) { - return [ - [{ p: `${base.p}ې`, f: `${base.f}é` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals), - ]; - } - return [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; - } - if (hasFemEnding && accentIsOnEnd(e)) { - return [ - [{ p: `${base.p}ې`, f: `${base.f}é` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals), - ]; - } - return [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; -} - -function vocPattern2Masc({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2)); - return [ - [{ p: `${base.p}یه`, f: `${base.f}iya` }], - addPlurals( - [ - { p: `${base.p}یو`, f: `${base.f}iyo` }, - { p: `${base.p}و`, f: `${base.f}o` }, - ], - plurals - ), - ]; -} - -function vocPattern2Fem({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString( - e.p.slice(0, -1), - e.f.slice(0, e.f.endsWith("ay") ? -2 : -1) - ); - return [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals( - [ - { p: `${base.p}یو`, f: `${base.f}iyo` }, - { p: `${base.p}و`, f: `${base.f}o` }, - ], - plurals - ), - ]; -} - -function vocPattern3Masc({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString( - e.p.slice(0, -1), - // shouldn't be accents here but remove just to be sure - removeAccents(e.f.slice(0, -2)) - ); - const baseSyls = countSyllables(base); - return [ - [{ p: `${base.p}یه`, f: `${base.f}úya` }], - addPlurals( - [ - { p: `${base.p}یو`, f: `${base.f}úyo` }, - { p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` }, - ], - plurals - ), - ]; -} - -function vocPattern3Fem({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString( - e.p.slice(0, -1), - // shouldn't be accents here but remove just to be sure - removeAccents(e.f.slice(0, -2)) - ); - // TODO: This works well for unisex nouns/adjs but would be redundant for fem. nouns? - return [ - [{ p: `${base.p}ۍ`, f: `${base.f}úy` }], - addPlurals( - [ - { p: `${base.p}یو`, f: `${base.f}úyo` }, - { p: `${base.p}و`, f: `${base.f}ó` }, - ], - plurals - ), - ]; -} - -function vocPattern4Masc({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; - const plurBase = makePsString(e.infbp || "", e.infbf || ""); - if (endsInConsonant(e)) { - return [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], plurals), - ]; - } - // TODO: is this even possible? - if (hasShwaEnding(e)) { - return [ - [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }], - addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], plurals), - ]; - } - // exception for مېلمه, کوربه - return [[{ p: e.p, f: e.f }], [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }]]; -} - -function vocPattern4Fem({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString(e.infbp || "", e.infbf || ""); - return [ - [{ p: `${base.p}ې`, f: `${base.f}é` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals), - ]; -} - -function vocPattern5Masc({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString(e.infbp || "", e.infbf || ""); - return [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; -} - -function vocPattern5Fem({ e, plurals }: PatternInput): T.PluralInflectionSet { - const base = makePsString(e.infbp || "", e.infbf || ""); - return [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals), - ]; -} - -function genderPlural(gender: T.Gender, plurals: Plurals): T.PsString[] { - if (!plurals) return []; - if (gender === "masc") { - return [ - ...(plurals.plural && "masc" in plurals.plural - ? plurals.plural.masc[1] - : []), - ...(plurals.arabicPlural && "masc" in plurals.arabicPlural - ? plurals.arabicPlural.masc[1] - : []), - ]; - } else { - return [ - ...(plurals.plural && "fem" in plurals.plural - ? plurals.plural.fem[1] - : []), - ...(plurals.arabicPlural && "fem" in plurals.arabicPlural - ? plurals.arabicPlural.fem[1] - : []), - ]; - } -}