Skip to content

Commit

Permalink
litt disambigueringsarbeid
Browse files Browse the repository at this point in the history
  • Loading branch information
leneantonsen committed Nov 28, 2024
1 parent 06a0e59 commit 8b75db6
Showing 1 changed file with 68 additions and 21 deletions.
89 changes: 68 additions & 21 deletions src/cg3/disambiguator.cg3
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ LIST <vdic> = <vdic> ; #!! - `<vdic>`
LIST Sem/Act = Sem/Act Sem/Act_Clth Sem/Act_Domain Sem/Act_Domain_Measr Sem/Act_Event Sem/Act_Feat Sem/Act_Feat-psych Sem/Act_Fruit Sem/Act_Group Sem/Act_Hum Sem/Act_Hum_Obj Sem/Act_Money Sem/Act_Obj Sem/Act_Obj-play Sem/Act_Org Sem/Act_Perc-emo Sem/Act_Plc Sem/Act_Prod-audio Sem/Act_Prod-vis Sem/Act_Route Sem/Act_Semcon Sem/Act_State Sem/Act_Time Sem/Act_Tool-it Sem/Act_Txt ;
LIST Sem/Adr = Sem/Adr ;
LIST Sem/Amount = Sem/Amount Sem/Amount_Build Sem/Amount_Semcon ;
LIST Sem/Ani = SSem/Ani Sem/Ani_Body Sem/Ani_Body-abstr_Hum Sem/Ani_Build Sem/Ani_Build_Hum_Txt Sem/Ani_Buildpart Sem/Ani_Cat Sem/Ani_Clth Sem/Ani_Feat_Hum Sem/Ani_Feat_Plant Sem/Ani_Food Sem/Ani_Group Sem/Ani_Group_Hum Sem/Ani_Group_Prod-vis Sem/Ani_Hum Sem/Ani_Hum_Plc Sem/Ani_Hum_Time Sem/Ani_Obj Sem/Ani_Obj-el Sem/Ani_Org Sem/Ani_Plc Sem/Ani_Plc_Txt Sem/Ani_State Sem/Ani_Substnc Sem/Ani_Time Sem/Ani_Tool Sem/Ani_Veh ;
LIST Sem/Ani = Sem/Ani Sem/Ani_Body Sem/Ani_Body-abstr_Hum Sem/Ani_Build Sem/Ani_Build_Hum_Txt Sem/Ani_Buildpart Sem/Ani_Cat Sem/Ani_Clth Sem/Ani_Feat_Hum Sem/Ani_Feat_Plant Sem/Ani_Food Sem/Ani_Group Sem/Ani_Group_Hum Sem/Ani_Group_Prod-vis Sem/Ani_Hum Sem/Ani_Hum_Plc Sem/Ani_Hum_Time Sem/Ani_Obj Sem/Ani_Obj-el Sem/Ani_Org Sem/Ani_Plc Sem/Ani_Plc_Txt Sem/Ani_State Sem/Ani_Substnc Sem/Ani_Time Sem/Ani_Tool Sem/Ani_Veh ;
LIST Sem/Ani-bird = Sem/Ani-bird ;
LIST Sem/Ani-fish = Sem/Ani-fish ;
LIST Sem/Ani-insect = Sem/Ani-insect ;
Expand Down Expand Up @@ -603,7 +603,7 @@ LIST DAESNIE = "daesnie" "desnie" "duesnie" "dusnie" "åvtesne" "sjisjnjielisnie
LIST DAESTIE = "daestie" "destie" "duestie" "dustie" "åvteste" "sjisjnjielistie" "dustie" "miehtjielistie" "dubpielistie" "jilliebistie" "luvlielistie" "ålkoelistie" "gustie" "luvliegistie" "åarjalistie" "minngielistie" "bijjielistie" "sisnjielistie" "åarjielistie" "åerjielistie" "vuelielistie" "debpielistie" "duebpielistie" "åarjelistie" "noerhtelistie" "jillielistie" "luvliebistie" "vueliebistie" ; #!! DAESTIE


LIST ILLADV = "bæjjese" "davvese" "daase" "dahkoe" "diekie" "doekoe" "dohkoe" "gåatan" "jallese" "mænngese" "noerhtese" "olkese" "vueliebasse" "våålese" "åarjese" "åvtese" ; #!! ILLADV
LIST ILLADV = "bæjjese" "davvese" "daase" "dahkoe" "diekie" "doekoe" "dohkoe" "gåatan" "gåetide" "jallese" "mænngese" "noerhtese" "olkese" "vueliebasse" "våålese" "åarjese" "åvtese" ; #!! ILLADV

LIST INEADV1 = "daebpene" "debpene" "duebpene" "dubpene" "daagkoe" "dagkoe" "doegkoe" "dogkoe" "daajkoe" "dajkoe" "doejkoe" "dojkoe" ; #!! INEADV1

Expand Down Expand Up @@ -863,7 +863,7 @@ SELECT:R_Nom_ABBR_Prop Attr IF (0 ABBR + Attr OR Prop + Attr)(1 Sem/Sur) ;
SELECT:R_Nom_ABBR_Prop Attr IF (0C ABBR + Attr OR Prop + Attr) ;
REMOVE:R_Attr_ABBR Attr IF (0 ABBR + Nom)(NOT 1 Prop OR ABBR) ;
## Trond Trosterud
REMOVE:Ani_Attr Attr IF (0 (Prop Sem/Ani)) ;
REMOVE:Ani_Attr Attr IF (0 Prop + Sem/Ani) ;

LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "á" "æ" "ø" "å" "ö" "ä" "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "Á" "Æ" "Ø" "Å" "Ö" "Ä" ; #!! INITIAL
LIST NOT-SUR = "Galilea" "Judea" "Karasjok" "Noerhte-Trøndelag" "Saemie" "Trøndelag" "Tromsø" "Vuaktere" ;
Expand Down Expand Up @@ -940,7 +940,7 @@ SELECT:PoGenb Po (-1 Gen) ;
#!! ### Particles and adverbs

REMOVE:Pcle2 Adv IF (-2 BOS)(-1 Adv OR Ine)(0 Pcle);
REMOVE:AttrAdv Adv IF (0 N LINK -1 Attr - Prop) (NEGATE 0 ("vihth")) ;
REMOVE:AttrAdv Adv IF (0 N LINK -1C Attr - Prop) (NEGATE 0 ("vihth")) ;

SELECT:vaejtie Pcle (0 ("vaejtie") LINK *-1 BOS BARRIER WORD)(1 Pers) ;
#$ Vaejtie mijjieh, gaajhkesh låhkesh, maehtebe leessemebïjlesne åeredh?
Expand Down Expand Up @@ -1008,6 +1008,10 @@ SELECT:AdvFoc Adv (0 (Foc/ge)) ;
SELECT:AifGrade A (0 N LINK -1 GRADE-ADV) ;
#$ Joekoen bïjvele lea jïh guakedh.

# AifCoor: for a cohort that has an N reading, select its A reading when it is
# joined to an adjective through a CC cohort (CC is presumably the
# coordinating-conjunction tag; its definition is outside this view).
# First rule looks left: the previous cohort has a CC reading and the cohort
# before that has an A reading.
SELECT:AifCoor A (0 N LINK -1 CC LINK -1 A) ;
# Second rule is the mirror image, looking right: next cohort CC, then A.
SELECT:AifCoor A (0 N LINK 1 CC LINK 1 A) ;
#$ Men bïjvele jïh njaelkies ...


#!! ### Adjective or Indef
SELECT:jijnjeIndef Indef (0 ("jïjnje") LINK 1 N OR REAL-V OR EOS) ;
Expand Down Expand Up @@ -1119,6 +1123,8 @@ SELECT:PronPers Pron + Gen (0 Pron LINK 1C Po) ; #dan åvteste is mostly Dem, re
## Johan jih aahkam joekoen gaskese dan åvteste satnem veelji.
#REMOVE:PcleNotPron ("dån" Pcle) (0 Pron + Gen LINK 1 Po) ; # trengs ikke
#REMOVE:AttrNotGen Attr (0 Pron + Gen LINK 1 Po) ; # trengs ikke
# NGen: select the N + Gen reading when the same cohort has a Sem/Ani reading
# and the immediately following cohort has a Sem/Body reading.
# Sem/Body is defined outside this view; judging by the set names this targets
# an "animate possessor + body part" sequence -- TODO confirm.
SELECT:NGen N + Gen (0 Sem/Ani LINK 1 Sem/Body) ;
#$ Pluppe vïenhti tjetskien siejpiegietjiem vööjni.


#!! ### Adjective or not
Expand All @@ -1135,6 +1141,10 @@ SELECT:jallh (CC) (0 ("jallh"))(-1 BOS OR COMMA)(NEGATE 1 CC OR Pl3 OR Ine OR CO
SELECT:Acase $$CASES (0 A LINK -1 Dem + $$CASES)(NEGATE 0 Attr) ;
#$ Im dom guhkebem vielie vuejnieh.

# manA: select the A + Nom reading when the immediately preceding cohort has a
# reading with the lemma "man".
SELECT:manA A + Nom (-1 ("man")) ;
#$ Jemmede man sjuevnjede!


#!! Rel or Interr OR Indef

SELECT:Indef Indef (0 ("mij") LINK -1 Neg) ;
Expand Down Expand Up @@ -1211,7 +1221,7 @@ REMOVE:PrtInNeg Prs IF (0 Neg)(*1C ConNeg + Prt BARRIER NOT-ADV);
REMOVE:PrsInNeg Prt IF (0 Neg)(*1C ConNeg BARRIER NOT-ADV LINK NOT 0 Prt);

SELECT:PersonInNeg $$PERS-TAG (0 Neg + Prt)(*1C $$PERS-TAG + ConNeg BARRIER NOT-ADV);
REMOVE:NegPrt (Neg Prt)(NEGATE 1 ("lea") + ConNeg + Prt) ;
REMOVE:NegPrt (Neg Prt)(NEGATE 1 REALCOPULAS + ConNeg + Prt) ;
## this is for removing the 'ij' etc.
## Ij lin gujht daarah altese vielie jienebh biejjieh daesnie.

Expand All @@ -1222,17 +1232,17 @@ SET VERB-LEMMA = (".*"r) + V ;
SELECT:infNotErr VERB-LEMMA + Inf - Err/Orth IF (0 VERB-LEMMA + VFIN + Err/Orth LINK NOT 0 VFIN - Err/Orth) ;
#$ Bøøremes jis åadtjodh saemie-laanten dåehkesne meatan.

SELECT:leahSg2 Sg2 IF (0 ("lea") + Prs + Sg2)(* ("datne") + Pron + Pers + Sg2 BARRIER S-BOUNDARY);
SELECT:leahSg2 Sg2 IF (0 REALCOPULAS + Prs + Sg2)(* ("datne") + Pron + Pers + Sg2 BARRIER S-BOUNDARY);

SELECT:leahPl3 Pl3 IF (0 ("lea") + Prs + Pl3)(* Pl + Nom OR Pl3 + Nom);
SELECT:leahPl3 Pl3 IF (0 REALCOPULAS + Prs + Pl3)(* Pl + Nom OR Pl3 + Nom);


#!! Selecting imperative sentence-initially with appropriate right context

#REMOVE:NotImpIfWord Imp IF (*-1 NOT-ADV-INT-PCLE BARRIER CS OR CC);
#REMOVE:NotIndIfImpAndExcl Ind IF (NEGATE *-1 Neg)(0 Imp)(*1 ("excl") BARRIER Ind);

SELECT:Imprta Imprt (*-1 BOS OR (":") OR Interj BARRIER WORD)(0 2-PERS)(NEGATE *1 2-PERS + Pers + Nom BARRIER S-BOUNDARY) ;
SELECT:Imprta Imprt (*-1 BOS OR (":") OR Interj BARRIER WORD)(0 2-PERS)(NOT 0 N)(NEGATE *1 2-PERS + Pers + Nom BARRIER S-BOUNDARY) ;
## Utnieh hijven.
#$ Govlede!
SELECT:Imprtb Imprt (-1 BOS OR (":") OR Interj)(1 COMMA) ;
Expand Down Expand Up @@ -1283,7 +1293,12 @@ REMOVE:V VFIN IF (0 A + Nom LINK -1 ("dan")) ;
#!! Select Inf
SELECT:inf Inf IF (0 Pl2 LINK -1 ("guktie") LINK -1 VFIN) ;
## Maanine dle leerebe guktie soptsestidh.
SELECT:inf Inf IF (0 Pl2 LINK NOT *-1 Pers + Pl2) ;
#SELECT:inf Inf IF (0 Pl2 LINK NOT *-1 Pers + Pl2) ;
# inf (after finite verb): select the Inf reading when, scanning leftwards, a
# cohort with a VFIN reading is found before any S-BOUNDARY cohort.
# The test has no restriction on position 0, so it applies to every cohort
# that has an Inf reading.
SELECT:inf Inf IF (*-1 VFIN BARRIER S-BOUNDARY) ;
#$ Lasth eah jaksh gahtjedh eannan tjuatsa.

# inf (after superlative "buerie"): select the Inf reading when, scanning
# leftwards, a cohort with lemma "buerie" and tag Superl is found before any
# S-BOUNDARY cohort.
SELECT:inf Inf IF (*-1 ("buerie") + Superl BARRIER S-BOUNDARY) ;
#$ Bööremes voepth tjelmiej uvte utnedh.

#!! ## Mapping rules
# -------------------
Expand Down Expand Up @@ -1370,7 +1385,7 @@ SELECT:NumAttr Num + Attr + Ine IF (1 Ine) ;

#!! Select PrfPrc if DerNomAct

#SELECT: PrfPrc (*-1 ("lea") BARRIER V OR S-BOUNDARY) ;
#SELECT: PrfPrc (*-1 REALCOPULAS BARRIER V OR S-BOUNDARY) ;
SELECT:PrfPrs_DerNomAct PrfPrc (0 (Der/NomAct) LINK *-1 S-BOUNDARY BARRIER V)(NEGATE *1 REAL-V BARRIER S-BOUNDARY) ;
## Disse maahtam tjaeledh gosse sijhtem, jih aaj lohkedh gosse båarasåbpoe sjidteme.
## Manne aktem gärjam man nomme jupmele-vaerie lohkeme.
Expand Down Expand Up @@ -1460,23 +1475,33 @@ SELECT:VSg2IfPersSg2a V + Sg2 (NOT 0 N) ((*-1 Pers + Nom + Sg2 OR Rel + Sg + Nom
SELECT:Pl3WhenPlSubj V + Pl3 IF ((*-1 N + Pl + Nom OR (Num Sg Nom) OR (Pers Nom Pl3) BARRIER Sg2 OR S-BOUNDARY) OR (-1 N + Sg + Nom LINK -1 CC))(0 VFIN);
#$ Maam dah daelvege darjoeh?

#SELECT:Pl3WhenSubjCC V + Pl3 IF (-1 N + Sg + Nom LINK -1 CC)(0 ("lea" V));
REMOVE:Pl3 V + Pl3 (NEGATE *0 Nom BARRIER S-BOUNDARY) ;
#SELECT:Pl3WhenSubjCC V + Pl3 IF (-1 N + Sg + Nom LINK -1 CC)(0 REALCOPULAS);
#REMOVE:Pl3 V + Pl3 (NEGATE *0 Nom BARRIER S-BOUNDARY) ;
# Sg3WhenRelSg: select the V + Sg3 reading in either of two contexts:
#   (a) the previous cohort has a Rel + Sg + Nom reading, or
#   (b) the previous cohort has a CC reading and, scanning further left from
#       it, a cohort that is unambiguously (C) V + Sg3 is found, with
#       S-BOUNDARY and V as barriers.
SELECT:Sg3WhenRelSg V + Sg3 ((-1 Rel + Sg + Nom) OR (-1 CC LINK *-1C V + Sg3 BARRIER S-BOUNDARY OR V));
# Disabled stand-alone version of alternative (b) above.
#SELECT:Sg3WhenSg3toLeft V + Sg3 (-1 CC LINK *-1C V + Sg3 BARRIER S-BOUNDARY OR V);
# Sg3WhenSgtoLeft: select the V + Sg3 reading when the immediately preceding
# cohort has an N + Sg + Nom reading.
SELECT:Sg3WhenSgtoLeft V + Sg3 (-1 N + Sg + Nom);
#$ Pluppe ohtsede maam akt mij tjaahpesje.


#!! Select Inf If Infv

SELECT:InfIfInfv V + Inf IF (*-1 INFV OR AUX BARRIER V) ;
# Manne båatam sagke buerebe datneste jieledh, Karijuse.
#$ Manne båatam sagke buerebe datneste jieledh, Karijuse.

SELECT:VFIN VFIN IF (0 INFV OR AUX LINK *1 Inf BARRIER NOT-ADV-PCLE) ;
#$ Pryövoeh åeredh, vuertieminie edtja löödtjedh.

SELECT:AUXIfInf AUX IF (*1 Inf BARRIER V)(NEGATE 0 A) ;
##Klaasen tjirrh olkese goevli doj veelkes baeniej gåajkoe, jih dellie maam akt bøøti ussjedidh.
## Klaasen tjirrh olkese goevli doj veelkes baeniej gåajkoe, jih dellie maam akt bøøti ussjedidh.

SELECT:PrfPrcIfLea PrfPrc (*-1 ("lea") OR (",") OR CC BARRIER V);
SELECT:PrfPrcIfLea PrfPrc (*-1 REALCOPULAS OR (",") OR CC BARRIER V);
## Gïjre lea sjïdteme daelvien männgan goh lea nåake gåatome jïh jïjnjh juvrh orreme.

# Select the PrfPrc or VFIN readings of a cohort that has a TV reading when,
# scanning leftwards, a cohort with an N + Acc reading is found before any
# NOT-ADV-PCLE cohort (TV and NOT-ADV-PCLE are defined outside this view).
# NOTE(review): this rule reuses the name PrfPrcIfLea from the copula rule
# directly above it although it tests for an accusative noun, not a copula;
# a distinct rule name would make rule traces unambiguous.
SELECT:PrfPrcIfLea PrfPrc OR VFIN (0 TV LINK *-1 N + Acc BARRIER NOT-ADV-PCLE) ;
#$ Mah datne nænnoeslaakan gåetiem tseegkeme,
#$ Tjeapoeh-lijniem eejnegen åtnam.


SELECT:PlIfPlSubj Pl3 IF (*-1 Pl + Nom BARRIER NOT-ADV OR COMMA);
## Jijnjesh leah sijhteminie edtjibie gieline barkedh, maehtibie tjåahkoeh, seeminarh øørnedh gusnie åadtjoejibie gaavnedidh jih rååresjadtedh.

Expand Down Expand Up @@ -1505,7 +1530,7 @@ REMOVE:PropAttr Prop + Attr IF (NOT 1 Prop);
#SELECT:GenToAdp Gen IF (1 Adp); #NOTUSED # check this
#REMOVE:NotNomToAdp Nom IF (1 Adp); #NOTUSED # check this

SELECT:GenIfLeaNom Gen (1 ("lea" V) LINK 0 Pl3 OR Sg3 LINK *1 N + Nom BARRIER NOT-NPMOD) ;
SELECT:GenIfLeaNom Gen (1 REALCOPULAS LINK 0 Pl3 OR Sg3 LINK *1 N + Nom BARRIER NOT-NPMOD) ;
# Dan lin baenieh njaelmesne.

SELECT:AccIfSVOV (Acc Sg) IF (*-1C INFV BARRIER NOT-NPMOD)(1C V); # consider VFIN for INFV
Expand Down Expand Up @@ -1534,14 +1559,24 @@ REMOVE:SgGen Gen (0 Ill + Sg LINK 1 FMAINV) ;
REMOVE:SgIll Ill (0 Adv LINK 1 FMAINV LINK NOT 0 MOVEMENT-V) ;
#$ varki gåatan bïesem jïh aaj mov daelvie-dogkesh gåatan fihkem.

# NomifLeaADV: select the Nom reading when both conditions hold:
#   - the next cohort has a REALCOPULAS reading and also a Pl3 or Sg3 reading
#     (LINK 0 tests the same cohort), and
#   - the previous cohort has one of the lemmas listed in LIST DAESNIE.
# REALCOPULAS is defined outside this view.
SELECT:NomifLeaADV Nom (1 REALCOPULAS LINK 0 Pl3 OR Sg3) (-1 DAESNIE) ;
#$ Tjetskie jïh Sluemege Pluppen luvnie derhviegåetesne, desnie suaja lea.

# NomifVFIN: select the Nom + Pl reading when, scanning rightwards, a cohort
# with a VFIN + Pl3 reading is found before any NOT-ADV-PCLE cohort.
SELECT:NomifVFIN Nom + Pl (*1 VFIN + Pl3 BARRIER NOT-ADV-PCLE) ;
#$ Goeksegh jïjjege buelieh.


SELECT:IneIfEss/Com Ine (0 N + Ess OR N + Com) (1 ("viedtedh" V* TV Der1 Der/ldahke N Sg Ine) OR ("vïedteldahke" N Sg Ine)) ;
# Njolkedassh ålmine veadtaldahkesne
# Njolkedassh ålmine veadtaldahkesne

SELECT:IneIfEss/Com N + Pl + Ine (0 Ess OR Sg + Com) (-1 ("ovmessie")) ;

# IneIfPlc: select the Pl + Ine reading when the cohort has a Sem/Plc reading
# and, scanning rightwards, a cohort with the lemma "gaavnedh" is found before
# any SV-BOUNDARY cohort (Sem/Plc and SV-BOUNDARY are defined outside this view).
SELECT:IneIfPlc Pl + Ine (0 Sem/Plc) (*1 ("gaavnedh") BARRIER SV-BOUNDARY) ;
#$ Bïevline tjeehpes muerjieh gaavnem...


REMOVE:EssIfCom/Ine Ess (0 Sg + Com OR Pl + Ine) (*-1 Pron + Com OR Pron + Ine BARRIER NOT-NPMOD OR S-BOUNDARY) ;
# dajnie barkoeplaanine
# dajnie barkoeplaanine

SELECT:NPlAcc Acc (0 Pl + Acc OR Pron + Pl OR Pron + Pl3 LINK *1 V-TRANS BARRIER NOT-ADV LINK NOT 0 ("böötedh")) ;
SELECT:NPlAcc Acc (0 Pl + Acc OR Pron + Pl OR Pron + Pl3 LINK *1 COPULAS BARRIER NOT-ADV LINK *1 V-TRANS BARRIER NOT-ADV LINK NOT 0 ("böötedh")) ;
Expand All @@ -1557,7 +1592,7 @@ SELECT:NPlIll Ill (0 N + Acc) (NEGATE *0 V-TRANS BARRIER SV-BOUNDARY) ;

#!! ### Verb or Noun

SELECT:PlNomObj N + Pl + Nom IF (0 V + Pl3) (NEGATE *0 VFIN BARRIER NOT-NPMOD OR S-BOUNDARY) ;
#SELECT:PlNomObj N + Pl + Nom IF (0 V + Pl3) (NEGATE *0 VFIN BARRIER NOT-NPMOD OR S-BOUNDARY) ;
#...Henny Bergsland, mij vuelieh tjeeli mejtie Jonetta joejki

# NotNomIfInf: remove the N + Pl + Nom reading from a cohort that also has an
# Inf reading when, scanning leftwards, a cohort with an Ind or Imp reading is
# found and, further to the left of that cohort, one with a Nom reading.
# Neither scan has a BARRIER, so both run to the edge of the window.
REMOVE:NotNomIfInf N + Pl + Nom IF (0 Inf)(*-1 Ind OR Imp LINK *-1 Nom);
Expand Down Expand Up @@ -1841,7 +1876,7 @@ REMOVE:Pl2_Not_Inf Inf IF (0 (V TV Ind Prt Pl2))(*-1 Pron + Pl2 + Nom BARRIER VF

# Even stronger. No pro-drop. ## WARNING! This is probably too strong.
REMOVE:Inf_Not_Sg2_ProDrop (V TV Der1 Der/d Ind Prs Sg2) IF (NEGATE *-1 Pron + Sg2 + Nom);
REMOVE:Inf_Not_Pl2_ProDrop (V TV Ind Prt Pl2) IF (NEGATE *-1 Pron + Pl2 + Nom);
#REMOVE:Inf_Not_Pl2_ProDrop (V TV Ind Prt Pl2) IF (NEGATE *-1 Pron + Pl2 + Nom);

SELECT:PrsPrc PrsPrc IF (0 A + Nom LINK 1 N) ;
REMOVE:PrsPrc PrsPrc IF (0 N + Nom LINK NOT 1 N) ;
Expand Down Expand Up @@ -1959,7 +1994,7 @@ SELECT:rel Rel (0 ("guhte") LINK *-1 VFIN)(*1 VFIN) ;

# verb lemmas

SELECT:notErr VERB-LEMMA - Err/Orth IF (0 VERB-LEMMA + Err/Orth) ;
REMOVE:Err VERB-LEMMA + Err/Orth IF (0 VERB-LEMMA - Err/Orth) ;
#$ Veanhta påajhkh eeremes lyjhkieh niejth mej guhkies tjaebpies voepth. # veanhtedh vs vïenhtedh

# bïssedh OR bissedh
Expand All @@ -1978,6 +2013,12 @@ REMOVE:LEXböötedh ("böötedh") IF (0 ("båetedh")) (NEGATE *-1 Acc OR Nom + P
#$ Ealla lij desnie boelhketjem goh lohkehtæjja-skuvleste bøøti.


# "gåatodh" OR "gaatodh"
# Both rules apply to cohorts that carry readings for both lemmas; *0 scans the
# window in both directions from the target.
# LEXgåatodh: keep the "gåatodh" reading when a cohort with a Sem/Ani + Nom
# reading occurs anywhere in the window.
SELECT:LEXgåatodh ("gåatodh") IF (0 ("gaatodh")) (*0 Sem/Ani + Nom) ;
# LEXgaatodh: keep the "gaatodh" reading when a cohort with lemma "biejjie"
# and tag Nom occurs anywhere in the window.
SELECT:LEXgaatodh ("gaatodh") IF (0 ("gåatodh")) (*0 ("biejjie") + Nom) ;
#$ Biejjie gååte.


# "gïesedh" OR "giesedh"
SELECT:LEXgiesedh ("giesedh") IF (0 ("gïesedh")) (-1 ("bijjelen")) ;
#$ Jaavoeladtje jïh Ijjeladtje bïllijieh, jïh gajpem jïh tjohpem tjelmiej bijjelen giesieh.
Expand All @@ -1989,6 +2030,10 @@ REMOVE:govledhIV IV IF (0 ("govledh") LINK *-1C Acc OR ("sïjhtedh") BARRIER S-B
#$ Sijhti aaj bieljide dompesjidh juktie idtji maam vielie dan bijre sijhth govledh.


# Unconditional lemma preferences: whenever a cohort has readings for both
# lemmas of a pair, the first-named lemma is selected (no further context is tested).
# "lutnjestidh" is preferred over "lutnjiestidh".
SELECT:LEXlutnjestidh ("lutnjestidh") IF (0 ("lutnjiestidh")) ;
# "svïhtjedh" is preferred over "svihtjedh".
SELECT:LEXsvïhtjedh ("svïhtjedh") IF (0 ("svihtjedh")) ;


# "löökedh" OR "luekedh"
SELECT:LEXlöökedh ("löökedh") IF (0 ("luekedh")) ((-1 Sem/Body) OR (1 Sem/Body)) ;

Expand Down Expand Up @@ -2051,6 +2096,8 @@ SELECT:N A + Nom (0 N + Nom OR VFIN LINK 1 ("sjïdtedh")) ;
#$ Gosse aalka sjuevnjede sjïdtedh akte krokodilla baaktoe båata.
REMOVE:N N + Ine (0 A + Ine OR A + Ess LINK *1 ("årrodh")) ;

# APl: remove the A + Pl reading from a cohort that also has an A + Sg reading,
# i.e. prefer singular when an adjective form is ambiguous in number.
# No surrounding context is tested.
REMOVE:APl A + Pl (0 A + Sg) ;
#$ Man sjeavehth dle sjïdti!

SELECT SEMTAGS + N (0C N) ;
SELECT SUB:1 SEMTAGS + (cmp_attr) ; #For Apertium
Expand Down

0 comments on commit 8b75db6

Please sign in to comment.