Skip to content

Commit

Permalink
small fixes in the deirab/unmark routine
Browse files — browse the repository at this point in the history
  • Loading branch information
noureddin committed Aug 27, 2024
1 parent 59c53ef commit 91bfdee
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
14 changes: 9 additions & 5 deletions .quran.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ const aayaat_logic = (function () {
const unmark = (phrase) => phrase
// remove mosħaf formatting signs
.r(/\xa0\u06dd[٠-٩]+(?:\xa0\u06e9)?/, '') // ayah number & sajda if any
.r(/\u06de\xa0/, '') // start of rub el hizb if found
.r(/[\u06D6-\u06DC] /, '') // waqf signs
.r(/\u0305/g, '') // combining overline
.r(/^(.)\u0651/g, '$1') // initial shadda-of-idgham
.r(/\u06de\xa0/, '') // start of rub el hizb if found
.r(/[\u06D6-\u06DC] /, '') // waqf signs
.r(/\u0305/g, '') // combining overline
.r(/^(.)\u0651/g, '$1') // initial shadda-of-idgham
// remove final tashkeel signs (except shadda)
.r(/[\u06e4-\u06e6]+$/g, '') // madd-monfasel & madd sela
.r(/\u06e1$/, '') // jazm (quranic sukun)
Expand All @@ -15,11 +15,15 @@ const unmark = (phrase) => phrase
.r(/[\u08f1\u08f2]$/, '') // open tanween {damm, kasr}
.r(/\u064f\u06e2$/, '') // iqlab tanween damm
.r(/\u0650\u06ed$/, '') // iqlab tanween kasr
.r(/\u06e2$/, '') // iqlab on final noon
.r(/(ا)\u06df$/, '$1') // remove rounded zero from final alef
.r(/(ى)\u0670$/, '$1') // remove dagger alef from final alef maqsura
// (its existence depends on the first letter of the next word)
.r(/\u064b([اى]?)$/, '$1') // tanween fath
.r(/\u08f0([اى]?)$/, '$1') // open tanween fath
.r(/\u064e\u06e2([اى]?)$/, '$1') // iqlab tanween fath
.r(/\u064e([اى]?)$/, '$1') // just fath, before final alef (either kind), because of tanween (eg, إذا)
.r(/(ى)\u0670$/, '$1') // dagger alef from final alef maqsura (its existence depends on the first letter of the next word)
.r(/\u06e4(ا)$/, '$1') // madd before the final silent alef after waw

function phrasify ([ title, aayaat ]) {
// equiv. to: return ayat.flatMap(a => a.split(/(?<=[\u06D6-\u06DC] |\n)/))
Expand Down
Loading

0 comments on commit 91bfdee

Please sign in to comment.