Skip to content

Commit

Permalink
[uptex] set latin_ucs_flag 0x800000 for latin_ucs node (#170)
Browse files Browse the repository at this point in the history
  • Loading branch information
t-tk committed Jun 24, 2024
1 parent 55f68df commit 6a2a3a2
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
17 changes: 10 additions & 7 deletions source/texk/web2c/uptexdir/kanji.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#define CS_TOKEN_FLAG 0x1FFFFFFF
#define IVS_CHAR_LIMIT 0x4400000
#define CJK_CHAR_LIMIT 0x1000000
#define UCS_CHAR_LIMIT 0x120000
#define LATIN_CHAR_LIMIT 0x2E80
#define LATIN_UCS_FLAG 0x800000
#define CJK_TOKEN_FLAG 0xFFFFFF
#define CAT_LEFT_BRACE 1
#define CAT_DELIM_NUM 15
#define KCAT_KANJI 16
#define KCAT_MODIFIER 20
Expand All @@ -25,9 +25,9 @@ boolean check_kanji (integer c)

c0 = c & CJK_TOKEN_FLAG;
c1 = XXHi(c);
if (c1>=CAT_LEFT_BRACE && c1<=CAT_DELIM_NUM &&
c0 < UCS_CHAR_LIMIT) {
return is_char_kanji(c0);
if (c1<=CAT_DELIM_NUM && (c0 & LATIN_UCS_FLAG) &&
(c0 - LATIN_UCS_FLAG) < LATIN_CHAR_LIMIT) { /* kcatcode latin_ucs */
return is_char_kanji(c0 - LATIN_UCS_FLAG);
}
else if (c1>=KCAT_KANJI && c1<=KCAT_MODIFIER) {
return is_char_kanji(c0);
Expand All @@ -45,7 +45,7 @@ boolean is_char_ascii(integer c)

boolean is_char_kanji(integer c)
{
if (is_internalUPTEX())
if (is_internalUPTEX())
return ((c >= 0)&&(c<IVS_CHAR_LIMIT));
else
return iskanji1(Hi(c)) && iskanji2(Lo(c));
Expand Down Expand Up @@ -86,8 +86,11 @@ integer ktoken_to_chr(integer c)
{
if (c > KCAT_KANJI_IVS * CJK_CHAR_LIMIT)
return (c - KCAT_KANJI_IVS * CJK_CHAR_LIMIT);
else
else {
if (XXHi(c)<=CAT_DELIM_NUM && (c & LATIN_UCS_FLAG))
c = c - LATIN_UCS_FLAG;
return (c % CJK_CHAR_LIMIT);
}
}

/* Ref. http://www.unicode.org/Public/UNIDATA/Blocks.txt */
Expand Down
13 changes: 7 additions & 6 deletions source/texk/web2c/uptexdir/uptex-m.ch
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ else if (kcode_pos=1)or((kcode_pos>=@'11)and(kcode_pos<=@'12))
@d max_ivs_val=@"4400000 {to separate wchar with ivs and kcatcode}
@d max_ucs_val=@"110000 {one more than the largest Unicode Scalar Value}
@d max_latin_val=@"2E80
@d latin_ucs_flag=@"800000
@z
@x
Expand Down Expand Up @@ -638,7 +639,7 @@ if cat=other_kchar then k:=k-multilenbuffchar(cur_chr)+1; {now |k| points to fir
else
cur_tok:=(cur_cmd*max_cjk_val)+cur_chr
else if (cur_cmd=latin_ucs) then
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr+latin_ucs_flag
else cur_tok:=(cur_cmd*max_char_val)+cur_chr
@z
Expand Down Expand Up @@ -673,7 +674,7 @@ if cat=other_kchar then k:=k-multilenbuffchar(cur_chr)+1; {now |k| points to fir
else
cur_tok:=(cur_cmd*max_cjk_val)+cur_chr
else if (cur_cmd=latin_ucs) then
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr+latin_ucs_flag
else cur_tok:=(cur_cmd*max_char_val)+cur_chr
@z
Expand All @@ -688,7 +689,7 @@ if cat=other_kchar then k:=k-multilenbuffchar(cur_chr)+1; {now |k| points to fir
else
cur_tok:=(cur_cmd*max_cjk_val)+cur_chr
else if (cur_cmd=latin_ucs) then
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr
cur_tok:=(cat_code(cur_chr)*max_cjk_val)+cur_chr+latin_ucs_flag
else cur_tok:=(cur_cmd*max_char_val)+cur_chr
@z
Expand Down Expand Up @@ -807,7 +808,7 @@ while k<pool_ptr do
begin t:=so(str_pool[k]);
if t>=@"180 then { there is no |wchar_token| whose code is 0--127. }
begin t:=fromBUFFshort(str_pool, pool_ptr, k); cc:=kcat_code(kcatcodekey(t));
if (cc=latin_ucs) then cc:=other_char;
if (cc=latin_ucs) then begin cc:=other_char; t:=t+latin_ucs_flag; end;
if (cc=not_cjk) then cc:=other_kchar;
if (cc=kanji)and(t>=max_cjk_val) then cc:=kanji_ivs;
t:=t+cc*max_cjk_val;
Expand Down Expand Up @@ -2121,7 +2122,7 @@ begin if is_char_node(link(p)) then
else if cur_cmd=not_cjk then
info(main_p):=KANJI(cur_chr)+other_kchar*max_cjk_val
else if cur_cmd=latin_ucs then
info(main_p):=KANJI(cur_chr)+cat_code(cur_chr)*max_cjk_val
info(main_p):=KANJI(cur_chr)+cat_code(cur_chr)*max_cjk_val+latin_ucs_flag
else { Does this case occur? }
info(main_p):=KANJI(cur_chr)+kcat_code(kcatcodekey(KANJI(cur_chr)))*max_cjk_val;
@z
Expand Down Expand Up @@ -2216,7 +2217,7 @@ end;
function check_kcat_code(@!ct:integer;@!cx:integer):integer;
begin
if (((ct>=kanji)or((ct=latin_ucs)and(cx<max_ucs_val)))and(enable_cjk_token=0))or(enable_cjk_token=2)then
if (((ct>=kanji)or((ct=latin_ucs)and(cx<max_latin_val)))and(enable_cjk_token=0))or(enable_cjk_token=2)then
check_kcat_code:=1
else check_kcat_code:=0;
end;
Expand Down

0 comments on commit 6a2a3a2

Please sign in to comment.