From 9819f460a668cb925596608887476b4446beac71 Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Mon, 14 Jun 2021 22:21:13 +0900 Subject: [PATCH 01/17] pdfutils.ch: added \toucs primitive --- source/texk/web2c/eptexdir/eptex.ech | 4 ++-- source/texk/web2c/eptexdir/eptex_version.h | 2 +- source/texk/web2c/eptexdir/pdfutils.ch | 9 ++++++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/source/texk/web2c/eptexdir/eptex.ech b/source/texk/web2c/eptexdir/eptex.ech index 75d3b0335..c3427b805 100644 --- a/source/texk/web2c/eptexdir/eptex.ech +++ b/source/texk/web2c/eptexdir/eptex.ech @@ -26,8 +26,8 @@ @y @d eTeX_version_string=='-2.6' {current \eTeX\ version} @# -@d epTeX_version_string=='-210218' -@d epTeX_version_number==210218 +@d epTeX_version_string=='-210614' +@d epTeX_version_number==210614 @z @x e-pTeX: banner diff --git a/source/texk/web2c/eptexdir/eptex_version.h b/source/texk/web2c/eptexdir/eptex_version.h index a79725d29..dbf5b0202 100644 --- a/source/texk/web2c/eptexdir/eptex_version.h +++ b/source/texk/web2c/eptexdir/eptex_version.h @@ -1 +1 @@ -#define EPTEX_VERSION "210218" +#define EPTEX_VERSION "210614" diff --git a/source/texk/web2c/eptexdir/pdfutils.ch b/source/texk/web2c/eptexdir/pdfutils.ch index cfb228eb4..4008220fb 100644 --- a/source/texk/web2c/eptexdir/pdfutils.ch +++ b/source/texk/web2c/eptexdir/pdfutils.ch @@ -877,7 +877,8 @@ begin str_toks:=str_toks_cat(b,0); end; @d pdf_convert_codes = pdf_first_expand_code+8 {end of \pdfTeX-like command codes} @d Uchar_convert_code = pdf_convert_codes {command code for \.{\\Uchar}} @d Ucharcat_convert_code = pdf_convert_codes+1 {command code for \.{\\Ucharcat}} -@d eptex_convert_codes = pdf_convert_codes+2 {end of \epTeX's command codes} +@d toucs_convert_code = pdf_convert_codes+2 {command code for \.{\\toucs}} +@d eptex_convert_codes = pdf_convert_codes+3 {end of \epTeX's command codes} @d job_name_code=eptex_convert_codes {command code for \.{\\jobname}} @z @@ -906,6 +907,7 @@ primitive("jobname",convert,job_name_code);@/ normal_deviate_code: print_esc("pdfnormaldeviate"); Uchar_convert_code: print_esc("Uchar"); Ucharcat_convert_code: print_esc("Ucharcat"); + toucs_convert_code: print_esc("toucs"); @z @x @@ -1122,6 +1124,7 @@ Ucharcat_convert_code: end else cat:=cur_val; cur_val:=i; end; +toucs_convert_code: scan_char_num; @z @x @@ -1135,6 +1138,8 @@ Uchar_convert_code: if is_char_ascii(cur_val) then print_char(cur_val) else print_kanji(cur_val); Ucharcat_convert_code: if cat primitive("Ucharcat",convert,Ucharcat_convert_code);@/ @!@:Ucharcat_}{\.{\\Ucharcat} primitive@> +primitive("toucs",convert,toucs_convert_code);@/ +@!@:toucs_}{\.{\\toucs} primitive@> @z @x From 6555c9c1bad633b7e0e0f9767c6effb036abadcf Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Sat, 19 Jun 2021 23:14:01 +0900 Subject: [PATCH 02/17] implement \ucs and \toucs in pTeX --- source/texk/web2c/eptexdir/eptex.defines | 1 + source/texk/web2c/eptexdir/eptex.ech | 4 +-- source/texk/web2c/eptexdir/eptex_version.h | 2 +- source/texk/web2c/eptexdir/pdfutils.ch | 9 +----- source/texk/web2c/ptexdir/ptex-base.ch | 20 +++++++++---- source/texk/web2c/ptexdir/ptex.defines | 2 ++ source/texk/web2c/ptexdir/ptex_version.h | 2 +- source/texk/web2c/uptexdir/uptex-m.ch | 33 +++------------------- source/texk/web2c/uptexdir/uptex.defines | 1 + 9 files changed, 28 insertions(+), 46 deletions(-) diff --git a/source/texk/web2c/eptexdir/eptex.defines b/source/texk/web2c/eptexdir/eptex.defines index 4ec825245..d6f6c9613 100644 --- a/source/texk/web2c/eptexdir/eptex.defines +++ b/source/texk/web2c/eptexdir/eptex.defines @@ -28,6 +28,7 @@ @define function fromSJIS (); @define function fromKUTEN (); @define function UCStoUTF8 (); +@define function fromUCS (); @define function toUCS (); @define function notkanjicharseq (); diff --git a/source/texk/web2c/eptexdir/eptex.ech b/source/texk/web2c/eptexdir/eptex.ech index c3427b805..9988ce5aa 100644 --- a/source/texk/web2c/eptexdir/eptex.ech +++ b/source/texk/web2c/eptexdir/eptex.ech @@ -26,8 +26,8 @@ @y @d eTeX_version_string=='-2.6' {current \eTeX\ version} @# -@d epTeX_version_string=='-210614' -@d epTeX_version_number==210614 +@d epTeX_version_string=='-210619' +@d epTeX_version_number==210619 @z @x e-pTeX: banner diff --git a/source/texk/web2c/eptexdir/eptex_version.h b/source/texk/web2c/eptexdir/eptex_version.h index dbf5b0202..6858bdba8 100644 --- a/source/texk/web2c/eptexdir/eptex_version.h +++ b/source/texk/web2c/eptexdir/eptex_version.h @@ -1 +1 @@ -#define EPTEX_VERSION "210614" +#define EPTEX_VERSION "210619" diff --git a/source/texk/web2c/eptexdir/pdfutils.ch b/source/texk/web2c/eptexdir/pdfutils.ch index 4008220fb..cfb228eb4 100644 --- a/source/texk/web2c/eptexdir/pdfutils.ch +++ b/source/texk/web2c/eptexdir/pdfutils.ch @@ -877,8 +877,7 @@ begin str_toks:=str_toks_cat(b,0); end; @d pdf_convert_codes = pdf_first_expand_code+8 {end of \pdfTeX-like command codes} @d Uchar_convert_code = pdf_convert_codes {command code for \.{\\Uchar}} @d Ucharcat_convert_code = pdf_convert_codes+1 {command code for \.{\\Ucharcat}} -@d toucs_convert_code = pdf_convert_codes+2 {command code for \.{\\toucs}} -@d eptex_convert_codes = pdf_convert_codes+3 {end of \epTeX's command codes} +@d eptex_convert_codes = pdf_convert_codes+2 {end of \epTeX's command codes} @d job_name_code=eptex_convert_codes {command code for \.{\\jobname}} @z @@ -907,7 +906,6 @@ primitive("jobname",convert,job_name_code);@/ normal_deviate_code: print_esc("pdfnormaldeviate"); Uchar_convert_code: print_esc("Uchar"); Ucharcat_convert_code: print_esc("Ucharcat"); - toucs_convert_code: print_esc("toucs"); @z @x @@ -1124,7 +1122,6 @@ Ucharcat_convert_code: end else cat:=cur_val; cur_val:=i; end; -toucs_convert_code: scan_char_num; @z @x @@ -1138,8 +1135,6 @@ Uchar_convert_code: if is_char_ascii(cur_val) then print_char(cur_val) else print_kanji(cur_val); Ucharcat_convert_code: if cat primitive("Ucharcat",convert,Ucharcat_convert_code);@/ @!@:Ucharcat_}{\.{\\Ucharcat} primitive@> -primitive("toucs",convert,toucs_convert_code);@/ -@!@:toucs_}{\.{\\toucs} primitive@> @z @x diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index c1b23f1c8..e88ef4986 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -77,9 +77,9 @@ @d banner_k==TeX_banner_k @y @d pTeX_version=3 -@d pTeX_minor_version=9 +@d pTeX_minor_version=10 @d pTeX_revision==".0" -@d pTeX_version_string=='-p3.9.0' {current \pTeX\ version} +@d pTeX_version_string=='-p3.10.0' {current \pTeX\ version} @# @d pTeX_banner=='This is pTeX, Version 3.141592653',pTeX_version_string @d pTeX_banner_k==pTeX_banner @@ -2354,8 +2354,10 @@ help6("Dimensions can be in units of em, ex, zw, zh, in, pt, pc,")@/ @d sjis_code=7 {command code for \.{\\sjis}} @d jis_code=8 {command code for \.{\\jis}} @d kuten_code=9 {command code for \.{\\kuten}} -@d ptex_revision_code=10 {command code for \.{\\ptexrevision}} -@d ptex_convert_codes=11 {end of \pTeX's command codes} +@d ucs_code=10 {command code for \.{\\ucs}} +@d toucs_code=11 {command code for \.{\\ucs}} +@d ptex_revision_code=12 {command code for \.{\\ptexrevision}} +@d ptex_convert_codes=13 {end of \pTeX's command codes} @d job_name_code=ptex_convert_codes {command code for \.{\\jobname}} @z @@ -2375,6 +2377,10 @@ primitive("jis",convert,jis_code); @!@:jis_}{\.{\\jis} primitive@> primitive("kuten",convert,kuten_code); @!@:kuten_}{\.{\\kuten} primitive@> +primitive("ucs",convert,ucs_code); +@!@:ucs_}{\.{\\ucs} primitive@> +primitive("toucs",convert,toucs_code); +@!@:toucs_}{\.{\\toucs} primitive@> primitive("ptexrevision",convert,ptex_revision_code); @!@:ptexrevision_}{\.{\\ptexrevision} primitive@> @z @@ -2388,6 +2394,8 @@ primitive("ptexrevision",convert,ptex_revision_code); sjis_code:print_esc("sjis"); jis_code:print_esc("jis"); kuten_code:print_esc("kuten"); + ucs_code:print_esc("ucs"); + toucs_code:print_esc("ucs"); ptex_revision_code:print_esc("ptexrevision"); @z @@ -2412,7 +2420,7 @@ string_code, meaning_code: begin save_scanner_status:=scanner_status; KANJI(cx):=0; case c of number_code,roman_numeral_code, -kansuji_code,euc_code,sjis_code,jis_code,kuten_code: scan_int; +kansuji_code,euc_code,sjis_code,jis_code,kuten_code,ucs_code,toucs_code: scan_int; ptex_revision_code: do_nothing; string_code, meaning_code: begin save_scanner_status:=scanner_status; scanner_status:=normal; get_token; @@ -2438,6 +2446,8 @@ jis_code: print_int(fromJIS(cur_val)); euc_code: print_int(fromEUC(cur_val)); sjis_code: print_int(fromSJIS(cur_val)); kuten_code: print_int(fromKUTEN(cur_val)); +ucs_code: print_int(fromUCS(cur_val)); +toucs_code: print_int(toUCS(cur_val)); ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); string_code:if cur_cs<>0 then sprint_cs(cur_cs) diff --git a/source/texk/web2c/ptexdir/ptex.defines b/source/texk/web2c/ptexdir/ptex.defines index 8dd0d7742..e087b5457 100644 --- a/source/texk/web2c/ptexdir/ptex.defines +++ b/source/texk/web2c/ptexdir/ptex.defines @@ -20,6 +20,8 @@ @define function fromEUC (); @define function fromSJIS (); @define function fromKUTEN (); +@define function fromUCS (); +@define function toUCS (); @define function notkanjicharseq (); @define function checkkanji (); diff --git a/source/texk/web2c/ptexdir/ptex_version.h b/source/texk/web2c/ptexdir/ptex_version.h index 168afe751..91d996f48 100644 --- a/source/texk/web2c/ptexdir/ptex_version.h +++ b/source/texk/web2c/ptexdir/ptex_version.h @@ -1 +1 @@ -#define PTEX_VERSION "p3.9.0" +#define PTEX_VERSION "p3.10.0" diff --git a/source/texk/web2c/uptexdir/uptex-m.ch b/source/texk/web2c/uptexdir/uptex-m.ch index 78f830cb8..ee5785e63 100644 --- a/source/texk/web2c/uptexdir/uptex-m.ch +++ b/source/texk/web2c/uptexdir/uptex-m.ch @@ -633,21 +633,12 @@ while k @y -@!@:kuten_}{\.{\\kuten} primitive@> -primitive("ucs",convert,ucs_code); -@!@:ucs_}{\.{\\ucs} primitive@> +@d ptex_revision_code=12 {command code for \.{\\ptexrevision}} +@d uptex_revision_code=13 {command code for \.{\\uptexrevision}} +@d ptex_convert_codes=14 {end of \pTeX's command codes} @z @x @@ -660,13 +651,6 @@ primitive("uptexrevision",convert,uptex_revision_code); @!@:uptexrevision_}{\.{\\uptexrevision} primitive@> @z -@x - kuten_code:print_esc("kuten"); -@y - kuten_code:print_esc("kuten"); - ucs_code:print_esc("ucs"); -@z - @x ptex_revision_code:print_esc("ptexrevision"); @y @@ -675,26 +659,17 @@ primitive("uptexrevision",convert,uptex_revision_code); @z @x -kansuji_code,euc_code,sjis_code,jis_code,kuten_code: scan_int; ptex_revision_code: do_nothing; string_code, meaning_code: begin save_scanner_status:=scanner_status; scanner_status:=normal; get_token; if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then {|wchar_token|} @y -kansuji_code,euc_code,sjis_code,jis_code,kuten_code,ucs_code: scan_int; ptex_revision_code, uptex_revision_code: do_nothing; string_code, meaning_code: begin save_scanner_status:=scanner_status; scanner_status:=normal; get_token; if (cur_cmd>=kanji)and(cur_cmd<=hangul) then {|wchar_token|} @z -@x -kuten_code: print_int(fromKUTEN(cur_val)); -@y -kuten_code: print_int(fromKUTEN(cur_val)); -ucs_code: print_int(fromUCS(cur_val)); -@z - @x ptex_revision_code: print(pTeX_revision); @y diff --git a/source/texk/web2c/uptexdir/uptex.defines b/source/texk/web2c/uptexdir/uptex.defines index 52243e571..9a59b7b0a 100644 --- a/source/texk/web2c/uptexdir/uptex.defines +++ b/source/texk/web2c/uptexdir/uptex.defines @@ -32,6 +32,7 @@ @define function fromSJIS (); @define function fromKUTEN (); @define function fromUCS (); +@define function toUCS (); @define function notkanjicharseq (); @define function checkkanji (); From 61bd9c1afbbad0a8d340ea5f6c92bbc076ce72ed Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Sun, 20 Jun 2021 14:01:29 +0900 Subject: [PATCH 03/17] ChangeLog --- source/texk/web2c/eptexdir/ChangeLog | 6 ++++++ source/texk/web2c/ptexdir/ChangeLog | 6 ++++++ source/texk/web2c/ptexdir/ptex-base.ch | 7 +++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/source/texk/web2c/eptexdir/ChangeLog b/source/texk/web2c/eptexdir/ChangeLog index 7c10d9ef1..787c22214 100644 --- a/source/texk/web2c/eptexdir/ChangeLog +++ b/source/texk/web2c/eptexdir/ChangeLog @@ -1,3 +1,9 @@ +2021-06-20 Hironori Kitagawa + + * eptex.defines: Add fromUCS() and toUCS(). + * eptex.ech, eptex_version.h: e-pTeX version 210619 + (adapted to pTeX p3.10.0). + 2021-03-23 Karl Berry * TL'21. diff --git a/source/texk/web2c/ptexdir/ChangeLog b/source/texk/web2c/ptexdir/ChangeLog index 78e691a28..f41bef87b 100644 --- a/source/texk/web2c/ptexdir/ChangeLog +++ b/source/texk/web2c/ptexdir/ChangeLog @@ -1,3 +1,9 @@ +2021-06-20 Hironori Kitagawa + + * ptex-base.ch: Added \ucs (from upTeX) and \toucs. + * ptex.defines: Add fromUCS() and toUCS(). + * ptex_version.h: pTeX version p3.10.0. + 2021-03-23 Karl Berry * TL'21. diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index e88ef4986..8afc82ac8 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -64,6 +64,7 @@ % (2021-02-18) HK pTeX p3.9.0. Add \ifjfont and \iftfont (in 2020-02-06, by HY), % Bug fix for getting \kansujichar, % based on TeX 3.141592653 +% (2021-06-20) HK pTeX p3.10.0. Add \ucs and \toucs. @x % Here is TeX material that gets inserted after \input webmac @@ -2446,8 +2447,10 @@ jis_code: print_int(fromJIS(cur_val)); euc_code: print_int(fromEUC(cur_val)); sjis_code: print_int(fromSJIS(cur_val)); kuten_code: print_int(fromKUTEN(cur_val)); -ucs_code: print_int(fromUCS(cur_val)); -toucs_code: print_int(toUCS(cur_val)); +ucs_code: if is_char_ascii(cur_val) then print_int(cur_val) + else print_int(fromUCS(cur_val)); +toucs_code: if is_char_ascii(cur_val) then print_int(cur_val) + else print_int(toUCS(cur_val)); ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); string_code:if cur_cs<>0 then sprint_cs(cur_cs) From 9f94196791f3500d2ab6e307cf9449e5a23199f0 Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Sun, 20 Jun 2021 14:36:04 +0900 Subject: [PATCH 04/17] fix: \show\toucs --- source/texk/web2c/ptexdir/ptex-base.ch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index 8afc82ac8..34d9f48ef 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2396,7 +2396,7 @@ primitive("ptexrevision",convert,ptex_revision_code); jis_code:print_esc("jis"); kuten_code:print_esc("kuten"); ucs_code:print_esc("ucs"); - toucs_code:print_esc("ucs"); + toucs_code:print_esc("toucs"); ptex_revision_code:print_esc("ptexrevision"); @z From f5f90b9d51f953ee709b8c6970b1ef13d1a27a50 Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Sun, 20 Jun 2021 18:04:02 +0900 Subject: [PATCH 05/17] \ucs, \toucs: 0--255, added tests --- source/texk/web2c/ptexdir/ptex-base.ch | 6 ++--- source/texk/web2c/ptexdir/tests/ucs.tex | 29 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 source/texk/web2c/ptexdir/tests/ucs.tex diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index 34d9f48ef..97f645b89 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2447,10 +2447,10 @@ jis_code: print_int(fromJIS(cur_val)); euc_code: print_int(fromEUC(cur_val)); sjis_code: print_int(fromSJIS(cur_val)); kuten_code: print_int(fromKUTEN(cur_val)); -ucs_code: if is_char_ascii(cur_val) then print_int(cur_val) +ucs_code: if cur_val=0 then print_int(0) + { |fromUCS(0)| returns A2AF in EUC, which does not exist } else print_int(fromUCS(cur_val)); -toucs_code: if is_char_ascii(cur_val) then print_int(cur_val) - else print_int(toUCS(cur_val)); +toucs_code: print_int(toUCS(cur_val)); ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); string_code:if cur_cs<>0 then sprint_cs(cur_cs) diff --git a/source/texk/web2c/ptexdir/tests/ucs.tex b/source/texk/web2c/ptexdir/tests/ucs.tex new file mode 100644 index 000000000..ed5350fd1 --- /dev/null +++ b/source/texk/web2c/ptexdir/tests/ucs.tex @@ -0,0 +1,29 @@ +%#!eptex + +\def\typeout{\immediate\write17} + +\ifnum`あ="A4A2 \typeout{} +\else\ifnum`ア="8341 \typeout{} +\else\typeout{}\fi\fi + +\typeout{\string\ucs.} +\newcount\cntA +\loop\ifnum\cntA<256 + \ifnum0=\ucs\cntA\else\typeout{\the\cntA: \ucs\cntA (\Uchar\ucs\cntA)}\fi + \advance\cntA1 +\repeat + + +\typeout{\string\toucs.} +\cntA=0 +\loop\ifnum\cntA<256 + \ifnum0=\toucs\cntA\else\typeout{\the\cntA: \toucs\cntA}\fi + \advance\cntA1 +\repeat +\typeout{\toucs"0, \toucs"80, \toucs"D7, \toucs"A7, \toucs"10000} +\typeout{\toucs"8341, \toucs"EAA4, \toucs"EAA5} +\typeout{\toucs"A4A2, \toucs"F4A6, \toucs"F4A7} + + +\bye + From 2f3c2056cbfacf36ed10178a2a24b0ecbfaa7a21 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Mon, 21 Jun 2021 23:56:33 +0900 Subject: [PATCH 06/17] tiny --- source/texk/web2c/ptexdir/ptex-base.ch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index 97f645b89..fdb6af942 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2356,7 +2356,7 @@ help6("Dimensions can be in units of em, ex, zw, zh, in, pt, pc,")@/ @d jis_code=8 {command code for \.{\\jis}} @d kuten_code=9 {command code for \.{\\kuten}} @d ucs_code=10 {command code for \.{\\ucs}} -@d toucs_code=11 {command code for \.{\\ucs}} +@d toucs_code=11 {command code for \.{\\toucs}} @d ptex_revision_code=12 {command code for \.{\\ptexrevision}} @d ptex_convert_codes=13 {end of \pTeX's command codes} @d job_name_code=ptex_convert_codes {command code for \.{\\jobname}} From d073455e29ba22a210fe0234bce462d140453bbc Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 08:15:28 +0900 Subject: [PATCH 07/17] ptexenc.c, kanjicnv.c: invalid code -> -1 --- source/texk/ptexenc/kanjicnv.c | 30 +++++++++++++++++++++++------- source/texk/ptexenc/ptexenc.c | 2 +- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/source/texk/ptexenc/kanjicnv.c b/source/texk/ptexenc/kanjicnv.c index 75793863a..db6813185 100644 --- a/source/texk/ptexenc/kanjicnv.c +++ b/source/texk/ptexenc/kanjicnv.c @@ -5,6 +5,18 @@ #include #include +boolean isJISkanji1(int c) +{ + c &= 0xff; + return (0x21 <= c && c <= 0x7e); +} + +boolean isJISkanji2(int c) +{ + c &= 0xff; + return (0x21 <= c && c <= 0x7e); +} + boolean isEUCkanji1(int c) { c &= 0xff; @@ -32,11 +44,15 @@ boolean isSJISkanji2(int c) /* EUC <=> JIS X 0208 code conversion */ int EUCtoJIS(int kcode) { + if (!isEUCkanji1(HI(kcode))) return -1; + if (!isEUCkanji2(LO(kcode))) return -1; return (kcode & 0x7f7f); } int JIStoEUC(int kcode) { + if (!isJISkanji1(HI(kcode)) return -1; + if (!isJISkanji2(LO(kcode)) return -1; return (kcode | 0x8080); } @@ -45,8 +61,8 @@ int SJIStoJIS(int kcode) { int byte1, byte2; - byte1 = HI(kcode); - byte2 = LO(kcode); + byte1 = HI(kcode); if (!isSJISkanji1(byte1)) return -1; + byte2 = LO(kcode); if (!isSJISkanji2(byte2)) return -1; byte1 -= ( byte1>=0xa0 ) ? 0xc1 : 0x81; kcode = ((byte1<<1) + 0x21)<<8; if ( byte2 >= 0x9f ) { @@ -63,8 +79,8 @@ int JIStoSJIS(int kcode) int high, low; int nh, nl; - high = HI(kcode); - low = LO(kcode); + high = HI(kcode); if (!isJISkanji1(high)) return -1; + low = LO(kcode); if (!isJISkanji2(low)) return -1; nh = ((high-0x21)>>1) + 0x81; if (nh > 0x9f) nh += 0x40; if (high & 1) { @@ -75,7 +91,7 @@ int JIStoSJIS(int kcode) if (isSJISkanji1(nh) && isSJISkanji2(nl)) { return HILO(nh, nl); } else { - return 0x813f; + return -1; } } @@ -94,8 +110,8 @@ int EUCtoSJIS(int kcode) int KUTENtoJIS(int kcode) { /* in case of undefined in kuten code table */ - if (HI(kcode) == 0 || HI(kcode) > 95) return -1; - if (LO(kcode) == 0 || LO(kcode) > 95) return -1; + if (HI(kcode) == 0 || HI(kcode) >= 95) return -1; + if (LO(kcode) == 0 || LO(kcode) >= 95) return -1; return kcode + 0x2020; } diff --git a/source/texk/ptexenc/ptexenc.c b/source/texk/ptexenc/ptexenc.c index 91cb4fa6f..e4f24eb48 100644 --- a/source/texk/ptexenc/ptexenc.c +++ b/source/texk/ptexenc/ptexenc.c @@ -382,7 +382,7 @@ long fromUCS(long kcode) { if (is_internalUPTEX()) return UCStoUPTEX(kcode); kcode = UCS2toJIS(kcode); - if (kcode == 0) return 0; + if (kcode == 0) return -1; return fromJIS(kcode); } From 7f7963598095d3b04475d6f0d382641afdc9b712 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 08:16:10 +0900 Subject: [PATCH 08/17] jisx0208.h: remove unused entry in UnicodeTbl --- source/texk/ptexenc/jisx0208.h | 104 --------------------------------- 1 file changed, 104 deletions(-) diff --git a/source/texk/ptexenc/jisx0208.h b/source/texk/ptexenc/jisx0208.h index c0a5ffa43..8992c2e9f 100644 --- a/source/texk/ptexenc/jisx0208.h +++ b/source/texk/ptexenc/jisx0208.h @@ -1102,110 +1102,6 @@ static unsigned short UnicodeTbl[][94] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - { /* category 85 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - { /* category 86 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - { /* category 87 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - { /* category 88 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - { /* category 89 */ - 0x7E8A, 0x891C, 0x9348, 0x9288, 0x84DC, 0x4FC9, 0x70BB, 0x6631, - 0x68C8, 0x92F9, 0x66FB, 0x5F45, 0x4E28, 0x4EE1, 0x4EFC, 0x4F00, - 0x4F03, 0x4F39, 0x4F56, 0x4F92, 0x4F8A, 0x4F9A, 0x4F94, 0x4FCD, - 0x5040, 0x5022, 0x4FFF, 0x501E, 0x5046, 0x5070, 0x5042, 0x5094, - 0x50F4, 0x50D8, 0x514A, 0x5164, 0x519D, 0x51BE, 0x51EC, 0x5215, - 0x529C, 0x52A6, 0x52C0, 0x52DB, 0x5300, 0x5307, 0x5324, 0x5372, - 0x5393, 0x53B2, 0x53DD, 0xFA0E, 0x549C, 0x548A, 0x54A9, 0x54FF, - 0x5586, 0x5759, 0x5765, 0x57AC, 0x57C8, 0x57C7, 0xFA0F, 0xFA10, - 0x589E, 0x58B2, 0x590B, 0x5953, 0x595B, 0x595D, 0x5963, 0x59A4, - 0x59BA, 0x5B56, 0x5BC0, 0x752F, 0x5BD8, 0x5BEC, 0x5C1E, 0x5CA6, - 0x5CBA, 0x5CF5, 0x5D27, 0x5D53, 0xFA11, 0x5D42, 0x5D6D, 0x5DB8, - 0x5DB9, 0x5DD0, 0x5F21, 0x5F34, 0x5F67, 0x5FB7}, - { /* category 90 */ - 0x5FDE, 0x605D, 0x6085, 0x608A, 0x60DE, 0x60D5, 0x6120, 0x60F2, - 0x6111, 0x6137, 0x6130, 0x6198, 0x6213, 0x62A6, 0x63F5, 0x6460, - 0x649D, 0x64CE, 0x654E, 0x6600, 0x6615, 0x663B, 0x6609, 0x662E, - 0x661E, 0x6624, 0x6665, 0x6657, 0x6659, 0xFA12, 0x6673, 0x6699, - 0x66A0, 0x66B2, 0x66BF, 0x66FA, 0x670E, 0xF929, 0x6766, 0x67BB, - 0x6852, 0x67C0, 0x6801, 0x6844, 0x68CF, 0xFA13, 0x6968, 0xFA14, - 0x6998, 0x69E2, 0x6A30, 0x6A6B, 0x6A46, 0x6A73, 0x6A7E, 0x6AE2, - 0x6AE4, 0x6BD6, 0x6C3F, 0x6C5C, 0x6C86, 0x6C6F, 0x6CDA, 0x6D04, - 0x6D87, 0x6D6F, 0x6D96, 0x6DAC, 0x6DCF, 0x6DF8, 0x6DF2, 0x6DFC, - 0x6E39, 0x6E5C, 0x6E27, 0x6E3C, 0x6EBF, 0x6F88, 0x6FB5, 0x6FF5, - 0x7005, 0x7007, 0x7028, 0x7085, 0x70AB, 0x710F, 0x7104, 0x715C, - 0x7146, 0x7147, 0xFA15, 0x71C1, 0x71FE, 0x72B1}, - { /* category 91 */ - 0x72BE, 0x7324, 0xFA16, 0x7377, 0x73BD, 0x73C9, 0x73D6, 0x73E3, - 0x73D2, 0x7407, 0x73F5, 0x7426, 0x742A, 0x7429, 0x742E, 0x7462, - 0x7489, 0x749F, 0x7501, 0x756F, 0x7682, 0x769C, 0x769E, 0x769B, - 0x76A6, 0xFA17, 0x7746, 0x52AF, 0x7821, 0x784E, 0x7864, 0x787A, - 0x7930, 0xFA18, 0xFA19, 0xFA1A, 0x7994, 0xFA1B, 0x799B, 0x7AD1, - 0x7AE7, 0xFA1C, 0x7AEB, 0x7B9E, 0xFA1D, 0x7D48, 0x7D5C, 0x7DB7, - 0x7DA0, 0x7DD6, 0x7E52, 0x7F47, 0x7FA1, 0xFA1E, 0x8301, 0x8362, - 0x837F, 0x83C7, 0x83F6, 0x8448, 0x84B4, 0x8553, 0x8559, 0x856B, - 0xFA1F, 0x85B0, 0xFA20, 0xFA21, 0x8807, 0x88F5, 0x8A12, 0x8A37, - 0x8A79, 0x8AA7, 0x8ABE, 0x8ADF, 0xFA22, 0x8AF6, 0x8B53, 0x8B7F, - 0x8CF0, 0x8CF4, 0x8D12, 0x8D76, 0xFA23, 0x8ECF, 0xFA24, 0xFA25, - 0x9067, 0x90DE, 0xFA26, 0x9115, 0x9127, 0x91DA}, - { /* category 92 */ - 0x91D7, 0x91DE, 0x91ED, 0x91EE, 0x91E4, 0x91E5, 0x9206, 0x9210, - 0x920A, 0x923A, 0x9240, 0x923C, 0x924E, 0x9259, 0x9251, 0x9239, - 0x9267, 0x92A7, 0x9277, 0x9278, 0x92E7, 0x92D7, 0x92D9, 0x92D0, - 0xFA27, 0x92D5, 0x92E0, 0x92D3, 0x9325, 0x9321, 0x92FB, 0xFA28, - 0x931E, 0x92FF, 0x931D, 0x9302, 0x9370, 0x9357, 0x93A4, 0x93C6, - 0x93DE, 0x93F8, 0x9431, 0x9445, 0x9448, 0x9592, 0xF9DC, 0xFA29, - 0x969D, 0x96AF, 0x9733, 0x973B, 0x9743, 0x974D, 0x974F, 0x9751, - 0x9755, 0x9857, 0x9865, 0xFA2A, 0xFA2B, 0x9927, 0xFA2C, 0x999E, - 0x9A4E, 0x9AD9, 0x9ADC, 0x9B75, 0x9B72, 0x9B8F, 0x9BB1, 0x9BBB, - 0x9C00, 0x9D70, 0x9D6B, 0xFA2D, 0x9E19, 0x9ED1, 0x0000, 0x0000, - 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, - 0x2178, 0x2179, 0xFFE2, 0xFFE4, 0xFF07, 0xFF02} }; #endif /* JISX0208_H */ From d3bf66aa0197c2a72b362d4637b40a3c780bc49d Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 08:18:06 +0900 Subject: [PATCH 09/17] ptex-base.ch: no longer necessary --- source/texk/web2c/ptexdir/ptex-base.ch | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index fdb6af942..1624de1de 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2447,9 +2447,7 @@ jis_code: print_int(fromJIS(cur_val)); euc_code: print_int(fromEUC(cur_val)); sjis_code: print_int(fromSJIS(cur_val)); kuten_code: print_int(fromKUTEN(cur_val)); -ucs_code: if cur_val=0 then print_int(0) - { |fromUCS(0)| returns A2AF in EUC, which does not exist } - else print_int(fromUCS(cur_val)); +ucs_code: print_int(fromUCS(cur_val)); toucs_code: print_int(toUCS(cur_val)); ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); From 74a43c03d2a65bef97eedaaebc994f1baa68ccbb Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 08:26:15 +0900 Subject: [PATCH 10/17] oops --- source/texk/ptexenc/kanjicnv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/texk/ptexenc/kanjicnv.c b/source/texk/ptexenc/kanjicnv.c index db6813185..87aebe66d 100644 --- a/source/texk/ptexenc/kanjicnv.c +++ b/source/texk/ptexenc/kanjicnv.c @@ -51,8 +51,8 @@ int EUCtoJIS(int kcode) int JIStoEUC(int kcode) { - if (!isJISkanji1(HI(kcode)) return -1; - if (!isJISkanji2(LO(kcode)) return -1; + if (!isJISkanji1(HI(kcode))) return -1; + if (!isJISkanji2(LO(kcode))) return -1; return (kcode | 0x8080); } From d439ecef529ca6de1b43a44104a5a89382acb1a1 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 08:53:49 +0900 Subject: [PATCH 11/17] kanjicnv.h: just in case --- source/texk/ptexenc/ptexenc/kanjicnv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/texk/ptexenc/ptexenc/kanjicnv.h b/source/texk/ptexenc/ptexenc/kanjicnv.h index 3ca732d51..d4ddfb378 100644 --- a/source/texk/ptexenc/ptexenc/kanjicnv.h +++ b/source/texk/ptexenc/ptexenc/kanjicnv.h @@ -11,6 +11,9 @@ #define LO(x) ( ((int)(x)) & 0xff) #define HILO(x,y) ((LO(x) << 8) | LO(y)) +extern boolean isJISkanji1(int c); +extern boolean isJISkanji2(int c); + extern boolean isEUCkanji1(int c); extern boolean isEUCkanji2(int c); From d32cfbebc6f7990d6efe8d88b0ce4d7172308645 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sat, 26 Jun 2021 20:37:47 +0900 Subject: [PATCH 12/17] ptexenc.c: fromUCS changed to -1 --- source/texk/ptexenc/ptexenc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/texk/ptexenc/ptexenc.c b/source/texk/ptexenc/ptexenc.c index e4f24eb48..6df0403a5 100644 --- a/source/texk/ptexenc/ptexenc.c +++ b/source/texk/ptexenc/ptexenc.c @@ -673,8 +673,8 @@ static void get_utf8(int i, FILE *fp) break; } - j = (u != 0) ? toBUFF(fromUCS(u)) : 0; - if (j == 0) { /* can't represent (typically umlaut o in EUC) */ + j = (u != 0) ? toBUFF(fromUCS(u)) : -1; + if (j == -1) { /* can't represent (typically umlaut o in EUC) */ write_hex(i); if (i2 != EOF) write_hex(i2); if (i3 != EOF) write_hex(i3); From 5ead93ada070637b82f6fef40e981ce43572dd5b Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sun, 27 Jun 2021 17:13:49 +0900 Subject: [PATCH 13/17] invalid code 0 -> -1, detect in ptex-base.ch --- source/texk/ptexenc/kanjicnv.c | 30 +++++++++++++------------- source/texk/ptexenc/ptexenc.c | 6 +++--- source/texk/web2c/ptexdir/ptex-base.ch | 15 ++++++++----- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/source/texk/ptexenc/kanjicnv.c b/source/texk/ptexenc/kanjicnv.c index 87aebe66d..d93bbed5f 100644 --- a/source/texk/ptexenc/kanjicnv.c +++ b/source/texk/ptexenc/kanjicnv.c @@ -44,15 +44,17 @@ boolean isSJISkanji2(int c) /* EUC <=> JIS X 0208 code conversion */ int EUCtoJIS(int kcode) { - if (!isEUCkanji1(HI(kcode))) return -1; - if (!isEUCkanji2(LO(kcode))) return -1; + if ((kcode<=0 || kcode>0x10000)) return 0; + if (!isEUCkanji1(HI(kcode))) return 0; + if (!isEUCkanji2(LO(kcode))) return 0; return (kcode & 0x7f7f); } int JIStoEUC(int kcode) { - if (!isJISkanji1(HI(kcode))) return -1; - if (!isJISkanji2(LO(kcode))) return -1; + if ((kcode<=0 || kcode>0x10000)) return 0; + if (!isJISkanji1(HI(kcode))) return 0; + if (!isJISkanji2(LO(kcode))) return 0; return (kcode | 0x8080); } @@ -61,8 +63,9 @@ int SJIStoJIS(int kcode) { int byte1, byte2; - byte1 = HI(kcode); if (!isSJISkanji1(byte1)) return -1; - byte2 = LO(kcode); if (!isSJISkanji2(byte2)) return -1; + if ((kcode<=0 || kcode>0x10000)) return 0; + byte1 = HI(kcode); if (!isSJISkanji1(byte1)) return 0; + byte2 = LO(kcode); if (!isSJISkanji2(byte2)) return 0; byte1 -= ( byte1>=0xa0 ) ? 0xc1 : 0x81; kcode = ((byte1<<1) + 0x21)<<8; if ( byte2 >= 0x9f ) { @@ -79,8 +82,9 @@ int JIStoSJIS(int kcode) int high, low; int nh, nl; - high = HI(kcode); if (!isJISkanji1(high)) return -1; - low = LO(kcode); if (!isJISkanji2(low)) return -1; + if ((kcode<=0 || kcode>0x10000)) return 0; + high = HI(kcode); if (!isJISkanji1(high)) return 0; + low = LO(kcode); if (!isJISkanji2(low)) return 0; nh = ((high-0x21)>>1) + 0x81; if (nh > 0x9f) nh += 0x40; if (high & 1) { @@ -88,11 +92,7 @@ int JIStoSJIS(int kcode) if (low > 0x5f) nl++; } else nl = low + 0x7e; - if (isSJISkanji1(nh) && isSJISkanji2(nl)) { - return HILO(nh, nl); - } else { - return -1; - } + return HILO(nh, nl); } /* Shift JIS <=> EUC Kanji code conversion */ @@ -110,8 +110,8 @@ int EUCtoSJIS(int kcode) int KUTENtoJIS(int kcode) { /* in case of undefined in kuten code table */ - if (HI(kcode) == 0 || HI(kcode) >= 95) return -1; - if (LO(kcode) == 0 || LO(kcode) >= 95) return -1; + if (HI(kcode) == 0 || HI(kcode) > 94) return 0; + if (LO(kcode) == 0 || LO(kcode) > 94) return 0; return kcode + 0x2020; } diff --git a/source/texk/ptexenc/ptexenc.c b/source/texk/ptexenc/ptexenc.c index 6df0403a5..91cb4fa6f 100644 --- a/source/texk/ptexenc/ptexenc.c +++ b/source/texk/ptexenc/ptexenc.c @@ -382,7 +382,7 @@ long fromUCS(long kcode) { if (is_internalUPTEX()) return UCStoUPTEX(kcode); kcode = UCS2toJIS(kcode); - if (kcode == 0) return -1; + if (kcode == 0) return 0; return fromJIS(kcode); } @@ -673,8 +673,8 @@ static void get_utf8(int i, FILE *fp) break; } - j = (u != 0) ? toBUFF(fromUCS(u)) : -1; - if (j == -1) { /* can't represent (typically umlaut o in EUC) */ + j = (u != 0) ? toBUFF(fromUCS(u)) : 0; + if (j == 0) { /* can't represent (typically umlaut o in EUC) */ write_hex(i); if (i2 != EOF) write_hex(i2); if (i3 != EOF) write_hex(i3); diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index 1624de1de..e167ba52e 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2443,11 +2443,16 @@ string_code:if cur_cs<>0 then sprint_cs(cur_cs) case c of number_code: print_int(cur_val); roman_numeral_code: print_roman_int(cur_val); -jis_code: print_int(fromJIS(cur_val)); -euc_code: print_int(fromEUC(cur_val)); -sjis_code: print_int(fromSJIS(cur_val)); -kuten_code: print_int(fromKUTEN(cur_val)); -ucs_code: print_int(fromUCS(cur_val)); +jis_code: begin cur_val:=fromJIS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +euc_code: begin cur_val:=fromEUC(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +sjis_code: begin cur_val:=fromSJIS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +kuten_code: begin cur_val:=fromKUTEN(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +ucs_code: begin cur_val:=fromUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; toucs_code: print_int(toUCS(cur_val)); ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); From 3d8183b4bf87d2fbb16e3ea64cae42a34dec04e0 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Sun, 27 Jun 2021 19:28:03 +0900 Subject: [PATCH 14/17] uptex-m.ch: \ucs0 should be 0 for internal-uptex --- source/texk/web2c/uptexdir/uptex-m.ch | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/texk/web2c/uptexdir/uptex-m.ch b/source/texk/web2c/uptexdir/uptex-m.ch index ee5785e63..3c53039d5 100644 --- a/source/texk/web2c/uptexdir/uptex-m.ch +++ b/source/texk/web2c/uptexdir/uptex-m.ch @@ -670,6 +670,15 @@ string_code, meaning_code: begin save_scanner_status:=scanner_status; if (cur_cmd>=kanji)and(cur_cmd<=hangul) then {|wchar_token|} @z +@x +ucs_code: begin cur_val:=fromUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +@y +ucs_code: if (isinternalUPTEX) then print_int(fromUCS(cur_val)) + else begin cur_val:=fromUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; +@z + @x ptex_revision_code: print(pTeX_revision); @y From 81ad436ec263673524451ee60d30ae124372d710 Mon Sep 17 00:00:00 2001 From: Hironobu Yamashita Date: Mon, 28 Jun 2021 23:10:24 +0900 Subject: [PATCH 15/17] ptex-base.ch: wrap print_hex for negative value --- source/texk/web2c/ptexdir/ptex-base.ch | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index e167ba52e..b8b998b46 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -329,6 +329,17 @@ else wterm(')'); @z +@x pTeX: print_hex for "Invalid KANJI code" or "Invalid KANSUJI char" errors +@ Old versions of \TeX\ needed a procedure called |print_ASCII| whose function +@y +@ Hexadecimal printing. + +@p procedure print_hex_safe(@!n:integer); +begin if n<0 then print_int(n) else print_hex(n); end; + +@ Old versions of \TeX\ needed a procedure called |print_ASCII| whose function +@z + @x @d max_halfword==@"FFFFFFF {largest allowable value in a |halfword|} @y @@ -6468,7 +6479,7 @@ set_kansuji_char: begin p:=cur_chr; scan_int; n:=cur_val; scan_optional_equals; scan_int; if not is_char_kanji(cur_val) then begin print_err("Invalid KANSUJI char ("); - print_hex(cur_val); print_char(")"); + print_hex_safe(cur_val); print_char(")"); @.Invalid KANSUJI char@> help1("I'm skipping this control sequences.");@/ error; return; @@ -6626,7 +6637,7 @@ if is_char_kanji(n) then define(inhibit_xsp_code_base+j,cur_val,n); end else - begin print_err("Invalid KANJI code ("); print_hex(n); print_char(")"); + begin print_err("Invalid KANJI code ("); print_hex_safe(n); print_char(")"); @.Invalid KANJI code@> help1("I'm skipping this control sequences.");@/ error; return; @@ -6714,7 +6725,7 @@ else if p=pre_break_penalty_code then print("pre") else if p=post_break_penalty_code then print("post") else print_char("?"); - print("breakpenalty ("); print_hex(n); print_char(")"); + print("breakpenalty ("); print_hex_safe(n); print_char(")"); @.Invalid KANJI code@> help1("I'm skipping this control sequences.");@/ error; return; From 95835d3de70df83621a6dfc3df58c8342a42b32f Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Mon, 28 Jun 2021 23:37:52 +0900 Subject: [PATCH 16/17] ptex-base.ch: cleanup --- source/texk/web2c/ptexdir/ptex-base.ch | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index b8b998b46..a1de918bb 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -334,8 +334,7 @@ else @y @ Hexadecimal printing. -@p procedure print_hex_safe(@!n:integer); -begin if n<0 then print_int(n) else print_hex(n); end; +@d print_hex_safe(#)==if #<0 then print_int(#) else print_hex(#) @ Old versions of \TeX\ needed a procedure called |print_ASCII| whose function @z @@ -7412,7 +7411,7 @@ end; @ @= cur_q:=tail; if inhibit_glue_flag<>true then - begin { print("IF");print_int(cur_l); } + begin if cur_ltrue then end; end else - begin { print("IT");print_int(cur_l); } + begin if cur_l Date: Tue, 29 Jun 2021 20:25:37 +0900 Subject: [PATCH 17/17] \toucs returns -1 for invalid code --- source/texk/web2c/ptexdir/ptex-base.ch | 3 ++- source/texk/web2c/uptexdir/uptex-m.ch | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/source/texk/web2c/ptexdir/ptex-base.ch b/source/texk/web2c/ptexdir/ptex-base.ch index a1de918bb..b1a9ec963 100644 --- a/source/texk/web2c/ptexdir/ptex-base.ch +++ b/source/texk/web2c/ptexdir/ptex-base.ch @@ -2463,7 +2463,8 @@ kuten_code: begin cur_val:=fromKUTEN(cur_val); if cur_val=0 then print_int(-1) else print_int(cur_val); end; ucs_code: begin cur_val:=fromUCS(cur_val); if cur_val=0 then print_int(-1) else print_int(cur_val); end; -toucs_code: print_int(toUCS(cur_val)); +toucs_code: begin cur_val:=toUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; ptex_revision_code: print(pTeX_revision); kansuji_code: print_kansuji(cur_val); string_code:if cur_cs<>0 then sprint_cs(cur_cs) diff --git a/source/texk/web2c/uptexdir/uptex-m.ch b/source/texk/web2c/uptexdir/uptex-m.ch index 3c53039d5..74f8a20a7 100644 --- a/source/texk/web2c/uptexdir/uptex-m.ch +++ b/source/texk/web2c/uptexdir/uptex-m.ch @@ -673,10 +673,15 @@ string_code, meaning_code: begin save_scanner_status:=scanner_status; @x ucs_code: begin cur_val:=fromUCS(cur_val); if cur_val=0 then print_int(-1) else print_int(cur_val); end; +toucs_code: begin cur_val:=toUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; @y ucs_code: if (isinternalUPTEX) then print_int(fromUCS(cur_val)) else begin cur_val:=fromUCS(cur_val); if cur_val=0 then print_int(-1) else print_int(cur_val); end; +toucs_code: if (isinternalUPTEX) then print_int(toUCS(cur_val)) + else begin cur_val:=toUCS(cur_val); + if cur_val=0 then print_int(-1) else print_int(cur_val); end; @z @x