Skip to content

Commit

Permalink
Merge branch 'dgud/stdlib/unicode-fix/GH-8748/OTP-19210' into maint
Browse files Browse the repository at this point in the history
* dgud/stdlib/unicode-fix/GH-8748/OTP-19210:
  Handle ranges in UnicodeData.txt
  • Loading branch information
dgud committed Sep 9, 2024
2 parents 2b76a58 + a613789 commit 273faca
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
Binary file modified lib/stdlib/test/unicode_util_SUITE_data/unicode_table.bin
Binary file not shown.
45 changes: 37 additions & 8 deletions lib/stdlib/uc_spec/gen_unicode_mod.escript
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,25 @@ file_open(File) ->

%% Parse one line of UnicodeData.txt and prepend the resulting
%% {CodePoint, #cp{}} entry (or entries) to Acc.
%%
%% The line is split on ";". The name field is passed through
%% pick_range/1, which returns a range tag together with the name
%% to store. When the tag is 'last', the entry closes a range and
%% fill_range/3 adds the remaining code points of that range on top
%% of Acc; for any other tag a single entry is prepended.
%%
%% Returns the updated accumulator.
parse_unicode_data(Line0, Acc) ->
    Line = string:chomp(Line0),
    [CodePoint,Name0,Cat,Class,_BiDi,Decomp,
     _N1,_N2,_N3,_BDMirror,_Uni1,_Iso|Case] = tokens(Line, ";"),
    %% to_decomp/1 yields a 2-tuple for the compatibility case and
    %% some other term for the canonical case; store each in its own field.
    {Dec,Comp} = case to_decomp(Decomp) of
                     {_, _} = Compabil -> {[], Compabil};
                     Canon -> {Canon, []}
                 end,
    {Range, Name} = pick_range(Name0),
    %% The record is the same whether or not this line ends a range,
    %% so build it once.
    CP = #cp{name=list_to_binary(Name),class=to_class(Class),
             dec=Dec, comp=Comp, cs=to_case(Case), cat=Cat},
    Code = hex_to_int(CodePoint),
    case Range of
        last ->
            fill_range(Acc, CP, Code);
        _ ->
            [{Code, CP}|Acc]
    end.


%% Convert the class field to an integer, ignoring any whitespace
%% around the digits. Crashes with badarg if what remains is not a
%% valid integer literal.
to_class(String) ->
    Digits = string:trim(String, both),
    list_to_integer(Digits).
Expand All @@ -111,6 +120,26 @@ to_case(["","",""]) -> [];
to_case([Upper,Lower,Title]) ->
{hex_to_int(Upper),hex_to_int(Lower),hex_to_int(Title),[]}.

%% Classify a name field.
%%
%% A name starting with "<" has the text after the "<" split on ","
%% by tokens/2 and is classified by range_1/1. Any other name is
%% returned unchanged with the tag 'false'.
%%
%% Returns {Tag, Name}.
pick_range([$< | Bracketed]) ->
    Parts = tokens(Bracketed, ","),
    range_1(Parts);
pick_range(PlainName) ->
    {false, PlainName}.

%% Classify the parts of a "<...>" name.
%%
%% A two-element list ending in " First>" or " Last>" is tagged
%% 'first' or 'last' and its first element is returned as the name.
%% Anything else is tagged 'false' and returned with its last
%% element removed.
%%
%% NOTE(review): lists:droplast/1 is applied to the whole argument.
%% If tokens/2 yields a list of strings, this removes the last token
%% rather than a trailing ">" (a single-token name would become []).
%% Confirm this is intended.
range_1(Parts) ->
    case Parts of
        [Name, " First>"] ->
            {first, Name};
        [Name, " Last>"] ->
            {last, Name};
        _ ->
            {false, lists:droplast(Parts)}
    end.

%% Expand a range whose first code point is already at the head of
%% the accumulator.
%%
%% The head of the accumulator must be {First, CP} with exactly the
%% same CP as the one passed in; otherwise no clause matches and the
%% call fails. Entries for First+1 through Last are then added by
%% fill_range_1/4.
fill_range([{First, CP} | _] = Entries, CP, Last) ->
    Next = First + 1,
    fill_range_1(Next, Last, CP, Entries).

%% Prepend {N, CP} for every N from Start up to and including Last,
%% so that the entry for Last ends up at the head of the result.
%% When Start is greater than Last, Acc is returned untouched.
fill_range_1(Start, Last, CP, Acc) when Start =< Last ->
    Ascending = [{Code, CP} || Code <- lists:seq(Start, Last)],
    %% lists:reverse/2 puts the reversed entries in front of Acc.
    lists:reverse(Ascending, Acc);
fill_range_1(_Start, _Last, _CP, Acc) ->
    Acc.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

parse_special_casing("03A3;" ++ _, Acc) ->
Expand Down Expand Up @@ -1026,12 +1055,12 @@ gen_category(Fd, [{CP, {_, _, _, NextCat}}|Rest], Cat, Start, End, All, Acc)
true ->
case Cat of
letter ->
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w-> subcat_letter(CP);~n",
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> subcat_letter(CP);~n",
[Start, End]),
gen_category(Fd, Rest, NextCat, CP, CP, All,
lists:reverse(lists:seq(Start, End)) ++ Acc);
_ ->
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w-> ~w;~n", [Start, End, Cat]),
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> ~w;~n", [Start, End, Cat]),
gen_category(Fd, Rest, NextCat, CP, CP, All, Acc)
end
end;
Expand All @@ -1044,7 +1073,7 @@ gen_category(Fd, [{CP, {_, _, _, NewCat}}|Rest]=Cont, Cat, Start, End, All, Acc)
true ->
case Cat of
letter ->
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w-> subcat_letter(CP);~n",
io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> subcat_letter(CP);~n",
[Start, End]),
gen_category(Fd, Rest, NewCat, CP, CP, All,
lists:reverse(lists:seq(Start, End)) ++ Acc);
Expand Down

0 comments on commit 273faca

Please sign in to comment.