From 429dc57c9c1447c245174de70f08cfa1bae5437e Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Sat, 23 Nov 2024 13:41:10 +0100 Subject: [PATCH] Do not allow name characters immediately after a number In cases such as "123a45", the scanner did not report an error, instead returning two tokens '123' and 'a45'. The splitting point depends on the base, so e.g., "16#12fg34" was scanned as '16#12f' and 'g34'. This change makes the scanner reject numbers immediately followed by a name character. --- lib/stdlib/src/erl_scan.erl | 19 ++++++ lib/stdlib/test/erl_scan_SUITE.erl | 96 +++++++++++++++++------------- 2 files changed, 74 insertions(+), 41 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index 5b4893b4313b..d70317fe1c35 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -447,6 +447,15 @@ string_quote(_) -> $". %" (is_integer(C) andalso (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240))). -define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)). +-define(NAMECHAR(C), + (is_integer(C) andalso + (C >= $a andalso C =< $z orelse + C >= $A andalso C =< $Z orelse + C =:= $_ orelse + C >= $0 andalso C =< $9 orelse + C =:= $@ orelse + C >= $ß andalso C =< $ÿ andalso C =/= $÷ orelse + C >= $À andalso C =< $Þ andalso C =/= $×))). -define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)). 
-define(UNICODE(C), (is_integer(C) andalso @@ -1802,6 +1811,8 @@ scan_number([$_]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) -> scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us); +scan_number([$.,C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0); scan_number([$.]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> @@ -1818,6 +1829,8 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> %% Extremely unlikely to occur in practice. scan_error({illegal,base}, Line, Col, Line, Col, Cs0) end; +scan_number([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0); scan_number([]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}}; scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) -> @@ -1857,6 +1870,8 @@ scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) with_underscore); scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; +scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0); scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) -> @@ -1889,6 +1904,8 @@ scan_fraction([$_]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E -> 
scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us); +scan_fraction([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0); scan_fraction([]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}}; scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) -> @@ -1915,6 +1932,8 @@ scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore); scan_exponent([$_]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; +scan_exponent([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0); scan_exponent([]=Cs, St, Line, Col, Toks, Ncs, Us) -> {more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}}; scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) -> diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 28a67b28075c..64577554deaa 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -313,24 +313,37 @@ integers() -> fun({S, I}) -> test_string(S, [{integer, {1, 1}, I}]) end, UnderscoreSamples), - UnderscoreErrors = - ["123_", - "123__", - "123_456_", - "123__456", - "_123", + NotIntegers = + ["_123", "__123"], lists:foreach( fun(S) -> case erl_scan:string(S) of {ok, [{integer, _, _}|_], _} -> error({unexpected_integer, S}); - _ -> + {ok, _, _} -> ok end - end, UnderscoreErrors), - test_string("_123", [{var,{1,1},'_123'}]), - test_string("123_", [{integer,{1,1},123},{var,{1,4},'_'}]), + end, NotIntegers), + IntegerErrors = + ["123_", + "123__", + "123_456_", + "123__456", + "123_.456", + "123abc", + "12@"], + lists:foreach( + fun(S) -> + case erl_scan:string(S) of + {error,{1,erl_scan,{illegal,integer}},_} -> + ok; + {error,Err,_} -> + error({unexpected_error, S, 
Err}); + Succ -> + error({unexpected_success, S, Succ}) + end + end, IntegerErrors), ok. base_integers() -> @@ -350,8 +363,6 @@ base_integers() -> {error,{{1,1},erl_scan,{base,1000}},{1,6}} = erl_scan:string("1_000#000", {1,1}, []), - test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]), - [begin Str = BS ++ "#" ++ S, E = 2 + length(BS), @@ -360,12 +371,6 @@ base_integers() -> end || {BS,S} <- [{"3","3"},{"15","f"},{"12","c"}, {"1_5","f"},{"1_2","c"}] ], - {ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"), - {ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} = - erl_scan_string("16#ef@", {1,1}, []), - {ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} = - erl_scan_string("16#eg@", {1,1}, []), - UnderscoreSamples = [{"16#1234_ABCD_EF56", 16#1234abcdef56}, {"2#0011_0101_0011", 2#001101010011}, @@ -376,7 +381,7 @@ base_integers() -> fun({S, I}) -> test_string(S, [{integer, {1, 1}, I}]) end, UnderscoreSamples), - UnderscoreErrors = + IntegerErrors = ["16_#123ABC", "16#123_", "16#_123", @@ -384,17 +389,23 @@ base_integers() -> "16#_ABC", "2#_0101", "1__6#ABC", - "16#AB__CD"], + "16#AB__CD", + "16#eg", + "16#ef@", + "10_#", + "10#12a4", + "10#12A4"], lists:foreach( fun(S) -> case erl_scan:string(S) of - {ok, [{integer, _, _}|_], _} -> - error({unexpected_integer, S}); - _ -> - ok + {error,{1,erl_scan,{illegal,integer}},_} -> + ok; + {error,Err,_} -> + error({unexpected_error, S, Err}); + Succ -> + error({unexpected_success, S, Succ}) end - end, UnderscoreErrors), - test_string("16#123_", [{integer,{1,1},291},{var,{1,7},'_'}]), + end, IntegerErrors), test_string("_16#ABC", [{var,{1,1},'_16'},{'#',{1,4}},{var,{1,5},'ABC'}]), ok. 
@@ -405,7 +416,6 @@ floats() -> test_string(FS, Ts) end || FS <- ["1.0","001.17","3.31200","1.0e0","1.0E17", "34.21E-18", "17.0E+14"]], - test_string("1.e2", [{integer,{1,1},1},{'.',{1,2}},{atom,{1,3},e2}]), {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string("1.0e400"), @@ -430,25 +440,29 @@ floats() -> fun({S, I}) -> test_string(S, [{float, {1, 1}, I}]) end, UnderscoreSamples), - UnderscoreErrors = - ["123_.456", - "123._456", - "123.456_", - "123._", - "1._23e10", + FloatErrors = + ["123.456_", + "1.23_e10", "1.23e_10", - "1.23e10_"], + "1.23e10_", + "123.45_e6", + "123.45a12", + "123.45e23a12", + "1.e2", + "12._34", + "123.a4" + ], lists:foreach( fun(S) -> case erl_scan:string(S) of - {ok, [{float, _, _}|_], _} -> - error({unexpected_float, S}); - _ -> - ok + {error,{1,erl_scan,{illegal,float}},_} -> + ok; + {error,Err,_} -> + error({unexpected_error, S, Err}); + Succ -> + error({unexpected_success, S, Succ}) end - end, UnderscoreErrors), - test_string("123._", [{integer,{1,1},123},{'.',{1,4}},{var,{1,5},'_'}]), - test_string("1.23_e10", [{float,{1,1},1.23},{var,{1,5},'_e10'}]), + end, FloatErrors), ok. dots() ->