Skip to content

Commit

Permalink
Add z_string:unaccent/1 (#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
mworrell authored Nov 24, 2024
1 parent 8528e70 commit 7dd5737
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/z_string.erl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
nospaces/1,
line/1,
len/1,
unaccent/1,
normalize/1,
to_rootname/1,
to_name/1,
Expand Down Expand Up @@ -518,6 +519,16 @@ to_name1(<<_/utf8, Rest/binary>>, Acc) ->
to_name1(Rest, <<Acc/binary, "_">>).


%% @doc Strip the accents from a text. The input is decomposed (NFD) so that
%% accents become separate combining characters, every nonspacing mark
%% (Unicode category Mn) is deleted, and the remainder is recomposed (NFC).
-spec unaccent(S) -> S1 when
    S :: unicode:chardata(),
    S1 :: binary().
unaccent(S) ->
    Decomposed = unicode:characters_to_nfd_binary(S),
    %% The 'unicode' option makes both the pattern and the subject be
    %% interpreted as UTF-8, so \p{Mn} matches whole code points.
    Stripped = re:replace(Decomposed, <<"\\p{Mn}">>, <<>>, [unicode, global]),
    unicode:characters_to_nfc_binary(Stripped).


%% @doc Transliterate a Unicode string to an ASCII string with lowercase characters.
%% Tries to transliterate some characters to a..z
Expand Down Expand Up @@ -573,6 +584,8 @@ normalize(<<"ó"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"ò"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"Ó"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"Ò"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"ô"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"Ô"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"ß"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$s,$s>>);
normalize(<<"ç"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$c>>);
normalize(<<"Ç"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$c>>);
Expand Down

0 comments on commit 7dd5737

Please sign in to comment.