diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b51fd5936..4cbf372a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,17 +6,17 @@ repos: hooks: - id: check-merge-conflict - id: debug-statements - stages: [commit] + stages: [pre-commit] - id: end-of-file-fixer - stages: [commit] + stages: [pre-commit] - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - id: isort - stages: [commit] + stages: [pre-commit] - repo: https://github.com/psf/black rev: 23.12.1 hooks: - id: black language_version: python3 - stages: [commit] + stages: [pre-commit] diff --git a/mathics/builtin/atomic/strings.py b/mathics/builtin/atomic/strings.py index 41ccf2722..6244b2190 100644 --- a/mathics/builtin/atomic/strings.py +++ b/mathics/builtin/atomic/strings.py @@ -15,26 +15,22 @@ from mathics.core.atoms import Integer, Integer0, Integer1, String from mathics.core.attributes import A_LISTABLE, A_PROTECTED -from mathics.core.builtin import Builtin, Predefined, PrefixOperator, Test +from mathics.core.builtin import Builtin, Predefined, PrefixOperator from mathics.core.convert.expression import to_mathics_list -from mathics.core.convert.python import from_bool -from mathics.core.convert.regex import to_regex from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression from mathics.core.parser import MathicsFileLineFeeder, parse -from mathics.core.symbols import Symbol, SymbolTrue from mathics.core.systemsymbols import ( SymbolFailed, SymbolInputForm, SymbolNone, SymbolOutputForm, + SymbolToExpression, ) -from mathics.eval.strings import eval_ToString +from mathics.eval.strings import eval_StringContainsQ, eval_ToString from mathics.settings import SYSTEM_CHARACTER_ENCODING -SymbolToExpression = Symbol("ToExpression") - # covers all of the variations. 
Here we just give some minimal basics # Data taken from: @@ -130,48 +126,6 @@ def push(i, iter, form): push(i, iter, form) -def _pattern_search(name, string, patt, evaluation, options, matched): - # Get the pattern list and check validity for each - if patt.has_form("List", None): - patts = patt.elements - else: - patts = [patt] - re_patts = [] - for p in patts: - py_p = to_regex(p, show_message=evaluation.message) - if py_p is None: - evaluation.message("StringExpression", "invld", p, patt) - return - re_patts.append(py_p) - - flags = re.MULTILINE - if options["System`IgnoreCase"] is SymbolTrue: - flags = flags | re.IGNORECASE - - def _search(patts, str, flags, matched): - if any(re.search(p, str, flags=flags) for p in patts): - return from_bool(matched) - return from_bool(not matched) - - # Check string validity and perform regex searchhing - if string.has_form("List", None): - py_s = [s.get_string_value() for s in string.elements] - if any(s is None for s in py_s): - evaluation.message( - name, "strse", Integer1, Expression(Symbol(name), string, patt) - ) - return - return to_mathics_list(*[_search(re_patts, s, flags, matched) for s in py_s]) - else: - py_s = string.get_string_value() - if py_s is None: - evaluation.message( - name, "strse", Integer1, Expression(Symbol(name), string, patt) - ) - return - return _search(re_patts, py_s, flags, matched) - - def anchor_pattern(patt): """ anchors a regex in order to force matching against an entire string. @@ -691,35 +645,11 @@ class StringContainsQ(Builtin): def eval(self, string, patt, evaluation: Evaluation, options: dict): "StringContainsQ[string_, patt_, OptionsPattern[%(name)s]]" - return _pattern_search( + return eval_StringContainsQ( self.__class__.__name__, string, patt, evaluation, options, True ) -class StringQ(Test): - """ - - :WMA link: - https://reference.wolfram.com/language/ref/StringQ.html -
-
'StringQ[$expr$]' -
returns 'True' if $expr$ is a 'String', or 'False' otherwise. -
- - >> StringQ["abc"] - = True - >> StringQ[1.5] - = False - >> Select[{"12", 1, 3, 5, "yz", x, y}, StringQ] - = {12, yz} - """ - - summary_text = "test whether an expression is a string" - - def test(self, expr) -> bool: - return isinstance(expr, String) - - class StringRepeat(Builtin): """ diff --git a/mathics/builtin/string/__init__.py b/mathics/builtin/string/__init__.py index 5db90bd50..d9abdaad9 100644 --- a/mathics/builtin/string/__init__.py +++ b/mathics/builtin/string/__init__.py @@ -2,3 +2,4 @@ Strings and Characters """ +# FIXME: Redo. This is a Tech note, not a Guide Section. diff --git a/mathics/builtin/string/characters.py b/mathics/builtin/string/characters.py index a7ba6b3bd..337cfb897 100644 --- a/mathics/builtin/string/characters.py +++ b/mathics/builtin/string/characters.py @@ -2,7 +2,7 @@ """ Characters in Strings """ - +# FIXME: Redo: this is part of a Tech note, not a guide section. from mathics.core.atoms import String from mathics.core.attributes import A_LISTABLE, A_PROTECTED, A_READ_PROTECTED @@ -72,74 +72,6 @@ def eval(self, start, stop, evaluation: Evaluation): return ListExpression(*[String(chr(code)) for code in range(start, stop + 1)]) -class DigitQ(Builtin): - """ - - :WMA link: - https://reference.wolfram.com/language/ref/DigitQ.html - -
-
'DigitQ[$string$]' -
yields 'True' if all the characters in the $string$ are \ - digits, and yields 'False' otherwise. - -
- - >> DigitQ["9"] - = True - - >> DigitQ["a"] - = False - - >> DigitQ["01001101011000010111010001101000011010010110001101110011"] - = True - - >> DigitQ["-123456789"] - = False - - """ - - rules = { - "DigitQ[string_]": ( - "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]" - ), - } - summary_text = "test whether all the characters are digits" - - -class LetterQ(Builtin): - """ - - :WMA link: - https://reference.wolfram.com/language/ref/LetterQ.html - -
-
'LetterQ[$string$]' -
yields 'True' if all the characters in the $string$ are \ - letters, and yields 'False' otherwise. -
- - >> LetterQ["m"] - = True - - >> LetterQ["9"] - = False - - >> LetterQ["Mathics"] - = True - - >> LetterQ["Welcome to Mathics"] - = False - """ - - rules = { - "LetterQ[string_]": ( - "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]" - ), - } - summary_text = "test whether all the characters are letters" - - class LowerCaseQ(Test): """ :WMA link:https://reference.wolfram.com/language/ref/LowerCaseQ.html @@ -159,8 +91,10 @@ class LowerCaseQ(Test): summary_text = "test whether all the characters are lower-case letters" - def test(self, s) -> bool: - return isinstance(s, String) and all(c.islower() for c in s.get_string_value()) + def test(self, expr) -> bool: + return isinstance(expr, String) and all( + c.islower() for c in expr.get_string_value() + ) class ToLowerCase(Builtin): @@ -224,5 +158,7 @@ class UpperCaseQ(Test): summary_text = "test whether all the characters are upper-case letters" - def test(self, s) -> bool: - return isinstance(s, String) and all(c.isupper() for c in s.get_string_value()) + def test(self, expr) -> bool: + return isinstance(expr, String) and all( + c.isupper() for c in expr.get_string_value() + ) diff --git a/mathics/builtin/string/operations.py b/mathics/builtin/string/operations.py index 89d477132..5250e18cd 100644 --- a/mathics/builtin/string/operations.py +++ b/mathics/builtin/string/operations.py @@ -11,7 +11,6 @@ _parallel_match, _StringFind, mathics_split, - to_regex, ) from mathics.core.atoms import Integer, Integer1, Integer3, String from mathics.core.attributes import ( @@ -23,6 +22,7 @@ ) from mathics.core.builtin import Builtin, InfixOperator from mathics.core.convert.python import from_python +from mathics.core.convert.regex import to_regex from mathics.core.evaluation import Evaluation from mathics.core.expression import BoxError, Expression, string_list from mathics.core.expression_predefined import MATHICS3_INFINITY diff --git a/mathics/builtin/string/patterns.py 
b/mathics/builtin/string/patterns.py index 3ebb4e5a5..f0edd5777 100644 --- a/mathics/builtin/string/patterns.py +++ b/mathics/builtin/string/patterns.py @@ -3,28 +3,14 @@ String Patterns """ -import re - -from mathics.builtin.atomic.strings import ( - _evaluate_match, - _parallel_match, - _pattern_search, - _StringFind, - anchor_pattern, - to_regex, -) -from mathics.core.atoms import Integer1, String -from mathics.core.attributes import A_FLAT, A_LISTABLE, A_ONE_IDENTITY, A_PROTECTED +from mathics.builtin.atomic.strings import _evaluate_match, _parallel_match, _StringFind +from mathics.core.atoms import String +from mathics.core.attributes import A_FLAT, A_ONE_IDENTITY, A_PROTECTED from mathics.core.builtin import Builtin, InfixOperator from mathics.core.evaluation import Evaluation -from mathics.core.expression import Expression from mathics.core.list import ListExpression -from mathics.core.symbols import Symbol, SymbolFalse, SymbolTrue from mathics.eval.strings import eval_StringFind -SymbolStringMatchQ = Symbol("StringMatchQ") -SymbolStringExpression = Symbol("StringExpression") - class DigitCharacter(Builtin): """ @@ -264,137 +250,6 @@ def eval(self, args, evaluation: Evaluation): return String("".join(args)) -class StringFreeQ(Builtin): - """ - :WMA link: - https://reference.wolfram.com/language/ref/StringFreeQ.html - -
-
'StringFreeQ["$string$", $patt$]' -
returns True if no substring in $string$ matches the string \ - expression $patt$, and returns False otherwise. - -
'StringFreeQ[{"s1", "s2", ...}, patt]' -
returns the list of results for each element of string list. - -
'StringFreeQ["string", {p1, p2, ...}]' -
returns True if no substring matches any of the $pi$. - -
'StringFreeQ[patt]' -
represents an operator form of StringFreeQ that can be applied \ - to an expression. -
- - >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"] - = False - - >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"] - = True - - >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True] - = False - - >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"] - = {True, True, True, False, False} - - - >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"} - = {False, False, False, True, True, True, True, True, False} - - >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True] - = {True, True, False, False, False} - - """ - - options = { - "IgnoreCase": "False", - } - - rules = { - "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]", - } - - summary_text = "test whether a string is free of substrings matching a pattern" - - def eval(self, string, patt, evaluation: Evaluation, options: dict): - "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]" - return _pattern_search( - self.__class__.__name__, string, patt, evaluation, options, False - ) - - -class StringMatchQ(Builtin): - r""" - :WMA link: - https://reference.wolfram.com/language/ref/StringMatchQ.html - -
-
'StringMatchQ["string", $pattern$]' -
checks is "string" matches $pattern$ -
- - >> StringMatchQ["abc", "abc"] - = True - - >> StringMatchQ["abc", "abd"] - = False - - >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..] - = True - - Use StringMatchQ as an operator - >> StringMatchQ[LetterCharacter]["a"] - = True - """ - - attributes = A_LISTABLE | A_PROTECTED - - options = { - "IgnoreCase": "False", - "SpellingCorrections": "None", - } - - rules = { - "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]", - } - summary_text = "test whether a string matches a pattern" - - def eval(self, string, patt, evaluation: Evaluation, options: dict): - "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]" - py_string = string.get_string_value() - if py_string is None: - evaluation.message( - "StringMatchQ", - "strse", - Integer1, - Expression(SymbolStringMatchQ, string, patt), - ) - return - - re_patt = to_regex( - patt, show_message=evaluation.message, abbreviated_patterns=True - ) - if re_patt is None: - evaluation.message( - "StringExpression", - "invld", - patt, - Expression(SymbolStringExpression, patt), - ) - return - - re_patt = anchor_pattern(re_patt) - - flags = re.MULTILINE - if options["System`IgnoreCase"] is SymbolTrue: - flags = flags | re.IGNORECASE - - if re.match(re_patt, py_string, flags=flags) is None: - return SymbolFalse - else: - return SymbolTrue - - class WhitespaceCharacter(Builtin): r""" :WMA link: @@ -421,7 +276,6 @@ class WhitespaceCharacter(Builtin): summary_text = "space, newline, tab, or other whitespace character" -# strings.to_regex() seems to have the implementation here. 
class WordBoundary(Builtin): """ :WMA link: diff --git a/mathics/builtin/testing_expressions/string_tests.py b/mathics/builtin/testing_expressions/string_tests.py new file mode 100644 index 000000000..f3a810ab7 --- /dev/null +++ b/mathics/builtin/testing_expressions/string_tests.py @@ -0,0 +1,287 @@ +""" +String Tests +""" + +import re + +from mathics_scanner import SingleLineFeeder, TranslateError + +from mathics.builtin.atomic.strings import anchor_pattern +from mathics.core.atoms import Integer1, String +from mathics.core.attributes import A_LISTABLE, A_PROTECTED +from mathics.core.builtin import Builtin, Test +from mathics.core.convert.regex import to_regex +from mathics.core.evaluation import Evaluation +from mathics.core.expression import Expression +from mathics.core.parser.util import parse +from mathics.core.symbols import Symbol, SymbolFalse, SymbolTrue +from mathics.core.systemsymbols import SymbolStringExpression, SymbolStringMatchQ +from mathics.eval.strings import eval_StringContainsQ + + +class DigitQ(Builtin): + """ + + :WMA link: + https://reference.wolfram.com/language/ref/DigitQ.html + +
+
'DigitQ[$string$]' +
yields 'True' if all the characters in the $string$ are \ + digits, and yields 'False' otherwise. + +
+ + >> DigitQ["9"] + = True + + >> DigitQ["a"] + = False + + >> DigitQ["01001101011000010111010001101000011010010110001101110011"] + = True + + >> DigitQ["-123456789"] + = False + + """ + + rules = { + "DigitQ[string_]": ( + "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]" + ), + } + summary_text = "test whether all the characters are digits" + + +class LetterQ(Builtin): + """ + + :WMA link: + https://reference.wolfram.com/language/ref/LetterQ.html + +
+
'LetterQ[$string$]' +
yields 'True' if all the characters in the $string$ are \ + letters, and yields 'False' otherwise. +
+ + >> LetterQ["m"] + = True + + >> LetterQ["9"] + = False + + >> LetterQ["Mathics"] + = True + + >> LetterQ["Welcome to Mathics"] + = False + """ + + rules = { + "LetterQ[string_]": ( + "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]" + ), + } + summary_text = "test whether all the characters are letters" + + +class StringFreeQ(Builtin): + """ + :WMA link: + https://reference.wolfram.com/language/ref/StringFreeQ.html + +
+
'StringFreeQ["$string$", $patt$]' +
returns True if no substring in $string$ matches the string \ + expression $patt$, and returns False otherwise. + +
'StringFreeQ[{"s1", "s2", ...}, patt]' +
returns the list of results for each element of the string list. +
'StringFreeQ["string", {p1, p2, ...}]' +
returns True if no substring matches any of the $pi$. + +
'StringFreeQ[patt]' +
represents an operator form of StringFreeQ that can be applied \ + to an expression. +
+ + >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"] + = False + + >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"] + = True + + >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True] + = False + + >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"] + = {True, True, True, False, False} + + + >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"} + = {False, False, False, True, True, True, True, True, False} + + >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True] + = {True, True, False, False, False} + + """ + + options = { + "IgnoreCase": "False", + } + + rules = { + "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]", + } + + summary_text = "test whether a string is free of substrings matching a pattern" + + def eval(self, string, patt, evaluation: Evaluation, options: dict): + "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]" + return eval_StringContainsQ( + self.__class__.__name__, string, patt, evaluation, options, False + ) + + +class StringMatchQ(Builtin): + r""" + :WMA link: + https://reference.wolfram.com/language/ref/StringMatchQ.html + +
+
'StringMatchQ["string", $pattern$]' +
checks if "string" matches $pattern$ +
+ + >> StringMatchQ["abc", "abc"] + = True + + >> StringMatchQ["abc", "abd"] + = False + + >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..] + = True + + Use StringMatchQ as an operator + >> StringMatchQ[LetterCharacter]["a"] + = True + """ + + attributes = A_LISTABLE | A_PROTECTED + + options = { + "IgnoreCase": "False", + "SpellingCorrections": "None", + } + + rules = { + "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]", + } + summary_text = "test whether a string matches a pattern" + + def eval(self, string, patt, evaluation: Evaluation, options: dict): + "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]" + py_string = string.get_string_value() + if py_string is None: + evaluation.message( + "StringMatchQ", + "strse", + Integer1, + Expression(SymbolStringMatchQ, string, patt), + ) + return + + re_patt = to_regex( + patt, show_message=evaluation.message, abbreviated_patterns=True + ) + if re_patt is None: + evaluation.message( + "StringExpression", + "invld", + patt, + Expression(SymbolStringExpression, patt), + ) + return + + re_patt = anchor_pattern(re_patt) + + flags = re.MULTILINE + if options["System`IgnoreCase"] is SymbolTrue: + flags = flags | re.IGNORECASE + + if re.match(re_patt, py_string, flags=flags) is None: + return SymbolFalse + else: + return SymbolTrue + + +class StringQ(Test): + """ + + :WMA link: + https://reference.wolfram.com/language/ref/StringQ.html +
+
'StringQ[$expr$]' +
returns 'True' if $expr$ is a 'String', or 'False' otherwise. +
+ + >> StringQ["abc"] + = True + >> StringQ[1.5] + = False + >> Select[{"12", 1, 3, 5, "yz", x, y}, StringQ] + = {12, yz} + """ + + summary_text = "test whether an expression is a string" + + def test(self, expr) -> bool: + return isinstance(expr, String) + + +class SyntaxQ(Builtin): + """ + + :WMA link: + https://reference.wolfram.com/language/ref/SyntaxQ.html +
+
'SyntaxQ["string"]' +
returns 'True' if "string" corresponds to a syntactically correct input for a Mathics3 expression, or 'False' otherwise. +
+ + >> SyntaxQ["a[b"] + = False + + >> SyntaxQ["a[b]"] + = True + """ + + # FIXME: + # replace messages[string] with the below. + # Better is to have this kind of thing done a function for doing this. + # Same things for "noopt" message which does not exist yet. + messages = {"string": "String expected at position `1` in `2`."} + + summary_text = ( + "test whether a string is a syntactically-correct a Mathics3 expression" + ) + + def eval(self, string, evaluation: Evaluation): + "SyntaxQ[string_]" + + if not isinstance(string, String): + evaluation.message( + "SyntaxQ", "string", Integer1, Expression(Symbol("SyntaxQ"), string) + ) + return + + feeder = SingleLineFeeder(string.value) + try: + parse(evaluation.definitions, feeder) + except TranslateError: + return SymbolFalse + else: + return SymbolTrue diff --git a/mathics/core/systemsymbols.py b/mathics/core/systemsymbols.py index 2f6d36f8a..46a343569 100644 --- a/mathics/core/systemsymbols.py +++ b/mathics/core/systemsymbols.py @@ -257,6 +257,7 @@ SymbolStringForm = Symbol("System`StringForm") SymbolStringInsert = Symbol("System`StringInsert") SymbolStringJoin = Symbol("System`StringJoin") +SymbolStringMatchQ = Symbol("System`StringMatchQ") SymbolStringPosition = Symbol("System`StringPosition") SymbolStringQ = Symbol("System`StringQ") SymbolStringRiffle = Symbol("System`StringRiffle") @@ -276,6 +277,7 @@ SymbolTimes = Symbol("System`Times") SymbolThrow = Symbol("System`Throw") SymbolThreshold = Symbol("System`Threshold") +SymbolToExpression = Symbol("System`ToExpression") SymbolToString = Symbol("System`ToString") SymbolTotal = Symbol("System`Total") SymbolTraditionalForm = Symbol("System`TraditionalForm") diff --git a/mathics/eval/strings.py b/mathics/eval/strings.py index 25af27fac..96a73a2c4 100644 --- a/mathics/eval/strings.py +++ b/mathics/eval/strings.py @@ -5,6 +5,7 @@ from mathics.core.atoms import Integer1, Integer3, String from mathics.core.convert.expression import to_mathics_list +from 
mathics.core.convert.python import from_bool from mathics.core.convert.regex import to_regex from mathics.core.element import BaseElement from mathics.core.evaluation import Evaluation @@ -24,6 +25,48 @@ def eval_ToString( return String(text) +def eval_StringContainsQ(name, string, patt, evaluation, options, matched): + # Get the pattern list and check validity for each + if patt.has_form("List", None): + patts = patt.elements + else: + patts = [patt] + re_patts = [] + for p in patts: + py_p = to_regex(p, show_message=evaluation.message) + if py_p is None: + evaluation.message("StringExpression", "invld", p, patt) + return + re_patts.append(py_p) + + flags = re.MULTILINE + if options["System`IgnoreCase"] is SymbolTrue: + flags = flags | re.IGNORECASE + + def _search(patts, str, flags, matched): + if any(re.search(p, str, flags=flags) for p in patts): + return from_bool(matched) + return from_bool(not matched) + + # Check string validity and perform regex searchhing + if string.has_form("List", None): + py_s = [s.get_string_value() for s in string.elements] + if any(s is None for s in py_s): + evaluation.message( + name, "strse", Integer1, Expression(Symbol(name), string, patt) + ) + return + return to_mathics_list(*[_search(re_patts, s, flags, matched) for s in py_s]) + else: + py_s = string.get_string_value() + if py_s is None: + evaluation.message( + name, "strse", Integer1, Expression(Symbol(name), string, patt) + ) + return + return _search(re_patts, py_s, flags, matched) + + def eval_StringFind(self, string, rule, n, evaluation, options, cases): if n.sameQ(Symbol("System`Private`Null")): expr = Expression(Symbol(self.get_name()), string, rule)