From a6d1ade774b247e64c47706a9b65f427aef192ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20St=C3=BCber?= Date: Wed, 13 Nov 2024 13:50:39 +0100 Subject: [PATCH 01/11] Name Token as Unicode Identifier --- .../main/grammars/de/monticore/MCBasics.mc4 | 17 +++++++--- .../literals/TestMCCommonLiterals.mc4 | 1 + .../mcliterals/StringLiteralsTest.java | 31 ++++++++++++++++--- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 index 1d07234757..15731b2bea 100644 --- a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 +++ b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 @@ -17,10 +17,19 @@ component grammar MCBasics { This nonterminal is used as core reference mechanism e.g. for symbols. */ - token Name = - ( 'a'..'z' | 'A'..'Z' | '_' | '$' ) - ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' | '$' )*; - + token Name = + + ( + ~('\u0000') + {Character.isUnicodeIdentifierStart(_input.LA(-1))}? + ) + + ( + ~('\u0000') + {Character.isJavaIdentifierPart(_input.LA(-1))}? 
+ )* + ; + /*=================================================================*/ fragment token NEWLINE = diff --git a/monticore-grammar/src/test/grammars/de/monticore/literals/TestMCCommonLiterals.mc4 b/monticore-grammar/src/test/grammars/de/monticore/literals/TestMCCommonLiterals.mc4 index ff4096bc15..ad6625edff 100644 --- a/monticore-grammar/src/test/grammars/de/monticore/literals/TestMCCommonLiterals.mc4 +++ b/monticore-grammar/src/test/grammars/de/monticore/literals/TestMCCommonLiterals.mc4 @@ -3,5 +3,6 @@ package de.monticore.literals; grammar TestMCCommonLiterals extends de.monticore.literals.MCCommonLiterals { +A = Name; } diff --git a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java index 76f102eccf..9617fe9cec 100644 --- a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java +++ b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java @@ -5,6 +5,7 @@ import de.monticore.literals.mccommonliterals._ast.ASTStringLiteral; import de.monticore.literals.mcliteralsbasis._ast.ASTLiteral; import de.monticore.literals.testmccommonliterals.TestMCCommonLiteralsMill; +import de.monticore.literals.testmccommonliterals._ast.ASTA; import de.se_rwth.commons.logging.Log; import de.se_rwth.commons.logging.LogStub; import org.junit.jupiter.api.Assertions; @@ -12,6 +13,9 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; public class StringLiteralsTest { @@ -25,12 +29,31 @@ public void initLog() { private void checkStringLiteral(String expected, String actual) throws IOException { ASTLiteral lit = MCLiteralsTestHelper.getInstance().parseLiteral(actual); - Assertions.assertTrue(lit instanceof ASTStringLiteral); - Assertions.assertEquals(expected, ((ASTStringLiteral) lit).getValue()); + assertTrue(lit instanceof 
ASTStringLiteral); + assertEquals(expected, ((ASTStringLiteral) lit).getValue()); - Assertions.assertTrue(Log.getFindings().isEmpty()); + assertTrue(Log.getFindings().isEmpty()); } - + + @Test + public void testName() throws IOException { + Optional ast = TestMCCommonLiteralsMill.parser().parse_StringA("Meier"); + assertTrue(ast.isPresent()); + + ast = TestMCCommonLiteralsMill.parser().parse_StringA("Müller"); + assertTrue(ast.isPresent()); + + ast = TestMCCommonLiteralsMill.parser().parse_StringA("Vπ"); + assertTrue(ast.isPresent()); + assertEquals("Vπ", ast.get().getName()); + + assertFalse(Character.isUnicodeIdentifierStart('1')); + assertTrue(Character.isUnicodeIdentifierPart('1')); + + assertFalse(Character.isUnicodeIdentifierStart('.')); + assertFalse(Character.isUnicodeIdentifierPart('.')); + } + @Test public void testStringLiterals() { try { From d4cb9044457b32878c673cc8b674417673e1be17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20St=C3=BCber?= Date: Thu, 14 Nov 2024 09:14:36 +0100 Subject: [PATCH 02/11] try to parse the name --- .../de/monticore/mcliterals/StringLiteralsTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java index 9617fe9cec..2f31da3326 100644 --- a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java +++ b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java @@ -13,6 +13,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.util.List; import java.util.Optional; import static org.junit.jupiter.api.Assertions.*; @@ -47,11 +48,22 @@ public void testName() throws IOException { assertTrue(ast.isPresent()); assertEquals("Vπ", ast.get().getName()); + ast = TestMCCommonLiteralsMill.parser().parse_StringA("f"); + assertTrue(ast.isPresent()); + + 
assertFalse(Character.isUnicodeIdentifierStart('1')); assertTrue(Character.isUnicodeIdentifierPart('1')); assertFalse(Character.isUnicodeIdentifierStart('.')); assertFalse(Character.isUnicodeIdentifierPart('.')); + + List notAllowed = List.of('.', '(', ')', '+', ',', '/', ' '); + for (Character s : notAllowed) { + assertFalse(Character.isUnicodeIdentifierPart(s), "Character <"+s+"> is not allowed"); + assertFalse(Character.isUnicodeIdentifierStart(s), "Character <"+s+"> is not allowed"); + } + } @Test From 0413a156735252aa3d6897201b96e064cfd5e52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20St=C3=BCber?= Date: Thu, 14 Nov 2024 09:19:55 +0100 Subject: [PATCH 03/11] Parse a, b, c, d AND e --- .../java/de/monticore/mcliterals/StringLiteralsTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java index 2f31da3326..0b65e9c004 100644 --- a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java +++ b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java @@ -48,9 +48,12 @@ public void testName() throws IOException { assertTrue(ast.isPresent()); assertEquals("Vπ", ast.get().getName()); - ast = TestMCCommonLiteralsMill.parser().parse_StringA("f"); - assertTrue(ast.isPresent()); - + List souldParseName = List.of("a", "b", "c", "d", "e", "f", "g", "h"); + for (String s : souldParseName) { + ast = TestMCCommonLiteralsMill.parser().parse_StringA(s); + assertTrue(ast.isPresent(), "Could not parse string '" + s + "'"); + assertEquals(s, ast.get().getName()); + } assertFalse(Character.isUnicodeIdentifierStart('1')); assertTrue(Character.isUnicodeIdentifierPart('1')); From 089d594693abb73975f16073d4da817be5a7053b Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Thu, 21 Nov 2024 12:20:15 +0100 Subject: [PATCH 04/11] Update 
PatternMatching for keyrules --- .../codegen/mc2cd/TransformationHelper.java | 41 +------ .../codegen/parser/MCGrammarInfo.java | 100 +++--------------- 2 files changed, 17 insertions(+), 124 deletions(-) diff --git a/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java b/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java index 0ec4fecd64..884d47f813 100644 --- a/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java @@ -14,7 +14,6 @@ import de.monticore.cdbasis._ast.ASTCDDefinition; import de.monticore.cdbasis._ast.ASTCDType; import de.monticore.grammar.MCGrammarSymbolTableHelper; -import de.monticore.grammar.RegExpBuilder; import de.monticore.grammar.grammar._ast.*; import de.monticore.grammar.grammar._symboltable.AdditionalAttributeSymbol; import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol; @@ -38,19 +37,18 @@ import java.io.IOException; import java.util.*; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; import java.util.stream.Collectors; public final class TransformationHelper { - public static final String DEFAULT_FILE_EXTENSION = ".java"; - public static final String AST_PREFIX = "AST"; public static final String LIST_SUFFIX = "s"; public static final int STAR = -1; + public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*"); + protected static List reservedCdNames = Arrays.asList( // CD4A "derived", @@ -609,38 +607,5 @@ public static Optional getMax(ASTAdditionalAttribute ast) { } return Optional.empty(); } - - public static boolean isFragment(ASTProd astNode) { - return !(astNode instanceof ASTLexProd) - || ((ASTLexProd) astNode).isFragment(); - } - - public static Optional calculateLexPattern(MCGrammarSymbol grammar, - ASTLexProd lexNode) { - Optional ret = 
Optional.empty(); - - final String lexString = getLexString(grammar, lexNode); - try { - if ("[[]".equals(lexString)) { - return Optional.ofNullable(Pattern.compile("[\\[]")); - } else { - return Optional.ofNullable(Pattern.compile(lexString)); - } - } catch (PatternSyntaxException e) { - Log.error("0xA0913 Internal error with pattern handling for lex rules. Pattern: " + lexString - + "\n", e); - } - return ret; - } - - protected static String getLexString(MCGrammarSymbol grammar, ASTLexProd lexNode) { - StringBuilder builder = new StringBuilder(); - RegExpBuilder regExp = new RegExpBuilder(builder, grammar); - Grammar_WithConceptsTraverser traverser = Grammar_WithConceptsMill.traverser(); - traverser.add4Grammar(regExp); - traverser.setGrammarHandler(regExp); - lexNode.accept(traverser); - return builder.toString(); - } - + } diff --git a/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java b/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java index 382acc1d23..fee344e2a6 100644 --- a/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java @@ -3,9 +3,6 @@ package de.monticore.codegen.parser; import com.google.common.collect.*; -import de.monticore.codegen.cd2java.DecorationHelper; -import de.monticore.codegen.mc2cd.TransformationHelper; -import de.monticore.grammar.DirectLeftRecursionDetector; import de.monticore.grammar.LexNamer; import de.monticore.grammar.MCGrammarSymbolTableHelper; import de.monticore.grammar.PredicatePair; @@ -24,10 +21,8 @@ import java.util.*; import java.util.Map.Entry; -import java.util.regex.Pattern; -import static de.monticore.codegen.mc2cd.TransformationHelper.calculateLexPattern; -import static de.monticore.codegen.mc2cd.TransformationHelper.isFragment; +import static de.monticore.codegen.mc2cd.TransformationHelper.NAME_PATTERN; /** * Contains information about a 
grammar which is required for the parser @@ -39,23 +34,18 @@ public class MCGrammarInfo { * Keywords of the processed grammar and its super grammars */ protected Set keywords = Sets.newLinkedHashSet(); - - /** - * Lexer patterns - */ - protected Map> lexerPatterns = new LinkedHashMap<>(); /** * Additional java code for parser defined in antlr concepts of the processed * grammar and its super grammars */ - protected List additionalParserJavaCode = new ArrayList(); + protected List additionalParserJavaCode = new ArrayList<>(); /** * Additional java code for lexer defined in antlr concepts of the processed * grammar and its super grammars */ - protected List additionalLexerJavaCode = new ArrayList(); + protected List additionalLexerJavaCode = new ArrayList<>(); /** * Predicates @@ -78,7 +68,6 @@ public class MCGrammarInfo { public MCGrammarInfo(MCGrammarSymbol grammarSymbol) { this.grammarSymbol = grammarSymbol; - buildLexPatterns(); findAllKeywords(); grammarSymbol.getTokenRulesWithInherited().forEach(t -> addSplitRule(t)); grammarSymbol.getKeywordRulesWithInherited().forEach(k -> keywordRules.add(k)); @@ -117,7 +106,7 @@ protected void addSubRules() { for (MCGrammarSymbol grammar : grammarsToHandle) { HashMap> ruleMap = Maps.newLinkedHashMap(); // Collect superclasses and superinterfaces for classes - for (ASTClassProd classProd : ((ASTMCGrammar) grammar.getAstNode()) + for (ASTClassProd classProd : (grammar.getAstNode()) .getClassProdList()) { List ruleRefs = Lists.newArrayList(); ruleRefs.addAll(classProd.getSuperRuleList()); @@ -126,8 +115,7 @@ protected void addSubRules() { } // Collect superclasses and superinterfaces for abstract classes - for (ASTAbstractProd classProd : ((ASTMCGrammar) grammar.getAstNode()) - .getAbstractProdList()) { + for (ASTAbstractProd classProd : grammar.getAstNode().getAbstractProdList()) { List ruleRefs = Lists.newArrayList(); ruleRefs.addAll(classProd.getSuperRuleList()); ruleRefs.addAll(classProd.getSuperInterfaceRuleList()); @@ 
-135,8 +123,7 @@ protected void addSubRules() { } // Collect superinterfaces for interfaces - for (ASTInterfaceProd classProd : ((ASTMCGrammar) grammar.getAstNode()) - .getInterfaceProdList()) { + for (ASTInterfaceProd classProd : grammar.getAstNode().getInterfaceProdList()) { List ruleRefs = Lists.newArrayList(); ruleRefs.addAll(classProd.getSuperInterfaceRuleList()); ruleMap.put(classProd.getName(), ruleRefs); @@ -166,20 +153,6 @@ protected void addSubrule(String superrule, String subrule, ASTRuleReference rul } - protected Collection addLeftRecursiveRuleForProd(ASTClassProd ast) { - List superProds = TransformationHelper.getAllSuperProds(ast); - Collection names = new ArrayList<>(); - superProds.forEach(s -> names.add(s.getName())); - DirectLeftRecursionDetector detector = new DirectLeftRecursionDetector(); - for (ASTAlt alt : ast.getAltList()) { - if (detector.isAlternativeLeftRecursive(alt, names)) { - names.add(ast.getName()); - return names; - } - } - return Lists.newArrayList(); - } - /** * @return grammarSymbol */ @@ -264,21 +237,16 @@ public boolean isKeyword(String name, MCGrammarSymbol grammar) { matches = true; found = true; } - + // Automatically detect if not specified - if (!found && lexerPatterns.containsKey(grammar)) { - for (Pattern p : lexerPatterns.get(grammar)) { - - if (p.matcher(name).matches()) { - matches = true; - Log.debug(name + " is considered as a keyword because it matches " + p + " " - + "(grammarsymtab)", MCGrammarSymbol.class.getSimpleName()); - break; - } - + if (!found) { + if (NAME_PATTERN.matcher(name).matches()) { + matches = true; + Log.debug(name + " is considered as a keyword because it matches " + NAME_PATTERN + " " + + "(grammarsymtab)", MCGrammarSymbol.class.getSimpleName()); } } - + return matches; } @@ -286,7 +254,7 @@ public List getSubRulesForParsing(String ruleName) { // Consider superclass Optional ruleByName = grammarSymbol.getProdWithInherited(ruleName); List predicateList = Lists.newArrayList(); - if 
(!ruleByName.isPresent()) { + if (ruleByName.isEmpty()) { return predicateList; } @@ -335,46 +303,6 @@ protected void findAllKeywords() { } } - - protected void buildLexPatterns() { - buildLexPatterns(grammarSymbol); - grammarSymbol.getSuperGrammarSymbols().forEach(g -> buildLexPatterns(g)); - } - - protected void buildLexPatterns(MCGrammarSymbol grammar) { - List patterns = lexerPatterns.get(grammar); - if (patterns == null) { - patterns = new ArrayList<>(); - lexerPatterns.put(grammar, patterns); - } - - for (ProdSymbol rule : grammar.getProdsWithInherited().values()) { - if (rule.isPresentAstNode() && rule.isIsLexerProd()) { - if (!isFragment(rule.getAstNode())) { - Optional lexPattern = calculateLexPattern( - grammar, - (ASTLexProd) rule.getAstNode()); - - if (lexPattern.isPresent()) { - patterns.add(lexPattern.get()); - } - } - } - } - } - - public static String getListName(ASTNonTerminal a) { - String name; - if (a.isPresentUsageName()) { - name = a.getUsageName(); - } else { - // Use Nonterminal name as attribute name starting with lower case - // for a list (iterated) nonterminal a 's' is added for the name - name = a.getName(); - } - return name + DecorationHelper.GET_SUFFIX_LIST; - } - protected boolean mustBeKeyword(String rule) { return keywords.contains(rule); From 4e027fbecc6092cdd970ec0e87a1d87a485a42df Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Thu, 21 Nov 2024 12:20:37 +0100 Subject: [PATCH 05/11] Update definition of Name --- .../main/grammars/de/monticore/MCBasics.mc4 | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 index 15731b2bea..f3b733fafd 100644 --- a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 +++ b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 @@ -19,14 +19,22 @@ component grammar MCBasics { */ token Name = - ( - ~('\u0000') - 
{Character.isUnicodeIdentifierStart(_input.LA(-1))}? + ( ('a'..'z' | 'A'..'Z' | '$' | '_') // these are the "java letters" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') + {Character.isJavaIdentifierStart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') + {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? ) - ( - ~('\u0000') + (('a'..'z' | 'A'..'Z' | '0'..'9' | '$' | '_') // these are the "java letters" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') {Character.isJavaIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') + { Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? 
)* ; From 356e76fce3b7965ef3ef7e2817b3713cd71a9906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20St=C3=BCber?= Date: Tue, 26 Nov 2024 11:25:42 +0100 Subject: [PATCH 06/11] Unicode Identifier instead of Java Identifiert, and refactored test --- .../src/main/grammars/de/monticore/MCBasics.mc4 | 8 ++++---- .../de/monticore/mcliterals/StringLiteralsTest.java | 12 +++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 index f3b733fafd..aa4f83b8f1 100644 --- a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 +++ b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 @@ -22,19 +22,19 @@ component grammar MCBasics { ( ('a'..'z' | 'A'..'Z' | '$' | '_') // these are the "java letters" below 0x7F | // covers all characters above 0x7F which are not a surrogate ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') - {Character.isJavaIdentifierStart(_input.LA(-1))}? + {Character.isUnicodeIdentifierStart(_input.LA(-1))}? | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') - {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + {Character.isUnicodeIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? ) (('a'..'z' | 'A'..'Z' | '0'..'9' | '$' | '_') // these are the "java letters" below 0x7F | // covers all characters above 0x7F which are not a surrogate ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') - {Character.isJavaIdentifierPart(_input.LA(-1))}? + {Character.isUnicodeIdentifierPart(_input.LA(-1))}? | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') - { Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? 
+ { Character.isUnicodeIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? )* ; diff --git a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java index 0b65e9c004..045cc06145 100644 --- a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java +++ b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java @@ -41,15 +41,9 @@ public void testName() throws IOException { Optional ast = TestMCCommonLiteralsMill.parser().parse_StringA("Meier"); assertTrue(ast.isPresent()); - ast = TestMCCommonLiteralsMill.parser().parse_StringA("Müller"); - assertTrue(ast.isPresent()); - - ast = TestMCCommonLiteralsMill.parser().parse_StringA("Vπ"); - assertTrue(ast.isPresent()); - assertEquals("Vπ", ast.get().getName()); - - List souldParseName = List.of("a", "b", "c", "d", "e", "f", "g", "h"); - for (String s : souldParseName) { + List shouldParseName = List.of("Müller", "Vπ", "a", "b", "c" + , "d", "e", "f", "g", "h"); + for (String s : shouldParseName) { ast = TestMCCommonLiteralsMill.parser().parse_StringA(s); assertTrue(ast.isPresent(), "Could not parse string '" + s + "'"); assertEquals(s, ast.get().getName()); From 1878659c692a1ee4590c96ff7e782b968629fe56 Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Tue, 17 Dec 2024 13:01:54 +0100 Subject: [PATCH 07/11] Rework the recognition of keywords --- .../codegen/parser/MCGrammarInfo.java | 50 ++++--------------- .../codegen/parser/antlr/Grammar2Antlr.java | 6 +-- .../parser/antlr/Grammar2ParseVisitor.java | 4 +- 3 files changed, 14 insertions(+), 46 deletions(-) diff --git a/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java b/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java index fee344e2a6..4973580d41 100644 --- 
a/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/parser/MCGrammarInfo.java @@ -221,33 +221,15 @@ public Set getKeywords() { } /** - * Checks if the terminal or constant name is a and has to be - * defined in the parser. + * Checks if the terminal or constant name is a keyword and could + * be replaced by a name * * @param name - rule to check - * @return true, if the terminal or constant name is a and has to - * be defined in the parser. + * @return true, if the terminal or constant name is a keyword and could + * be replaced by a name */ - public boolean isKeyword(String name, MCGrammarSymbol grammar) { - boolean matches = false; - boolean found = false; - - // Check with options - if (mustBeKeyword(name)) { - matches = true; - found = true; - } - - // Automatically detect if not specified - if (!found) { - if (NAME_PATTERN.matcher(name).matches()) { - matches = true; - Log.debug(name + " is considered as a keyword because it matches " + NAME_PATTERN + " " - + "(grammarsymtab)", MCGrammarSymbol.class.getSimpleName()); - } - } - - return matches; + public boolean isKeyword(String name) { + return keywords.contains(name); } public List getSubRulesForParsing(String ruleName) { @@ -292,9 +274,7 @@ protected void findAllKeywords() { } } } - Optional refGrammarSymbol = MCGrammarSymbolTableHelper - .getMCGrammarSymbol(astProd.getEnclosingScope()); - TerminalVisitor tv = new TerminalVisitor(refGrammarSymbol); + TerminalVisitor tv = new TerminalVisitor(); Grammar_WithConceptsTraverser traverser = Grammar_WithConceptsMill.traverser(); traverser.add4Grammar(tv); astProd.accept(traverser); @@ -304,18 +284,8 @@ protected void findAllKeywords() { } - protected boolean mustBeKeyword(String rule) { - return keywords.contains(rule); - } - protected class TerminalVisitor implements GrammarVisitor2 { - TerminalVisitor(Optional refGrammarSymbol) { - this.refGrammarSymbol = refGrammarSymbol; 
- } - - Optional refGrammarSymbol; - public GrammarTraverser getTraverser() { return traverser; } @@ -328,16 +298,14 @@ public void setTraverser(GrammarTraverser traverser) { @Override public void visit(ASTTerminal keyword) { - if (isKeyword(keyword.getName(), grammarSymbol) - || (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) { + if (NAME_PATTERN.matcher(keyword.getName()).matches()) { keywords.add(keyword.getName()); } } @Override public void visit(ASTConstant keyword) { - if (isKeyword(keyword.getName(), grammarSymbol) - || (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) { + if (NAME_PATTERN.matcher(keyword.getName()).matches()) { keywords.add(keyword.getName()); } } diff --git a/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2Antlr.java b/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2Antlr.java index 884aa03352..ba85941786 100644 --- a/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2Antlr.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2Antlr.java @@ -258,7 +258,7 @@ public void handle(ASTConstantGroup ast) { if (x.isPresentKeyConstant()) { addToCodeSection(createKeyPredicate(x.getKeyConstant().getStringList(), tmpName + label)); - } else if (!grammarInfo.isKeyword(x.getName(), grammarEntry)) { + } else if (!grammarInfo.isKeyword(x.getName())) { addToCodeSection(tmpName + label + parserHelper.getOrComputeLexSymbolName(x.getName())); } else if (grammarInfo.getKeywordRules().contains(x.getName())) { addToCodeSection(tmpName + label + parserHelper.getKeyRuleName(x.getName())); @@ -378,7 +378,7 @@ public void visit(ASTTerminal ast) { String rulename; if (ast.getName().isEmpty()) { rulename = ""; - } else if (grammarInfo.isKeyword(ast.getName(), grammarEntry) && grammarInfo.getKeywordRules().contains(ast.getName())) { + } else if 
(grammarInfo.isKeyword(ast.getName()) && grammarInfo.getKeywordRules().contains(ast.getName())) { rulename = parserHelper.getKeyRuleName(ast.getName()); } else { rulename = parserHelper.getOrComputeLexSymbolName(ast.getName().intern()); @@ -890,7 +890,7 @@ boolean getASTMax(ASTNonTerminal ast) { protected void addActionForKeyword(ASTTerminal keyword, ProdSymbol rule, boolean isList, String tmpNamePlusLbl) { addToCodeSection("("); String rulename = ""; - if (grammarInfo.isKeyword(keyword.getName(), grammarEntry)) { + if (grammarInfo.isKeyword(keyword.getName())) { rulename = parserHelper.getOrComputeLexSymbolName(keyword.getName()); } diff --git a/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2ParseVisitor.java b/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2ParseVisitor.java index 3da1d41fea..8023151a6a 100644 --- a/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2ParseVisitor.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/parser/antlr/Grammar2ParseVisitor.java @@ -636,7 +636,7 @@ public void handle(ASTTerminal node) { protected String getRuleName(String name) { if (name.isEmpty()) { return ""; - } else if (grammarInfo.isKeyword(name, parserGeneratorHelper.getGrammarSymbol()) && grammarInfo.getKeywordRules().contains(name)) { + } else if (grammarInfo.isKeyword(name) && grammarInfo.getKeywordRules().contains(name)) { return parserGeneratorHelper.getKeyRuleName(name); } else { return parserGeneratorHelper.getCachedLexSymbolName(name.intern()).orElse("##no-usagename-for-rulename"); @@ -781,7 +781,7 @@ protected String getRuleName(ASTConstant constant) { } else if (constant.isPresentTokenConstant()) { return parserGeneratorHelper.getCachedLexSymbolName(constant.getTokenConstant().getString()) .orElse("##no-usagename-rulename-tc"); - } else if (!grammarInfo.isKeyword(constant.getName(), parserGeneratorHelper.getGrammarSymbol())) { + } else if 
(!grammarInfo.isKeyword(constant.getName())) { return parserGeneratorHelper.getCachedLexSymbolName(constant.getName()) .orElse("##no-usagename-rulename-k"); } else if (grammarInfo.getKeywordRules().contains(constant.getName())) { From ad152184a07494edf3639df70595a99e6b159ce5 Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Wed, 18 Dec 2024 10:01:45 +0100 Subject: [PATCH 08/11] don't add keys twice --- .../grammar/grammar/_symboltable/GrammarScopesGenitor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/monticore-grammar/src/main/java/de/monticore/grammar/grammar/_symboltable/GrammarScopesGenitor.java b/monticore-grammar/src/main/java/de/monticore/grammar/grammar/_symboltable/GrammarScopesGenitor.java index a5412e4c2f..28855c122e 100644 --- a/monticore-grammar/src/main/java/de/monticore/grammar/grammar/_symboltable/GrammarScopesGenitor.java +++ b/monticore-grammar/src/main/java/de/monticore/grammar/grammar/_symboltable/GrammarScopesGenitor.java @@ -104,7 +104,6 @@ public void visit (ASTTerminal node) { public void visit(ASTKeyTerminal node) { // only create a symbol for ASTKeyTerminals that have a usage name // only with usage name is shown in AST - grammarSymbol.noKeywords.addAll(node.getKeyConstant().getStringList()); if(node.isPresentUsageName()){ RuleComponentSymbolBuilder symbolBuilder = GrammarMill.ruleComponentSymbolBuilder().setName(node.getName()); symbolBuilder.setName(node.isPresentUsageName()?node.getUsageName():""); From 873acabc14430600d5666e101fb66e7dc566f4cc Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Wed, 18 Dec 2024 10:03:35 +0100 Subject: [PATCH 09/11] use getKeywordRulesWithInherited --- .../grammar/cocos/KeyRuleWithoutName.java | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleWithoutName.java b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleWithoutName.java index 7f8361d878..aa17e4c51b 100644 --- 
a/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleWithoutName.java +++ b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleWithoutName.java @@ -2,14 +2,9 @@ package de.monticore.grammar.cocos; -import de.monticore.grammar.grammar.GrammarMill; -import de.monticore.grammar.grammar._ast.ASTGrammarNode; -import de.monticore.grammar.grammar._ast.ASTKeyConstant; import de.monticore.grammar.grammar._ast.ASTMCGrammar; import de.monticore.grammar.grammar._cocos.GrammarASTMCGrammarCoCo; import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol; -import de.monticore.grammar.grammar._visitor.GrammarTraverser; -import de.monticore.grammar.grammar._visitor.GrammarVisitor2; import de.se_rwth.commons.logging.Log; /** @@ -25,24 +20,10 @@ public class KeyRuleWithoutName implements GrammarASTMCGrammarCoCo { public void check(ASTMCGrammar gr) { MCGrammarSymbol grSymbol = gr.getSymbol(); if (!gr.isComponent() && !grSymbol.getProdWithInherited("Name").isPresent()) { - if (!gr.getKeywordRuleList().isEmpty() || new FindKeyConstant().getResult(gr)) { + if (!grSymbol.getKeywordRulesWithInherited().isEmpty()) { Log.error(ERROR_CODE + ERROR_MSG_FORMAT, gr.get_SourcePositionStart()); } } } - protected class FindKeyConstant implements GrammarVisitor2 { - protected boolean hasKeyConstant = false; - - public boolean getResult(ASTGrammarNode ast) { - GrammarTraverser traverser = GrammarMill.traverser(); - traverser.add4Grammar(this); - ast.accept(traverser); - return hasKeyConstant; - } - - public void visit(ASTKeyConstant ast) { - hasKeyConstant = true; - } - } } From b45fa337bf53d96fd14c9154b79248d07a17bb30 Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Wed, 18 Dec 2024 10:07:11 +0100 Subject: [PATCH 10/11] new coco KeyRuleMatchingSimpleName --- .../codegen/mc2cd/TransformationHelper.java | 1 + .../java/de/monticore/grammar/LexNamer.java | 5 ++- .../monticore/grammar/cocos/GrammarCoCos.java | 1 + .../cocos/KeyRuleMatchingSimpleName.java | 31 
+++++++++++++++++ .../cocos/KeyRuleMatchingSimpleNameTest.java | 33 +++++++++++++++++++ .../grammar/cocos/invalid/A0145/A0145.mc4 | 9 +++++ .../grammar/cocos/invalid/A0145/A0145a.mc4 | 11 +++++++ 7 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleName.java create mode 100644 monticore-grammar/src/test/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleNameTest.java create mode 100644 monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145.mc4 create mode 100644 monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145a.mc4 diff --git a/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java b/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java index 884d47f813..417a56a3b7 100644 --- a/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java +++ b/monticore-generator/src/main/java/de/monticore/codegen/mc2cd/TransformationHelper.java @@ -47,6 +47,7 @@ public final class TransformationHelper { public static final int STAR = -1; + @Deprecated // Use LexNamer.NAME_PATTERN after release 7.7.0 public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*"); protected static List reservedCdNames = Arrays.asList( diff --git a/monticore-grammar/src/main/java/de/monticore/grammar/LexNamer.java b/monticore-grammar/src/main/java/de/monticore/grammar/LexNamer.java index 69f4c6a5df..60ab5d5055 100644 --- a/monticore-grammar/src/main/java/de/monticore/grammar/LexNamer.java +++ b/monticore-grammar/src/main/java/de/monticore/grammar/LexNamer.java @@ -6,6 +6,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.regex.Pattern; import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol; import de.se_rwth.commons.logging.Log; @@ -16,7 +17,9 @@ * */ 
public class LexNamer { - + + public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*"); + protected int constantCounter = 0; protected int lexCounter = 0; diff --git a/monticore-grammar/src/main/java/de/monticore/grammar/cocos/GrammarCoCos.java b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/GrammarCoCos.java index b148ccb19d..c1c65004a8 100644 --- a/monticore-grammar/src/main/java/de/monticore/grammar/cocos/GrammarCoCos.java +++ b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/GrammarCoCos.java @@ -75,6 +75,7 @@ public Grammar_WithConceptsCoCoChecker getCoCoChecker() { checker.addCoCo(new ExternalNTNoASTRule()); checker.addCoCo(new DerivedAndManualListName()); checker.addCoCo(new KeyRuleWithoutName()); + checker.addCoCo(new KeyRuleMatchingSimpleName()); checker.addCoCo(new SymbolWithManyNames()); checker.addCoCo(new OverridingAdditionalAttributes()); checker.addCoCo(new NoExtensionOfSymbolThatOnlySpansScope()); diff --git a/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleName.java b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleName.java new file mode 100644 index 0000000000..e3c2c156b4 --- /dev/null +++ b/monticore-grammar/src/main/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleName.java @@ -0,0 +1,31 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore.grammar.cocos; + +import de.monticore.grammar.grammar._ast.ASTMCGrammar; +import de.monticore.grammar.grammar._cocos.GrammarASTMCGrammarCoCo; +import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol; +import de.se_rwth.commons.logging.Log; + +import static de.monticore.grammar.LexNamer.NAME_PATTERN; + +/** + * Checks that every keyword that is replaced by a name also matches the standard "Name" pattern ({@code LexNamer.NAME_PATTERN}); each violation is logged as 0xA0145 at the start position of the grammar. + */ +public class KeyRuleMatchingSimpleName implements GrammarASTMCGrammarCoCo { + + public static final String ERROR_CODE = "0xA0145"; + + public static final
String ERROR_MSG_FORMAT = " The keyword %s does not match the standard name pattern."; // leading blank separates the message from ERROR_CODE when both are concatenated + + @Override + public void check(ASTMCGrammar gr) { + MCGrammarSymbol grSymbol = gr.getSymbol(); + for (String str: grSymbol.getNoKeywordsList()) { + if (!NAME_PATTERN.matcher(str).matches()) { + Log.error(String.format(ERROR_CODE + ERROR_MSG_FORMAT, str), gr.get_SourcePositionStart()); + } + } + } + +} diff --git a/monticore-grammar/src/test/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleNameTest.java b/monticore-grammar/src/test/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleNameTest.java new file mode 100644 index 0000000000..adb3db2b52 --- /dev/null +++ b/monticore-grammar/src/test/java/de/monticore/grammar/cocos/KeyRuleMatchingSimpleNameTest.java @@ -0,0 +1,33 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore.grammar.cocos; + +import de.monticore.grammar.grammar_withconcepts._cocos.Grammar_WithConceptsCoCoChecker; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class KeyRuleMatchingSimpleNameTest extends CocoTest{ + private final String grammar = "de.monticore.grammar.cocos.invalid.A0145.A0145"; + + @BeforeEach + public void init() { + checker = new Grammar_WithConceptsCoCoChecker(); + checker.addCoCo(new KeyRuleMatchingSimpleName()); + } + + @Test + public void testInvalid() { + testInvalidGrammar(grammar, KeyRuleMatchingSimpleName.ERROR_CODE, String.format(KeyRuleMatchingSimpleName.ERROR_MSG_FORMAT, "foo&"), checker); + } + + @Test + public void testInvalid2() { + testInvalidGrammar(grammar+"a", KeyRuleMatchingSimpleName.ERROR_CODE, String.format(KeyRuleMatchingSimpleName.ERROR_MSG_FORMAT, "foo&"), checker); + } + + @Test + public void testCorrect(){ + testValidGrammar("de.monticore.grammar.cocos.valid.Attributes", checker); + } + +} diff --git a/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145.mc4
b/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145.mc4 new file mode 100644 index 0000000000..93adccca7e --- /dev/null +++ b/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145.mc4 @@ -0,0 +1,9 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore.grammar.cocos.invalid.A0145; + +grammar A0145 { + + Foo = key("foo&"); + +} diff --git a/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145a.mc4 b/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145a.mc4 new file mode 100644 index 0000000000..ef041a329c --- /dev/null +++ b/monticore-grammar/src/test/resources/de/monticore/grammar/cocos/invalid/A0145/A0145a.mc4 @@ -0,0 +1,11 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore.grammar.cocos.invalid.A0145; + +grammar A0145a { + + Foo = "foo&"; + + nokeyword "foo&"; + +} From 569afa4ae92d299b3b5278c9719445455d6bae1c Mon Sep 17 00:00:00 2001 From: Marita Breuer Date: Wed, 18 Dec 2024 10:52:53 +0100 Subject: [PATCH 11/11] test overriding "Name" --- .../main/grammars/de/monticore/MCBasics.mc4 | 23 +------- .../de/monticore/TestOverrideMCBasics.mc4 | 31 ++++++++++ .../de/monticore/OverrideMCBasicsTest.java | 57 +++++++++++++++++++ .../mcliterals/StringLiteralsTest.java | 27 --------- 4 files changed, 91 insertions(+), 47 deletions(-) create mode 100644 monticore-grammar/src/test/grammars/de/monticore/TestOverrideMCBasics.mc4 create mode 100644 monticore-grammar/src/test/java/de/monticore/OverrideMCBasicsTest.java diff --git a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 index aa4f83b8f1..b3fa8ee064 100644 --- a/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 +++ b/monticore-grammar/src/main/grammars/de/monticore/MCBasics.mc4 @@ -17,26 +17,9 @@ component grammar MCBasics { This nonterminal is used as core 
reference mechanism e.g. for symbols. */ - token Name = - - ( ('a'..'z' | 'A'..'Z' | '$' | '_') // these are the "java letters" below 0x7F - | // covers all characters above 0x7F which are not a surrogate - ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') - {Character.isUnicodeIdentifierStart(_input.LA(-1))}? - | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF - ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') - {Character.isUnicodeIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? - ) - - (('a'..'z' | 'A'..'Z' | '0'..'9' | '$' | '_') // these are the "java letters" below 0x7F - | // covers all characters above 0x7F which are not a surrogate - ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') - {Character.isUnicodeIdentifierPart(_input.LA(-1))}? - | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF - ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') - { Character.isUnicodeIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? - )* - ; + token Name = + ( 'a'..'z' | 'A'..'Z' | '_' | '$' ) + ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' | '$' )*; /*=================================================================*/ diff --git a/monticore-grammar/src/test/grammars/de/monticore/TestOverrideMCBasics.mc4 b/monticore-grammar/src/test/grammars/de/monticore/TestOverrideMCBasics.mc4 new file mode 100644 index 0000000000..4765566c83 --- /dev/null +++ b/monticore-grammar/src/test/grammars/de/monticore/TestOverrideMCBasics.mc4 @@ -0,0 +1,31 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore; + +grammar TestOverrideMCBasics extends de.monticore.MCBasics { + + @Override + token Name = + + ( ('a'..'z' | 'A'..'Z' | '$' | '_') // these are the "java letters" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') + {Character.isUnicodeIdentifierStart(_input.LA(-1))}? 
+ | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') + {Character.isUnicodeIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ) + + (('a'..'z' | 'A'..'Z' | '0'..'9' | '$' | '_') // these are the "java letters" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~('\u0000'..'\u007F' | '\uD800'..'\uDBFF') + {Character.isUnicodeIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + ('\uD800'..'\uDBFF') ('\uDC00'..'\uDFFF') + { Character.isUnicodeIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + )* + ; + + Foo = Name; + +} diff --git a/monticore-grammar/src/test/java/de/monticore/OverrideMCBasicsTest.java b/monticore-grammar/src/test/java/de/monticore/OverrideMCBasicsTest.java new file mode 100644 index 0000000000..2dcb266d31 --- /dev/null +++ b/monticore-grammar/src/test/java/de/monticore/OverrideMCBasicsTest.java @@ -0,0 +1,57 @@ +/* (c) https://github.com/MontiCore/monticore */ + +package de.monticore; + +import de.monticore.testoverridemcbasics.TestOverrideMCBasicsMill; +import de.monticore.testoverridemcbasics._ast.ASTFoo; +import de.se_rwth.commons.logging.Log; +import de.se_rwth.commons.logging.LogStub; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; + +public class OverrideMCBasicsTest { + + @BeforeEach + public void initLog() { + LogStub.init(); + Log.enableFailQuick(false); + TestOverrideMCBasicsMill.reset(); + TestOverrideMCBasicsMill.init(); + } + + @Test + public void testName() throws IOException { + Optional<ASTFoo> ast = TestOverrideMCBasicsMill.parser().parse_StringFoo("Meier"); + assertTrue(ast.isPresent()); + + List<String> shouldParseName = List.of("Müller", "Vπ", "a", "b", "c" + , "d", "e", "f",
"g", "h"); + for (String s : shouldParseName) { // every sample must parse as Foo = Name and come back unchanged + ast = TestOverrideMCBasicsMill.parser().parse_StringFoo(s); + assertTrue(ast.isPresent(), "Could not parse string '" + s + "'"); + assertEquals(s, ast.get().getName()); + } + } + + @Test + public void testUnicodeMethods() { // pins the java.lang.Character classification used by the predicates of the overridden Name token + assertFalse(Character.isUnicodeIdentifierStart('1')); + assertTrue(Character.isUnicodeIdentifierPart('1')); + + assertFalse(Character.isUnicodeIdentifierStart('.')); + assertFalse(Character.isUnicodeIdentifierPart('.')); + + List<Character> notAllowed = List.of('.', '(', ')', '+', ',', '/', ' '); + for (Character s : notAllowed) { + assertFalse(Character.isUnicodeIdentifierPart(s), "Character <"+s+"> is not allowed"); + assertFalse(Character.isUnicodeIdentifierStart(s), "Character <"+s+"> is not allowed"); + } + } + +} diff --git a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java index 045cc06145..02e77b5d7b 100644 --- a/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java +++ b/monticore-grammar/src/test/java/de/monticore/mcliterals/StringLiteralsTest.java @@ -36,33 +36,6 @@ private void checkStringLiteral(String expected, String actual) throws IOExcepti assertTrue(Log.getFindings().isEmpty()); } - @Test - public void testName() throws IOException { - Optional ast = TestMCCommonLiteralsMill.parser().parse_StringA("Meier"); - assertTrue(ast.isPresent()); - - List shouldParseName = List.of("Müller", "Vπ", "a", "b", "c" - , "d", "e", "f", "g", "h"); - for (String s : shouldParseName) { - ast = TestMCCommonLiteralsMill.parser().parse_StringA(s); - assertTrue(ast.isPresent(), "Could not parse string '" + s + "'"); - assertEquals(s, ast.get().getName()); - } - - assertFalse(Character.isUnicodeIdentifierStart('1')); - assertTrue(Character.isUnicodeIdentifierPart('1')); - - assertFalse(Character.isUnicodeIdentifierStart('.')); -
assertFalse(Character.isUnicodeIdentifierPart('.')); - - List notAllowed = List.of('.', '(', ')', '+', ',', '/', ' '); - for (Character s : notAllowed) { - assertFalse(Character.isUnicodeIdentifierPart(s), "Character <"+s+"> is not allowed"); - assertFalse(Character.isUnicodeIdentifierStart(s), "Character <"+s+"> is not allowed"); - } - - } - @Test public void testStringLiterals() { try {