Skip to content

Commit

Permalink
Merge pull request #72 from MontiCore/unicode_identifier
Browse files Browse the repository at this point in the history
simplify Name-token and keyword detection, fix fragmented identifiers
  • Loading branch information
luepges authored Dec 30, 2024
2 parents 992bf30 + 569afa4 commit bb23e03
Show file tree
Hide file tree
Showing 17 changed files with 214 additions and 190 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import de.monticore.cdbasis._ast.ASTCDDefinition;
import de.monticore.cdbasis._ast.ASTCDType;
import de.monticore.grammar.MCGrammarSymbolTableHelper;
import de.monticore.grammar.RegExpBuilder;
import de.monticore.grammar.grammar._ast.*;
import de.monticore.grammar.grammar._symboltable.AdditionalAttributeSymbol;
import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol;
Expand All @@ -38,19 +37,19 @@
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

public final class TransformationHelper {

public static final String DEFAULT_FILE_EXTENSION = ".java";

public static final String AST_PREFIX = "AST";

public static final String LIST_SUFFIX = "s";

public static final int STAR = -1;

/**
 * Pattern describing a name: one character out of {@code a-z}, {@code A-Z},
 * {@code _} or {@code $}, followed by any number of those characters or
 * the digits {@code 0-9}.
 *
 * @deprecated use {@code LexNamer.NAME_PATTERN} after release 7.7.0
 */
@Deprecated // Use LexNamer.NAME_PATTERN after release 7.7.0
public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*");

protected static List<String> reservedCdNames = Arrays.asList(
// CD4A
"derived",
Expand Down Expand Up @@ -609,38 +608,5 @@ public static Optional<Integer> getMax(ASTAdditionalAttribute ast) {
}
return Optional.empty();
}

/**
 * Tells whether a production counts as a fragment.
 *
 * @param astNode the production to inspect
 * @return {@code true} for every production that is not an
 *         {@code ASTLexProd}; for an {@code ASTLexProd} the result of its
 *         own {@code isFragment()}
 */
public static boolean isFragment(ASTProd astNode) {
  if (astNode instanceof ASTLexProd) {
    return ((ASTLexProd) astNode).isFragment();
  }
  // Anything that is not a lexer production is reported as a fragment.
  return true;
}

/**
 * Compiles the string produced by {@code getLexString} for a lexer
 * production into a {@link Pattern}.
 *
 * @param grammar the grammar handed on to {@code getLexString}
 * @param lexNode the lexer production to compile
 * @return the compiled pattern, or an empty Optional if compilation fails
 *         with a {@link PatternSyntaxException} (the failure is reported
 *         through {@code Log.error} with code 0xA0913)
 */
public static Optional<Pattern> calculateLexPattern(MCGrammarSymbol grammar,
    ASTLexProd lexNode) {
  final String lexString = getLexString(grammar, lexNode);
  // The exact string "[[]" is compiled as "[\\[]" instead
  // (presumably because the unescaped form is not accepted by Pattern.compile).
  final String regex = "[[]".equals(lexString) ? "[\\[]" : lexString;
  try {
    return Optional.of(Pattern.compile(regex));
  } catch (PatternSyntaxException e) {
    Log.error("0xA0913 Internal error with pattern handling for lex rules. Pattern: " + lexString
        + "\n", e);
    return Optional.empty();
  }
}

/**
 * Renders a lexer production as a string by letting a {@code RegExpBuilder}
 * traverse it.
 *
 * @param grammar the grammar passed to the {@code RegExpBuilder}
 * @param lexNode the lexer production that accepts the traverser
 * @return whatever the {@code RegExpBuilder} wrote into its StringBuilder
 */
protected static String getLexString(MCGrammarSymbol grammar, ASTLexProd lexNode) {
  StringBuilder output = new StringBuilder();
  RegExpBuilder regExpBuilder = new RegExpBuilder(output, grammar);

  // The builder is registered twice: as grammar visitor and as grammar handler.
  Grammar_WithConceptsTraverser walker = Grammar_WithConceptsMill.traverser();
  walker.add4Grammar(regExpBuilder);
  walker.setGrammarHandler(regExpBuilder);

  lexNode.accept(walker);
  return output.toString();
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
package de.monticore.codegen.parser;

import com.google.common.collect.*;
import de.monticore.codegen.cd2java.DecorationHelper;
import de.monticore.codegen.mc2cd.TransformationHelper;
import de.monticore.grammar.DirectLeftRecursionDetector;
import de.monticore.grammar.LexNamer;
import de.monticore.grammar.MCGrammarSymbolTableHelper;
import de.monticore.grammar.PredicatePair;
Expand All @@ -24,10 +21,8 @@

import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import static de.monticore.codegen.mc2cd.TransformationHelper.calculateLexPattern;
import static de.monticore.codegen.mc2cd.TransformationHelper.isFragment;
import static de.monticore.codegen.mc2cd.TransformationHelper.NAME_PATTERN;

/**
* Contains information about a grammar which is required for the parser
Expand All @@ -39,23 +34,18 @@ public class MCGrammarInfo {
* Keywords of the processed grammar and its super grammars
*/
protected Set<String> keywords = Sets.newLinkedHashSet();

/**
* Lexer patterns
*/
protected Map<MCGrammarSymbol, List<Pattern>> lexerPatterns = new LinkedHashMap<>();

/**
* Additional java code for parser defined in antlr concepts of the processed
* grammar and its super grammars
*/
protected List<String> additionalParserJavaCode = new ArrayList<String>();
protected List<String> additionalParserJavaCode = new ArrayList<>();

/**
* Additional java code for lexer defined in antlr concepts of the processed
* grammar and its super grammars
*/
protected List<String> additionalLexerJavaCode = new ArrayList<String>();
protected List<String> additionalLexerJavaCode = new ArrayList<>();

/**
* Predicates
Expand All @@ -78,7 +68,6 @@ public class MCGrammarInfo {

public MCGrammarInfo(MCGrammarSymbol grammarSymbol) {
this.grammarSymbol = grammarSymbol;
buildLexPatterns();
findAllKeywords();
grammarSymbol.getTokenRulesWithInherited().forEach(t -> addSplitRule(t));
grammarSymbol.getKeywordRulesWithInherited().forEach(k -> keywordRules.add(k));
Expand Down Expand Up @@ -117,7 +106,7 @@ protected void addSubRules() {
for (MCGrammarSymbol grammar : grammarsToHandle) {
HashMap<String, List<ASTRuleReference>> ruleMap = Maps.newLinkedHashMap();
// Collect superclasses and superinterfaces for classes
for (ASTClassProd classProd : ((ASTMCGrammar) grammar.getAstNode())
for (ASTClassProd classProd : (grammar.getAstNode())
.getClassProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperRuleList());
Expand All @@ -126,17 +115,15 @@ protected void addSubRules() {
}

// Collect superclasses and superinterfaces for abstract classes
for (ASTAbstractProd classProd : ((ASTMCGrammar) grammar.getAstNode())
.getAbstractProdList()) {
for (ASTAbstractProd classProd : grammar.getAstNode().getAbstractProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperRuleList());
ruleRefs.addAll(classProd.getSuperInterfaceRuleList());
ruleMap.put(classProd.getName(), ruleRefs);
}

// Collect superinterfaces for interfaces
for (ASTInterfaceProd classProd : ((ASTMCGrammar) grammar.getAstNode())
.getInterfaceProdList()) {
for (ASTInterfaceProd classProd : grammar.getAstNode().getInterfaceProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperInterfaceRuleList());
ruleMap.put(classProd.getName(), ruleRefs);
Expand Down Expand Up @@ -166,20 +153,6 @@ protected void addSubrule(String superrule, String subrule, ASTRuleReference rul
}


/**
 * Collects the names of all super productions of {@code ast} and checks
 * whether one of its alternatives is left recursive with respect to them.
 *
 * @param ast the class production to examine
 * @return the super production names plus the name of {@code ast} itself if
 *         the {@code DirectLeftRecursionDetector} reports a left-recursive
 *         alternative; otherwise a new empty list
 */
protected Collection<String> addLeftRecursiveRuleForProd(ASTClassProd ast) {
  Collection<String> names = new ArrayList<>();
  for (ASTProd superProd : TransformationHelper.getAllSuperProds(ast)) {
    names.add(superProd.getName());
  }

  DirectLeftRecursionDetector detector = new DirectLeftRecursionDetector();
  boolean leftRecursive = false;
  for (ASTAlt alt : ast.getAltList()) {
    if (detector.isAlternativeLeftRecursive(alt, names)) {
      // The first left-recursive alternative settles the question.
      leftRecursive = true;
      break;
    }
  }

  if (!leftRecursive) {
    return Lists.newArrayList();
  }
  names.add(ast.getName());
  return names;
}

/**
* @return grammarSymbol
*/
Expand Down Expand Up @@ -248,45 +221,22 @@ public Set<String> getKeywords() {
}

/**
* Checks if the terminal or constant <code>name</code> is a and has to be
* defined in the parser.
* Checks if the terminal or constant <code>name</code> is a keyword and could
* be replaced by a name
*
* @param name - rule to check
* @return true, if the terminal or constant <code>name</code> is a and has to
* be defined in the parser.
* @return true, if the terminal or constant <code>name</code> is a keyword and could
* be replaced by a name
*/
public boolean isKeyword(String name, MCGrammarSymbol grammar) {
boolean matches = false;
boolean found = false;

// Check with options
if (mustBeKeyword(name)) {
matches = true;
found = true;
}

// Automatically detect if not specified
if (!found && lexerPatterns.containsKey(grammar)) {
for (Pattern p : lexerPatterns.get(grammar)) {

if (p.matcher(name).matches()) {
matches = true;
Log.debug(name + " is considered as a keyword because it matches " + p + " "
+ "(grammarsymtab)", MCGrammarSymbol.class.getSimpleName());
break;
}

}
}

return matches;
public boolean isKeyword(String name) {
return keywords.contains(name);
}

public List<PredicatePair> getSubRulesForParsing(String ruleName) {
// Consider superclass
Optional<ProdSymbol> ruleByName = grammarSymbol.getProdWithInherited(ruleName);
List<PredicatePair> predicateList = Lists.newArrayList();
if (!ruleByName.isPresent()) {
if (ruleByName.isEmpty()) {
return predicateList;
}

Expand Down Expand Up @@ -324,9 +274,7 @@ protected void findAllKeywords() {
}
}
}
Optional<MCGrammarSymbol> refGrammarSymbol = MCGrammarSymbolTableHelper
.getMCGrammarSymbol(astProd.getEnclosingScope());
TerminalVisitor tv = new TerminalVisitor(refGrammarSymbol);
TerminalVisitor tv = new TerminalVisitor();
Grammar_WithConceptsTraverser traverser = Grammar_WithConceptsMill.traverser();
traverser.add4Grammar(tv);
astProd.accept(traverser);
Expand All @@ -335,59 +283,9 @@ protected void findAllKeywords() {
}

}

/**
 * Builds the lexer patterns for the processed grammar first and then for
 * each grammar returned by {@code getSuperGrammarSymbols()}.
 */
protected void buildLexPatterns() {
  buildLexPatterns(grammarSymbol);
  grammarSymbol.getSuperGrammarSymbols().forEach(this::buildLexPatterns);
}

/**
 * Computes the patterns of all non-fragment lexer productions of a grammar
 * (inherited ones included) and appends them to the list stored for that
 * grammar in {@code lexerPatterns}; the list is created on first use.
 *
 * @param grammar the grammar whose lexer productions are compiled
 */
protected void buildLexPatterns(MCGrammarSymbol grammar) {
  List<Pattern> patterns = lexerPatterns.computeIfAbsent(grammar, g -> new ArrayList<>());

  for (ProdSymbol rule : grammar.getProdsWithInherited().values()) {
    // Only lexer productions that carry an AST node can be compiled.
    if (!rule.isPresentAstNode() || !rule.isIsLexerProd()) {
      continue;
    }
    if (isFragment(rule.getAstNode())) {
      continue;
    }
    // Productions whose pattern could not be compiled are skipped.
    calculateLexPattern(grammar, (ASTLexProd) rule.getAstNode())
        .ifPresent(patterns::add);
  }
}

/**
 * Derives the list name for a nonterminal reference.
 *
 * @param a the nonterminal reference
 * @return the usage name if one is present, otherwise the nonterminal's own
 *         name, in both cases followed by
 *         {@code DecorationHelper.GET_SUFFIX_LIST}
 */
public static String getListName(ASTNonTerminal a) {
  // An explicit usage name takes precedence over the nonterminal name.
  String baseName = a.isPresentUsageName() ? a.getUsageName() : a.getName();
  return baseName + DecorationHelper.GET_SUFFIX_LIST;
}


/**
 * Looks the given string up in the {@code keywords} set.
 *
 * @param rule the terminal or constant name to look up
 * @return {@code true} if {@code keywords} contains {@code rule}
 */
protected boolean mustBeKeyword(String rule) {
  final boolean knownKeyword = keywords.contains(rule);
  return knownKeyword;
}

protected class TerminalVisitor implements GrammarVisitor2 {

TerminalVisitor(Optional<MCGrammarSymbol> refGrammarSymbol) {
this.refGrammarSymbol = refGrammarSymbol;
}

Optional<MCGrammarSymbol> refGrammarSymbol;

public GrammarTraverser getTraverser() {
return traverser;
}
Expand All @@ -400,16 +298,14 @@ public void setTraverser(GrammarTraverser traverser) {

@Override
public void visit(ASTTerminal keyword) {
if (isKeyword(keyword.getName(), grammarSymbol)
|| (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) {
if (NAME_PATTERN.matcher(keyword.getName()).matches()) {
keywords.add(keyword.getName());
}
}

@Override
public void visit(ASTConstant keyword) {
if (isKeyword(keyword.getName(), grammarSymbol)
|| (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) {
if (NAME_PATTERN.matcher(keyword.getName()).matches()) {
keywords.add(keyword.getName());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ public void handle(ASTConstantGroup ast) {

if (x.isPresentKeyConstant()) {
addToCodeSection(createKeyPredicate(x.getKeyConstant().getStringList(), tmpName + label));
} else if (!grammarInfo.isKeyword(x.getName(), grammarEntry)) {
} else if (!grammarInfo.isKeyword(x.getName())) {
addToCodeSection(tmpName + label + parserHelper.getOrComputeLexSymbolName(x.getName()));
} else if (grammarInfo.getKeywordRules().contains(x.getName())) {
addToCodeSection(tmpName + label + parserHelper.getKeyRuleName(x.getName()));
Expand Down Expand Up @@ -378,7 +378,7 @@ public void visit(ASTTerminal ast) {
String rulename;
if (ast.getName().isEmpty()) {
rulename = "";
} else if (grammarInfo.isKeyword(ast.getName(), grammarEntry) && grammarInfo.getKeywordRules().contains(ast.getName())) {
} else if (grammarInfo.isKeyword(ast.getName()) && grammarInfo.getKeywordRules().contains(ast.getName())) {
rulename = parserHelper.getKeyRuleName(ast.getName());
} else {
rulename = parserHelper.getOrComputeLexSymbolName(ast.getName().intern());
Expand Down Expand Up @@ -890,7 +890,7 @@ boolean getASTMax(ASTNonTerminal ast) {
protected void addActionForKeyword(ASTTerminal keyword, ProdSymbol rule, boolean isList, String tmpNamePlusLbl) {
addToCodeSection("(");
String rulename = "";
if (grammarInfo.isKeyword(keyword.getName(), grammarEntry)) {
if (grammarInfo.isKeyword(keyword.getName())) {
rulename = parserHelper.getOrComputeLexSymbolName(keyword.getName());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ public void handle(ASTTerminal node) {
protected String getRuleName(String name) {
if (name.isEmpty()) {
return "";
} else if (grammarInfo.isKeyword(name, parserGeneratorHelper.getGrammarSymbol()) && grammarInfo.getKeywordRules().contains(name)) {
} else if (grammarInfo.isKeyword(name) && grammarInfo.getKeywordRules().contains(name)) {
return parserGeneratorHelper.getKeyRuleName(name);
} else {
return parserGeneratorHelper.getCachedLexSymbolName(name.intern()).orElse("##no-usagename-for-rulename");
Expand Down Expand Up @@ -781,7 +781,7 @@ protected String getRuleName(ASTConstant constant) {
} else if (constant.isPresentTokenConstant()) {
return parserGeneratorHelper.getCachedLexSymbolName(constant.getTokenConstant().getString())
.orElse("##no-usagename-rulename-tc");
} else if (!grammarInfo.isKeyword(constant.getName(), parserGeneratorHelper.getGrammarSymbol())) {
} else if (!grammarInfo.isKeyword(constant.getName())) {
return parserGeneratorHelper.getCachedLexSymbolName(constant.getName())
.orElse("##no-usagename-rulename-k");
} else if (grammarInfo.getKeywordRules().contains(constant.getName())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ component grammar MCBasics {
token Name =
( 'a'..'z' | 'A'..'Z' | '_' | '$' )
( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' | '$' )*;

/*=================================================================*/

fragment token NEWLINE =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;

import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol;
import de.se_rwth.commons.logging.Log;
Expand All @@ -16,7 +17,9 @@
*
*/
public class LexNamer {


/**
 * Pattern describing a name: one character out of {@code a-z}, {@code A-Z},
 * {@code _} or {@code $}, followed by any number of those characters or
 * the digits {@code 0-9}.
 */
public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*");

protected int constantCounter = 0;

protected int lexCounter = 0;
Expand Down
Loading

0 comments on commit bb23e03

Please sign in to comment.