Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simplify Name-token and keyword detection, fix fragmented identifiers #72

Merged
merged 14 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import de.monticore.cdbasis._ast.ASTCDDefinition;
import de.monticore.cdbasis._ast.ASTCDType;
import de.monticore.grammar.MCGrammarSymbolTableHelper;
import de.monticore.grammar.RegExpBuilder;
import de.monticore.grammar.grammar._ast.*;
import de.monticore.grammar.grammar._symboltable.AdditionalAttributeSymbol;
import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol;
Expand All @@ -38,19 +37,19 @@
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

public final class TransformationHelper {

public static final String DEFAULT_FILE_EXTENSION = ".java";

public static final String AST_PREFIX = "AST";

public static final String LIST_SUFFIX = "s";

public static final int STAR = -1;

/**
 * Pattern for identifier-like names: one ASCII letter, underscore or dollar
 * sign, followed by any number of ASCII letters, digits, underscores or
 * dollar signs.
 *
 * @deprecated Use LexNamer.NAME_PATTERN after release 7.7.0
 */
@Deprecated
public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*");

protected static List<String> reservedCdNames = Arrays.asList(
// CD4A
"derived",
Expand Down Expand Up @@ -609,38 +608,5 @@ public static Optional<Integer> getMax(ASTAdditionalAttribute ast) {
}
return Optional.empty();
}

/**
 * Tells whether a production is to be treated as a fragment.
 *
 * @param astNode the production to inspect
 * @return the production's own fragment flag if it is an {@code ASTLexProd};
 *         {@code true} for every other input (including {@code null})
 */
public static boolean isFragment(ASTProd astNode) {
  if (astNode instanceof ASTLexProd) {
    ASTLexProd lexProd = (ASTLexProd) astNode;
    return lexProd.isFragment();
  }
  // Anything that is not a lexer production counts as a fragment.
  return true;
}

/**
 * Compiles the textual form of a lexer production into a regex pattern.
 *
 * @param grammar the grammar handed on to {@code getLexString}
 * @param lexNode the lexer production to convert
 * @return the compiled pattern, or an empty Optional if the text is not a
 *         valid regular expression (the failure is logged as 0xA0913)
 */
public static Optional<Pattern> calculateLexPattern(MCGrammarSymbol grammar,
    ASTLexProd lexNode) {
  final String lexString = getLexString(grammar, lexNode);

  // The literal text "[[]" is swapped for an escaped bracket class before
  // compiling - presumably because java.util.regex does not accept it as is.
  final String regex = "[[]".equals(lexString) ? "[\\[]" : lexString;

  try {
    return Optional.of(Pattern.compile(regex));
  } catch (PatternSyntaxException e) {
    Log.error("0xA0913 Internal error with pattern handling for lex rules. Pattern: " + lexString
        + "\n", e);
    return Optional.empty();
  }
}

/**
 * Produces the textual form of a lexer production by letting a
 * {@code RegExpBuilder} visit it.
 *
 * @param grammar the grammar passed to the {@code RegExpBuilder}
 * @param lexNode the lexer production to traverse
 * @return whatever the {@code RegExpBuilder} wrote into its string builder
 *         during the traversal
 */
protected static String getLexString(MCGrammarSymbol grammar, ASTLexProd lexNode) {
  StringBuilder output = new StringBuilder();
  RegExpBuilder regExpVisitor = new RegExpBuilder(output, grammar);

  // The same object is registered both as grammar visitor and as grammar handler.
  Grammar_WithConceptsTraverser walker = Grammar_WithConceptsMill.traverser();
  walker.add4Grammar(regExpVisitor);
  walker.setGrammarHandler(regExpVisitor);

  lexNode.accept(walker);
  return output.toString();
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
package de.monticore.codegen.parser;

import com.google.common.collect.*;
import de.monticore.codegen.cd2java.DecorationHelper;
import de.monticore.codegen.mc2cd.TransformationHelper;
import de.monticore.grammar.DirectLeftRecursionDetector;
import de.monticore.grammar.LexNamer;
import de.monticore.grammar.MCGrammarSymbolTableHelper;
import de.monticore.grammar.PredicatePair;
Expand All @@ -24,10 +21,8 @@

import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import static de.monticore.codegen.mc2cd.TransformationHelper.calculateLexPattern;
import static de.monticore.codegen.mc2cd.TransformationHelper.isFragment;
import static de.monticore.codegen.mc2cd.TransformationHelper.NAME_PATTERN;

/**
* Contains information about a grammar which is required for the parser
Expand All @@ -39,23 +34,18 @@ public class MCGrammarInfo {
* Keywords of the processed grammar and its super grammars
*/
protected Set<String> keywords = Sets.newLinkedHashSet();

/**
* Lexer patterns
*/
protected Map<MCGrammarSymbol, List<Pattern>> lexerPatterns = new LinkedHashMap<>();

/**
* Additional java code for parser defined in antlr concepts of the processed
* grammar and its super grammars
*/
protected List<String> additionalParserJavaCode = new ArrayList<String>();
protected List<String> additionalParserJavaCode = new ArrayList<>();

/**
* Additional java code for lexer defined in antlr concepts of the processed
* grammar and its super grammars
*/
protected List<String> additionalLexerJavaCode = new ArrayList<String>();
protected List<String> additionalLexerJavaCode = new ArrayList<>();

/**
* Predicates
Expand All @@ -78,7 +68,6 @@ public class MCGrammarInfo {

public MCGrammarInfo(MCGrammarSymbol grammarSymbol) {
this.grammarSymbol = grammarSymbol;
buildLexPatterns();
findAllKeywords();
grammarSymbol.getTokenRulesWithInherited().forEach(t -> addSplitRule(t));
grammarSymbol.getKeywordRulesWithInherited().forEach(k -> keywordRules.add(k));
Expand Down Expand Up @@ -117,7 +106,7 @@ protected void addSubRules() {
for (MCGrammarSymbol grammar : grammarsToHandle) {
HashMap<String, List<ASTRuleReference>> ruleMap = Maps.newLinkedHashMap();
// Collect superclasses and superinterfaces for classes
for (ASTClassProd classProd : ((ASTMCGrammar) grammar.getAstNode())
for (ASTClassProd classProd : (grammar.getAstNode())
.getClassProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperRuleList());
Expand All @@ -126,17 +115,15 @@ protected void addSubRules() {
}

// Collect superclasses and superinterfaces for abstract classes
for (ASTAbstractProd classProd : ((ASTMCGrammar) grammar.getAstNode())
.getAbstractProdList()) {
for (ASTAbstractProd classProd : grammar.getAstNode().getAbstractProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperRuleList());
ruleRefs.addAll(classProd.getSuperInterfaceRuleList());
ruleMap.put(classProd.getName(), ruleRefs);
}

// Collect superinterfaces for interfaces
for (ASTInterfaceProd classProd : ((ASTMCGrammar) grammar.getAstNode())
.getInterfaceProdList()) {
for (ASTInterfaceProd classProd : grammar.getAstNode().getInterfaceProdList()) {
List<ASTRuleReference> ruleRefs = Lists.newArrayList();
ruleRefs.addAll(classProd.getSuperInterfaceRuleList());
ruleMap.put(classProd.getName(), ruleRefs);
Expand Down Expand Up @@ -166,20 +153,6 @@ protected void addSubrule(String superrule, String subrule, ASTRuleReference rul
}


/**
 * Collects the rule names involved in a direct left recursion of a class
 * production.
 *
 * @param ast the class production to analyse
 * @return the names of all super productions plus the production's own name
 *         if at least one alternative is reported as left recursive by
 *         {@code DirectLeftRecursionDetector}; otherwise a new empty list
 */
protected Collection<String> addLeftRecursiveRuleForProd(ASTClassProd ast) {
  List<ASTProd> superProds = TransformationHelper.getAllSuperProds(ast);
  Collection<String> ruleNames = new ArrayList<>();
  for (ASTProd superProd : superProds) {
    ruleNames.add(superProd.getName());
  }

  DirectLeftRecursionDetector detector = new DirectLeftRecursionDetector();
  boolean leftRecursive = false;
  for (ASTAlt alternative : ast.getAltList()) {
    if (detector.isAlternativeLeftRecursive(alternative, ruleNames)) {
      // The first left-recursive alternative settles the answer.
      leftRecursive = true;
      break;
    }
  }

  if (!leftRecursive) {
    return Lists.newArrayList();
  }
  ruleNames.add(ast.getName());
  return ruleNames;
}

/**
* @return grammarSymbol
*/
Expand Down Expand Up @@ -248,45 +221,22 @@ public Set<String> getKeywords() {
}

/**
* Checks if the terminal or constant <code>name</code> is a keyword and has to be
* defined in the parser.
* Checks if the terminal or constant <code>name</code> is a keyword and could
* be replaced by a name
*
* @param name - rule to check
* @return true, if the terminal or constant <code>name</code> is a keyword and has to
* be defined in the parser.
* @return true, if the terminal or constant <code>name</code> is a keyword and could
* be replaced by a name
*/
public boolean isKeyword(String name, MCGrammarSymbol grammar) {
boolean matches = false;
boolean found = false;

// Check with options
if (mustBeKeyword(name)) {
matches = true;
found = true;
}

// Automatically detect if not specified
if (!found && lexerPatterns.containsKey(grammar)) {
for (Pattern p : lexerPatterns.get(grammar)) {

if (p.matcher(name).matches()) {
matches = true;
Log.debug(name + " is considered as a keyword because it matches " + p + " "
+ "(grammarsymtab)", MCGrammarSymbol.class.getSimpleName());
break;
}

}
}

return matches;
public boolean isKeyword(String name) {
return keywords.contains(name);
}

public List<PredicatePair> getSubRulesForParsing(String ruleName) {
// Consider superclass
Optional<ProdSymbol> ruleByName = grammarSymbol.getProdWithInherited(ruleName);
List<PredicatePair> predicateList = Lists.newArrayList();
if (!ruleByName.isPresent()) {
if (ruleByName.isEmpty()) {
return predicateList;
}

Expand Down Expand Up @@ -324,9 +274,7 @@ protected void findAllKeywords() {
}
}
}
Optional<MCGrammarSymbol> refGrammarSymbol = MCGrammarSymbolTableHelper
.getMCGrammarSymbol(astProd.getEnclosingScope());
TerminalVisitor tv = new TerminalVisitor(refGrammarSymbol);
TerminalVisitor tv = new TerminalVisitor();
Grammar_WithConceptsTraverser traverser = Grammar_WithConceptsMill.traverser();
traverser.add4Grammar(tv);
astProd.accept(traverser);
Expand All @@ -335,59 +283,9 @@ protected void findAllKeywords() {
}

}

/**
 * Builds the lexer patterns for the processed grammar first and then for
 * each of its super grammars.
 */
protected void buildLexPatterns() {
  buildLexPatterns(grammarSymbol);
  grammarSymbol.getSuperGrammarSymbols().forEach(this::buildLexPatterns);
}

/**
 * Adds the compiled patterns of all non-fragment lexer productions of a
 * grammar (inherited ones included) to that grammar's entry in
 * {@code lexerPatterns}, creating the entry if it does not exist yet.
 *
 * @param grammar the grammar whose lexer productions are converted
 */
protected void buildLexPatterns(MCGrammarSymbol grammar) {
  List<Pattern> collected = lexerPatterns.computeIfAbsent(grammar, g -> new ArrayList<>());

  for (ProdSymbol prod : grammar.getProdsWithInherited().values()) {
    // Only lexer productions with an AST node can be converted.
    if (!prod.isPresentAstNode() || !prod.isIsLexerProd()) {
      continue;
    }
    if (isFragment(prod.getAstNode())) {
      continue;
    }
    // Productions whose pattern could not be computed are left out.
    calculateLexPattern(grammar, (ASTLexProd) prod.getAstNode())
        .ifPresent(collected::add);
  }
}

/**
 * Derives the list name for a nonterminal.
 *
 * @param a the nonterminal
 * @return the usage name if one is present, otherwise the nonterminal's
 *         name, followed by {@code DecorationHelper.GET_SUFFIX_LIST}
 */
public static String getListName(ASTNonTerminal a) {
  // An explicit usage name takes precedence over the nonterminal's own name.
  String baseName = a.isPresentUsageName() ? a.getUsageName() : a.getName();
  return baseName + DecorationHelper.GET_SUFFIX_LIST;
}


/**
 * Checks whether a name is contained in the collected keyword set.
 *
 * @param rule the name to look up
 * @return true if {@code keywords} contains {@code rule}
 */
protected boolean mustBeKeyword(String rule) {
return keywords.contains(rule);
}

protected class TerminalVisitor implements GrammarVisitor2 {

TerminalVisitor(Optional<MCGrammarSymbol> refGrammarSymbol) {
this.refGrammarSymbol = refGrammarSymbol;
}

Optional<MCGrammarSymbol> refGrammarSymbol;

public GrammarTraverser getTraverser() {
return traverser;
}
Expand All @@ -400,16 +298,14 @@ public void setTraverser(GrammarTraverser traverser) {

@Override
public void visit(ASTTerminal keyword) {
if (isKeyword(keyword.getName(), grammarSymbol)
|| (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) {
if (NAME_PATTERN.matcher(keyword.getName()).matches()) {
keywords.add(keyword.getName());
}
}

@Override
public void visit(ASTConstant keyword) {
if (isKeyword(keyword.getName(), grammarSymbol)
|| (refGrammarSymbol.isPresent() && isKeyword(keyword.getName(), refGrammarSymbol.get()))) {
if (NAME_PATTERN.matcher(keyword.getName()).matches()) {
keywords.add(keyword.getName());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ public void handle(ASTConstantGroup ast) {

if (x.isPresentKeyConstant()) {
addToCodeSection(createKeyPredicate(x.getKeyConstant().getStringList(), tmpName + label));
} else if (!grammarInfo.isKeyword(x.getName(), grammarEntry)) {
} else if (!grammarInfo.isKeyword(x.getName())) {
addToCodeSection(tmpName + label + parserHelper.getOrComputeLexSymbolName(x.getName()));
} else if (grammarInfo.getKeywordRules().contains(x.getName())) {
addToCodeSection(tmpName + label + parserHelper.getKeyRuleName(x.getName()));
Expand Down Expand Up @@ -378,7 +378,7 @@ public void visit(ASTTerminal ast) {
String rulename;
if (ast.getName().isEmpty()) {
rulename = "";
} else if (grammarInfo.isKeyword(ast.getName(), grammarEntry) && grammarInfo.getKeywordRules().contains(ast.getName())) {
} else if (grammarInfo.isKeyword(ast.getName()) && grammarInfo.getKeywordRules().contains(ast.getName())) {
rulename = parserHelper.getKeyRuleName(ast.getName());
} else {
rulename = parserHelper.getOrComputeLexSymbolName(ast.getName().intern());
Expand Down Expand Up @@ -890,7 +890,7 @@ boolean getASTMax(ASTNonTerminal ast) {
protected void addActionForKeyword(ASTTerminal keyword, ProdSymbol rule, boolean isList, String tmpNamePlusLbl) {
addToCodeSection("(");
String rulename = "";
if (grammarInfo.isKeyword(keyword.getName(), grammarEntry)) {
if (grammarInfo.isKeyword(keyword.getName())) {
rulename = parserHelper.getOrComputeLexSymbolName(keyword.getName());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ public void handle(ASTTerminal node) {
protected String getRuleName(String name) {
if (name.isEmpty()) {
return "";
} else if (grammarInfo.isKeyword(name, parserGeneratorHelper.getGrammarSymbol()) && grammarInfo.getKeywordRules().contains(name)) {
} else if (grammarInfo.isKeyword(name) && grammarInfo.getKeywordRules().contains(name)) {
return parserGeneratorHelper.getKeyRuleName(name);
} else {
return parserGeneratorHelper.getCachedLexSymbolName(name.intern()).orElse("##no-usagename-for-rulename");
Expand Down Expand Up @@ -781,7 +781,7 @@ protected String getRuleName(ASTConstant constant) {
} else if (constant.isPresentTokenConstant()) {
return parserGeneratorHelper.getCachedLexSymbolName(constant.getTokenConstant().getString())
.orElse("##no-usagename-rulename-tc");
} else if (!grammarInfo.isKeyword(constant.getName(), parserGeneratorHelper.getGrammarSymbol())) {
} else if (!grammarInfo.isKeyword(constant.getName())) {
return parserGeneratorHelper.getCachedLexSymbolName(constant.getName())
.orElse("##no-usagename-rulename-k");
} else if (grammarInfo.getKeywordRules().contains(constant.getName())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ component grammar MCBasics {
token Name =
( 'a'..'z' | 'A'..'Z' | '_' | '$' )
( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' | '$' )*;

/*=================================================================*/

fragment token NEWLINE =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;

import de.monticore.grammar.grammar._symboltable.MCGrammarSymbol;
import de.se_rwth.commons.logging.Log;
Expand All @@ -16,7 +17,9 @@
*
*/
public class LexNamer {


public static final Pattern NAME_PATTERN = Pattern.compile("([a-z]|[A-Z]|[_]|[$])([a-z]|[A-Z]|[_]|[0-9]|[$])*");

protected int constantCounter = 0;

protected int lexCounter = 0;
Expand Down
Loading
Loading