From 252a9306d308d1a045941a0e0fcd32cb2bcd3b59 Mon Sep 17 00:00:00 2001
From: "Jurgen J. Vinju"
Date: Thu, 21 Nov 2024 11:16:47 +0100
Subject: [PATCH] fixes #2009

Decode string literals in grammar symbols with vallang's
StandardTextReader instead of the hand-written escape loops in
SymbolFactory.literal2Symbol and SymbolFactory.ciliteral2Symbol.

* ciliteral2Symbol now builds a Symbol_Cilit constructor; it used to
  return Symbol_Lit. Its surrounding single quotes are swapped for
  double quotes before the text is handed to the reader.
* A literal that the reader rejects is reported as an internal error,
  with the reader's exception attached as the cause so the offending
  input can still be diagnosed from the stack trace.
* CharactersTests.rsc gains tests for the \a, \u and \U escapes in
  character classes and in literals.

---
 .../rascal/grammar/tests/CharactersTests.rsc  |  46 +++++++
 .../values/parsetrees/SymbolFactory.java      | 117 ++++--------------
 2 files changed, 73 insertions(+), 90 deletions(-)

diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/tests/CharactersTests.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/tests/CharactersTests.rsc
index 7b9fc42153f..0ce18b21159 100644
--- a/src/org/rascalmpl/library/lang/rascal/grammar/tests/CharactersTests.rsc
+++ b/src/org/rascalmpl/library/lang/rascal/grammar/tests/CharactersTests.rsc
@@ -2,6 +2,7 @@ module lang::rascal::grammar::tests::CharactersTests
 
 import lang::rascal::grammar::definition::Characters;
 import ParseTree;
+import String;
 
 test bool testFlip() = \new-char-class([range(2,2), range(1,1)]) == \char-class([range(1,2)]);
 test bool testMerge() = \new-char-class([range(3,4), range(2,2), range(1,1)]) == \char-class([range(1,4)]);
@@ -24,3 +25,48 @@ test bool testDiff1() = difference(\char-class([range(10,30)]), \char-class([ran
 
 test bool testDiff2() = difference(\char-class([range(10,30), range(40,50)]), \char-class([range(25,45)])) ==\char-class( [range(10,24), range(46,50)]);
 
+test bool asciiEscape() = \char-class([range(0,127)]) == #[\a00-\a7F].symbol;
+test bool utf16Escape() = \char-class([range(0,65535)]) == #[\u0000-\uFFFF].symbol;
+test bool utf32Escape() = \char-class([range(0,1114111)]) == #[\U000000-\U10FFFF].symbol;
+test bool highLowSurrogateRange1() = \char-class([range(9312,12991)]) == #[①-㊿].symbol;
+test bool highLowSurrogateRange2() = \char-class([range(127829,127829)]) == #[🍕].symbol;
+test bool differentEscapesSameResult1() = #[\a00-\a7F] == #[\u0000-\u007F];
+test bool differentEscapesSameResult2() = #[\a00-\a7F] == #[\U000000-\U00007F];
+
+/* to avoid a known ambiguity */
+alias NotAZ = ![A-Z];
+
+test bool unicodeCharacterClassSubtype1() {
+    Tree t = char(charAt("⑭", 0));
+
+    if ([①-㊿] circled := t) {
+        assert [⑭] _ := circled;
+        assert NotAZ _ := circled;
+        return true;
+    }
+
+    return false;
+}
+
+test bool unicodeCharacterClassSubtype2() {
+    Tree t = char(charAt("🍕", 0));
+
+    if ([🍕] pizza := t) {
+        assert [\a00-🍕] _ := pizza;
+        assert NotAZ _ := pizza;
+        return true;
+    }
+
+    return false;
+}
+
+test bool literalAsciiEscape1() = lit("\n") == #"\a0A".symbol;
+test bool literalAsciiEscape2() = lit("w") == #"\a77".symbol;
+test bool literalAsciiEscape3() = lit("\f") == #"\a0C".symbol;
+test bool literalAsciiEscape4() = lit("\n") == #"\n".symbol;
+@ignore{vallang must re-introduce the \f notation}
+test bool literalAsciiEscape5() = lit("\f") == #"\f".symbol;
+test bool literalUtf16Escape() = lit("\n") == #"\u000A".symbol;
+test bool literalUtf32Escape1() = lit("\n") == #"\U00000A".symbol;
+test bool literalUtf32Escape2() = lit("🍕") == #"\U01F355".symbol;
+
diff --git a/src/org/rascalmpl/values/parsetrees/SymbolFactory.java b/src/org/rascalmpl/values/parsetrees/SymbolFactory.java
index ee6faf40483..cec5444b924 100644
--- a/src/org/rascalmpl/values/parsetrees/SymbolFactory.java
+++ b/src/org/rascalmpl/values/parsetrees/SymbolFactory.java
@@ -13,6 +13,8 @@
  *******************************************************************************/
 package org.rascalmpl.values.parsetrees;
 
+import java.io.IOException;
+import java.io.StringReader;
 import java.util.List;
 
 import org.apache.commons.lang3.ArrayUtils;
@@ -35,6 +37,8 @@
 import io.usethesource.vallang.IString;
 import io.usethesource.vallang.IValue;
 import io.usethesource.vallang.IValueFactory;
+import io.usethesource.vallang.exceptions.FactTypeUseException;
+import io.usethesource.vallang.io.StandardTextReader;
 import org.rascalmpl.values.RascalValueFactory;
 import org.rascalmpl.values.ValueFactoryFactory;
 
@@ -198,101 +202,34 @@ private static IValue symbolAST2SymbolConstructor(Sym symbol, boolean lex, Strin
     }
 
     private static IValue literal2Symbol(StringConstant sep) {
-        String lit = ((StringConstant.Lexical) sep).getString();
-        StringBuilder builder = new StringBuilder(lit.length());
-
-        // TODO: did we deal with all escapes here? probably not!
-        for (int i = 1; i < lit.length() - 1; i++) {
-            if (lit.charAt(i) == '\\') {
-                i++;
-                switch (lit.charAt(i)) {
-                case 'b':
-                    builder.append('\b');
-                    break;
-                case 'f':
-                    builder.append('\f');
-                    break;
-                case 'n':
-                    builder.append('\n');
-                    break;
-                case 't':
-                    builder.append('\t');
-                    break;
-                case 'r':
-                    builder.append('\r');
-                    break;
-                case '\\':
-                    builder.append('\\');
-                    break;
-                case '\"':
-                    builder.append('\"');
-                    break;
-                case '>':
-                    builder.append('>');
-                    break;
-                case '<':
-                    builder.append('<');
-                    break;
-                case '\'':
-                    builder.append('\'');
-                    break;
-                case 'u':
-                    while (lit.charAt(i++) == 'u');
-                    builder.append((char) Integer.decode("0x" + lit.substring(i, i+4)).intValue());
-                    i+=4;
-                    break;
-                default:
-                    // octal escape
-                    int a = lit.charAt(i++);
-                    int b = lit.charAt(i++);
-                    int c = lit.charAt(i);
-                    builder.append( (char) (100 * a + 10 * b + c));
-                }
-            }
-            else {
-                builder.append(lit.charAt(i));
-            }
+        try {
+            String lit = ((StringConstant.Lexical) sep).getString();
+            // this should be the exact notation for string literals in vallang
+            IValue string = new StandardTextReader().read(factory, new StringReader(lit));
+
+            return factory.constructor(RascalValueFactory.Symbol_Lit, string);
+        }
+        catch (FactTypeUseException | IOException e) {
+            // Rascal's string constant syntax is not aligned with vallang's string notation; keep the reader's exception as the cause
+            throw new RuntimeException("Internal error: parsed stringconstant notation does not coincide with vallang stringconstant notation", e);
         }
-
-        return factory.constructor(RascalValueFactory.Symbol_Lit, factory.string(builder.toString()));
     }
 
     private static IValue ciliteral2Symbol(CaseInsensitiveStringConstant constant) {
-        String lit = ((CaseInsensitiveStringConstant.Lexical) constant).getString();
-        StringBuilder builder = new StringBuilder(lit.length());
-
-        for (int i = 1; i < lit.length() - 1; i++) {
-            if (lit.charAt(i) == '\\') {
-                i++;
-                switch (lit.charAt(i)) {
-                case 'n':
-                    builder.append('\n');
-                    break;
-                case 't':
-                    builder.append('\t');
-                    break;
-                case 'r':
-                    builder.append('\r');
-                    break;
-                case '\\':
-                    builder.append('\\');
-                    break;
-                case '\"':
-                    builder.append('\'');
-                    break;
-                default:
-                    int a = lit.charAt(i++);
-                    int b = lit.charAt(i++);
-                    int c = lit.charAt(i);
-                    builder.append( (char) (100 * a + 10 * b + c));
-                }
-            }
-            else {
-                builder.append(lit.charAt(i));
-            }
+        try {
+            String lit = ((CaseInsensitiveStringConstant.Lexical) constant).getString();
+            // replace single quotes by double quotes first
+            lit = "\"" + lit.substring(1, lit.length() - 1) + "\"";
+
+            // this should be the exact notation for string literals in vallang
+            IValue string = new StandardTextReader().read(factory, new StringReader(lit));
+
+            return factory.constructor(RascalValueFactory.Symbol_Cilit, string);
+        }
+        catch (FactTypeUseException | IOException e) {
+            // Rascal's string constant syntax is not aligned with vallang's string notation; keep the reader's exception as the cause
+            throw new RuntimeException("Internal error: parsed stringconstant notation does not coincide with vallang stringconstant notation", e);
         }
-
-        return factory.constructor(RascalValueFactory.Symbol_Lit, factory.string(builder.toString()));
     }
 
     private static IConstructor charclass2Symbol(Class cc) {