Skip to content

Commit

Permalink
fixes #2009
Browse files Browse the repository at this point in the history
  • Loading branch information
jurgenvinju committed Nov 21, 2024
1 parent 11f6a42 commit 252a930
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module lang::rascal::grammar::tests::CharactersTests

import lang::rascal::grammar::definition::Characters;
import ParseTree;
import String;

// Two single-character ranges supplied in descending order are expected to come out as the one ascending range 1..2.
test bool testFlip() = \new-char-class([range(2,2), range(1,1)]) == \char-class([range(1,2)]);
// Three out-of-order ranges that together cover 1..4 without gaps are expected to come out as the single range 1..4.
test bool testMerge() = \new-char-class([range(3,4), range(2,2), range(1,1)]) == \char-class([range(1,4)]);
Expand All @@ -24,3 +25,48 @@ test bool testDiff1() = difference(\char-class([range(10,30)]), \char-class([ran
// Removing 25..45 from the class {10..30, 40..50} is expected to leave the two outer remainders 10..24 and 46..50.
test bool testDiff2() = difference(\char-class([range(10,30), range(40,50)]), \char-class([range(25,45)])) ==\char-class( [range(10,24), range(46,50)]);


// The two-hex-digit \a escape: \a00-\a7F is expected to denote the code points 0..127.
test bool asciiEscape() = \char-class([range(0,127)]) == #[\a00-\a7F].symbol;
// The four-hex-digit \u escape: \u0000-\uFFFF is expected to denote the code points 0..65535.
test bool utf16Escape() = \char-class([range(0,65535)]) == #[\u0000-\uFFFF].symbol;
// The six-hex-digit \U escape: \U000000-\U10FFFF is expected to denote the code points 0..1114111.
test bool utf32Escape() = \char-class([range(0,1114111)]) == #[\U000000-\U10FFFF].symbol;
// A range written with the literal characters ① and ㊿ is expected to denote the code points 9312..12991.
test bool highLowSurrogateRange1() = \char-class([range(9312,12991)]) == #[①-㊿].symbol;
// A class containing only the literal character 🍕 is expected to denote the single code point 127829.
test bool highLowSurrogateRange2() = \char-class([range(127829,127829)]) == #[🍕].symbol;
// The same range 0..127 written with \a and with \u escapes is expected to yield equal reified types.
test bool differentEscapesSameResult1() = #[\a00-\a7F] == #[\u0000-\u007F];
// The same range 0..127 written with \a and with \U escapes is expected to yield equal reified types.
test bool differentEscapesSameResult2() = #[\a00-\a7F] == #[\U000000-\U00007F];

/* to avoid a known ambiguity */
// NOTE(review): the ambiguity itself is not described here. The alias is only used as the type of
// patterns in the two unicodeCharacterClassSubtype tests below, so presumably writing ![A-Z]
// directly in pattern position is what is ambiguous — confirm.
alias NotAZ = ![A-Z];

// Checks that a character tree accepted by the class [①-㊿] is also accepted by patterns typed
// with other character classes that contain the same character.
test bool unicodeCharacterClassSubtype1() {
// character tree for the code point of "⑭"
Tree t = char(charAt("⑭", 0));

// `circled` is bound only when t matches as a member of [①-㊿]
if ([①-㊿] circled := t) {
// the class holding exactly the matched character
assert [⑭] _ := circled;
// the complement of A-Z, via the alias
assert NotAZ _ := circled;
return true;
}

// reached only when the first match fails
return false;
}

// Same check as the previous test, but with a character whose code point (127829) lies above 65535.
test bool unicodeCharacterClassSubtype2() {
// character tree for the code point of "🍕"
Tree t = char(charAt("🍕", 0));

// `pizza` is bound only when t matches as a member of the singleton class [🍕]
if ([🍕] pizza := t) {
// a range running from code point 0 up to and including 🍕
assert [\a00-🍕] _ := pizza;
// the complement of A-Z, via the alias
assert NotAZ _ := pizza;
return true;
}

// reached only when the first match fails
return false;
}

// Each test compares a lit(...) symbol built from a Rascal string value with the symbol of a
// reified literal type that spells the same character using a particular escape notation.

// \a0A (two hex digits) is expected to be the newline character.
test bool literalAsciiEscape1() = lit("\n") == #"\a0A".symbol;
// \a77 is expected to be the letter w.
test bool literalAsciiEscape2() = lit("w") == #"\a77".symbol;
// \a0C is expected to be the form feed character.
test bool literalAsciiEscape3() = lit("\f") == #"\a0C".symbol;
// The named escape \n is expected to be kept as newline.
test bool literalAsciiEscape4() = lit("\n") == #"\n".symbol;
// Disabled: per the tag below, the named escape \f depends on vallang re-introducing that notation.
@ignore{vallang must re-introduce the \f notation}
test bool literalAsciiEscape5() = lit("\f") == #"\f".symbol;
// \u000A (four hex digits) is expected to be the newline character.
test bool literalUtf16Escape() = lit("\n") == #"\u000A".symbol;
// \U00000A (six hex digits) is expected to be the newline character.
test bool literalUtf32Escape1() = lit("\n") == #"\U00000A".symbol;
// \U01F355 is expected to be the 🍕 character.
test bool literalUtf32Escape2() = lit("🍕") == #"\U01F355".symbol;

117 changes: 27 additions & 90 deletions src/org/rascalmpl/values/parsetrees/SymbolFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
*******************************************************************************/
package org.rascalmpl.values.parsetrees;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.commons.lang3.ArrayUtils;
Expand All @@ -35,6 +37,8 @@
import io.usethesource.vallang.IString;
import io.usethesource.vallang.IValue;
import io.usethesource.vallang.IValueFactory;
import io.usethesource.vallang.exceptions.FactTypeUseException;
import io.usethesource.vallang.io.StandardTextReader;

import org.rascalmpl.values.RascalValueFactory;
import org.rascalmpl.values.ValueFactoryFactory;
Expand Down Expand Up @@ -198,101 +202,34 @@ private static IValue symbolAST2SymbolConstructor(Sym symbol, boolean lex, Strin
}

private static IValue literal2Symbol(StringConstant sep) {
String lit = ((StringConstant.Lexical) sep).getString();
StringBuilder builder = new StringBuilder(lit.length());

// TODO: did we deal with all escapes here? probably not!
for (int i = 1; i < lit.length() - 1; i++) {
if (lit.charAt(i) == '\\') {
i++;
switch (lit.charAt(i)) {
case 'b':
builder.append('\b');
break;
case 'f':
builder.append('\f');
break;
case 'n':
builder.append('\n');
break;
case 't':
builder.append('\t');
break;
case 'r':
builder.append('\r');
break;
case '\\':
builder.append('\\');
break;
case '\"':
builder.append('\"');
break;
case '>':
builder.append('>');
break;
case '<':
builder.append('<');
break;
case '\'':
builder.append('\'');
break;
case 'u':
while (lit.charAt(i++) == 'u');
builder.append((char) Integer.decode("0x" + lit.substring(i, i+4)).intValue());
i+=4;
break;
default:
// octal escape
int a = lit.charAt(i++);
int b = lit.charAt(i++);
int c = lit.charAt(i);
builder.append( (char) (100 * a + 10 * b + c));
}
}
else {
builder.append(lit.charAt(i));
}
try {
String lit = ((StringConstant.Lexical) sep).getString();
// this should be the exact notation for string literals in vallang
IValue string = new StandardTextReader().read(factory, new StringReader(lit));

return factory.constructor(RascalValueFactory.Symbol_Lit, string);
}
catch (FactTypeUseException | IOException e) {

Check warning on line 212 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L212

Added line #L212 was not covered by tests
// this would mean Rascal's syntax definition for string constants is not aligned with vallang's string notation
throw new RuntimeException("Internal error: parsed stringconstant notation does not coincide with vallang stringconstant notation");

Check warning on line 214 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L214

Added line #L214 was not covered by tests
}

return factory.constructor(RascalValueFactory.Symbol_Lit, factory.string(builder.toString()));
}

private static IValue ciliteral2Symbol(CaseInsensitiveStringConstant constant) {
String lit = ((CaseInsensitiveStringConstant.Lexical) constant).getString();
StringBuilder builder = new StringBuilder(lit.length());

for (int i = 1; i < lit.length() - 1; i++) {
if (lit.charAt(i) == '\\') {
i++;
switch (lit.charAt(i)) {
case 'n':
builder.append('\n');
break;
case 't':
builder.append('\t');
break;
case 'r':
builder.append('\r');
break;
case '\\':
builder.append('\\');
break;
case '\"':
builder.append('\'');
break;
default:
int a = lit.charAt(i++);
int b = lit.charAt(i++);
int c = lit.charAt(i);
builder.append( (char) (100 * a + 10 * b + c));
}
}
else {
builder.append(lit.charAt(i));
}
try {
String lit = ((CaseInsensitiveStringConstant.Lexical) constant).getString();

Check warning on line 220 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L220

Added line #L220 was not covered by tests
// replace single quotes by double quotes first
lit = "\"" + lit.substring(1, lit.length() - 1) + "\"";

Check warning on line 222 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L222

Added line #L222 was not covered by tests

// this should be the exact notation for string literals in vallang
IValue string = new StandardTextReader().read(factory, new StringReader(lit));

Check warning on line 225 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L225

Added line #L225 was not covered by tests

return factory.constructor(RascalValueFactory.Symbol_Cilit, string);

Check warning on line 227 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L227

Added line #L227 was not covered by tests
}
catch (FactTypeUseException | IOException e) {

Check warning on line 229 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L229

Added line #L229 was not covered by tests
// this would mean Rascal's syntax definition for string constants is not aligned with vallang's string notation
throw new RuntimeException("Internal error: parsed stringconstant notation does not coincide with vallang stringconstant notation");

Check warning on line 231 in src/org/rascalmpl/values/parsetrees/SymbolFactory.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/values/parsetrees/SymbolFactory.java#L231

Added line #L231 was not covered by tests
}

return factory.constructor(RascalValueFactory.Symbol_Lit, factory.string(builder.toString()));
}

private static IConstructor charclass2Symbol(Class cc) {
Expand Down

0 comments on commit 252a930

Please sign in to comment.