From f1f45c3e987a2c073482433be101478618116993 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 23 Nov 2020 21:00:17 -0500 Subject: [PATCH 1/2] wip --- benchmark/index.js | 132 ++++++++++++++++++++++++++++++++++++++++----- index.js | 82 +++++++++++++++++++++++++++- 2 files changed, 201 insertions(+), 13 deletions(-) diff --git a/benchmark/index.js b/benchmark/index.js index 37146bc..c528785 100644 --- a/benchmark/index.js +++ b/benchmark/index.js @@ -2,13 +2,20 @@ /** * Globals for benchmark.js */ -global.escapeHtml = require('..') +var lib = require('..') +global.escapeHtml = lib.escapeHtml +global.escapeHtmlFast = lib.escapeHtmlFast +global.escapeHtmlNoRegex = lib.escapeHtmlNoRegex /** * Module dependencies. */ var benchmark = require('benchmark') var benchmarks = require('beautify-benchmark') +var fs = require('fs') +var hugeHTML = fs.readFileSync("mathematica.html").toString() + +const MIN_SAMPLES = 3 for (var dep in process.versions) { console.log(' %s@%s', dep, process.versions[dep]) @@ -17,28 +24,129 @@ for (var dep in process.versions) { console.log('') var suite = new benchmark.Suite() +const fn = function() { escapeHtmlFast(str) } + +// suite.add({ +// 'name': 'no special characters', +// 'minSamples': MIN_SAMPLES, +// 'fn': function() { escapeHtml(str) },, +// 'setup': function() { str = "Hello, World!" 
} +// }) + +// suite.add({ +// 'name': 'no special characters (large)', +// 'minSamples': MIN_SAMPLES, +// 'fn': function() { escapeHtml(str) },, +// 'setup': function() { str = "Hello, World!".repeat(1000) } +// }) + suite.add({ - 'name': 'no special characters', - 'minSamples': 100, - 'fn': 'escapeHtml(str)', - 'setup': 'str = "Hello, World!"' + 'name': 'Long HTML page', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(hugeHTML) }, }) +suite.add({ + 'name': 'Long HTML page REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(hugeHTML) }, +}) + +suite.add({ + 'name': 'Short HTML page', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(hugeHTML.substring(1,30000)) }, +}) + +suite.add({ + 'name': 'Short HTML page REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(hugeHTML.substring(1,30000)) }, +}) + + suite.add({ 'name': 'single special character', - 'minSamples': 100, - 'fn': 'escapeHtml(str)', - 'setup': 'str = "Hello, World&!"' + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(str) }, + 'setup': function() { str = "Hello, World&!" } +}) + +suite.add({ + 'name': 'single special character REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(str) }, + 'setup': function() { str = "Hello, World&!" 
} }) +suite.add({ + 'name': 'single special character (large)', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(str) }, + 'setup': function() { + str = "Hello, World!".repeat(500) + + "&" + + "Hello, World!".repeat(500) + } +}) + +suite.add({ + 'name': 'single special character (large) REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(str) }, + 'setup': function() { + str = "Hello, World!".repeat(500) + + "&" + + "Hello, World!".repeat(500) + } +}) + + suite.add({ 'name': 'many special characters', - 'minSamples': 100, - 'fn': 'escapeHtml(str)', - 'setup': 'str = "\'>\'\\"\\"&>h&"' + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"' } +}) + +suite.add({ + 'name': 'many special characters REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"' } +}) + +suite.add({ + 'name': 'many special characters NO REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlNoRegex(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"' } +}) + + +suite.add({ + 'name': 'many special characters (large)', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtml(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"'.repeat(1000) } +}) + +suite.add({ + 'name': 'many special characters (large) REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlFast(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"'.repeat(1000) } }) +suite.add({ + 'name': 'many special characters (large) NO REGEX', + 'minSamples': MIN_SAMPLES, + 'fn': function() { escapeHtmlNoRegex(str) }, + 'setup': function() { str = '\'>\'\\"\\"&>h&"'.repeat(1000) } +}) + + suite.on('cycle', function onCycle (event) { benchmarks.add(event.target) }) @@ -47,4 +155,4 @@ suite.on('complete', function onComplete () { benchmarks.log() }) -suite.run({ 'async': false }) +suite.run({ 'async': false, maxTime: 0.001 }) diff --git 
a/index.js b/index.js index dd64ae8..410f85c 100644 --- a/index.js +++ b/index.js @@ -20,7 +20,7 @@ var matchHtmlRegExp = /["'&<>]/ * @public */ -module.exports = escapeHtml +module.exports = { escapeHtml, escapeHtmlFast, escapeHtmlNoRegex } /** * Escape special characters in the given string of text. @@ -30,6 +30,86 @@ module.exports = escapeHtml * @public */ +var matchHtmlRegExpFast = /["'&<>]/g +function escapeHtmlFast (str) { + var lastIndex = 0 + var html = '' + var escape = '' + var match + + while (match = matchHtmlRegExpFast.test(str)) { + switch (str.charCodeAt(matchHtmlRegExpFast.lastIndex)) { + case 34: // " + escape = '"' + break + case 38: // & + escape = '&' + break + case 39: // ' + escape = ''' + break + case 60: // < + escape = '<' + break + case 62: // > + escape = '>' + break + } + // console.log(lastIndex, matchHtmlRegExpFast.lastIndex, str.length) + html += str.substring(lastIndex, matchHtmlRegExpFast.lastIndex) + lastIndex = matchHtmlRegExpFast.lastIndex + 1 + html += escape + } + return html + str.substring(lastIndex) +} + +function escapeHtmlNoRegex (str) { + // var str = '' + string + // var match = matchHtmlRegExp.exec(str) + + // if (!match) { + // return str + // } + + var escape + var html = '' + var index = 0 + var lastIndex = 0 + + for (index = 0; index < str.length; index++) { + switch (str.charCodeAt(index)) { + case 34: // " + escape = '"' + break + case 38: // & + escape = '&' + break + case 39: // ' + escape = ''' + break + case 60: // < + escape = '<' + break + case 62: // > + escape = '>' + break + default: + continue + } + + if (lastIndex !== index) { + html += str.substring(lastIndex, index) + } + + lastIndex = index + 1 + html += escape + } + + return lastIndex !== index + ? 
html + str.substring(lastIndex, index) + : html +} + function escapeHtml (string) { var str = '' + string var match = matchHtmlRegExp.exec(str) From 63373a40364aa587e2c33f642ca045063dc5724b Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 23 Nov 2020 21:13:42 -0500 Subject: [PATCH 2/2] add it all (i can clean it up before merge) --- index.js | 10 +- mathematica.html | 11747 +++++++++++++++++++++++++++++++++++++++++++++ test/index.js | 2 +- 3 files changed, 11754 insertions(+), 5 deletions(-) create mode 100644 mathematica.html diff --git a/index.js b/index.js index 410f85c..051c2ac 100644 --- a/index.js +++ b/index.js @@ -31,14 +31,16 @@ module.exports = { escapeHtml, escapeHtmlFast, escapeHtmlNoRegex } */ var matchHtmlRegExpFast = /["'&<>]/g -function escapeHtmlFast (str) { +function escapeHtmlFast (string) { + var str = '' + string + var lastIndex = 0 var html = '' var escape = '' var match while (match = matchHtmlRegExpFast.test(str)) { - switch (str.charCodeAt(matchHtmlRegExpFast.lastIndex)) { + switch (str.charCodeAt(matchHtmlRegExpFast.lastIndex-1)) { case 34: // " escape = '"' break @@ -56,8 +58,8 @@ function escapeHtmlFast (str) { break } // console.log(lastIndex, matchHtmlRegExpFast.lastIndex, str.length) - html += str.substring(lastIndex, matchHtmlRegExpFast.lastIndex) - lastIndex = matchHtmlRegExpFast.lastIndex + 1 + html += str.substring(lastIndex, matchHtmlRegExpFast.lastIndex - 1) + lastIndex = matchHtmlRegExpFast.lastIndex html += escape } return html + str.substring(lastIndex) diff --git a/mathematica.html b/mathematica.html new file mode 100644 index 0000000..9cb117f --- /dev/null +++ b/mathematica.html @@ -0,0 +1,11747 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + WIP: Proper highlighting for the Wolfram Language (Mathematica) by halirutan · Pull Request #2706 · highlightjs/highlight.js · GitHub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + +
+ +
+ + + + + +
+ + + +
+ + + + + + + + + +
+
+
+ + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + + + + +
+ + +
+ +
+
+
+
+ + + +
+ + + New issue + + + +
+
+ +
+ +
+

+ Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community. +

+ + +

By clicking “Sign up for GitHub”, you agree to our terms of service and + privacy statement. We’ll occasionally send you account related emails.

+ +

+ Already on GitHub? + Sign in + to your account +

+
+ +
+
+
+ +
+ +

+ + WIP: Proper highlighting for the Wolfram Language (Mathematica) + + #2706 +

+
+
+ +
+
+ + Merged + + +
+ + + +
+ + merged 32 commits into + + + + + +from + + + + + Nov 6, 2020 + +
+
+ + + +
+
+
+
+
+ + Merged + + +
+ + + +
+

+ WIP: Proper highlighting for the Wolfram Language (Mathematica) + #2706 +

+ +
+ + merged 32 commits into + + + + + +from + + + + + Nov 6, 2020 + +
+
+
+
+
+
+
+
+ + + + + + + +

Conversation

+
+ +
+ +
+
+
+ + +
+ + +
+ @halirutan + +
+ +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + + Copy link + + + + + + +
+ +
+ +
+ + + + + + + + Contributor + + + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 23, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+

This is work-in-progress and not ready for merging, but I wanted to start a discussion about what is feasible/wanted for highlight.js. The Wolfram Language (WL) is one of those languages that are very different in some points. Most notably, it has close to 7000 built-in symbols and none of the usual "keywords" that other languages have. I rewrote the entire Mathematica (it's the same as "WL", don't ask, it's confusing) highlighter and here are the major changes:

+
    +
  • The many ways how a number can be specified is now handled correctly
  • +
  • All built-in functions and so-called named-characters (a way to represent e.g. Alpha or Arrows) are matched correctly and updated for the most recent version of Mathematica
  • +
  • Instead of using lists of keywords for the built-in symbols, I created a trie to compress the regex. This basically shaves 30kB from the regex of the built-in symbols.
  • +
+

I'll attach an image of a more realistic test-case at the end, but first I'll have some questions:

+
    +
  • Is it OK to use such a trie in favor of a human readable list of keywords?
  • +
  • Does anyone have experience regarding the performance of such a trie? In theory, it should be faster but I was never really able to measure this. On the other hand, I'm a JS noob and I'd like to keep it that way :)
  • +
  • Am I doing this right by using begin: and specifying the regex there? I mean, it works but what do I know?
  • +
  • Are there formatting guide-lines? Specifically, can I leave these long regex on one line?
  • +
+

Any tips are appreciated.

+

image

+
+
+ + + +
+ + +
+ +
+ + +
+
+ +
+ + + + +
+ + + + +
+ +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 75dc896 + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 23, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

It seems Travis fails because now many languages are detected as Mathematica. Is there a way I can provide further meta-data that defines how to detect it correctly?

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + + +
+ +
+ +
+ +
+ @joshgoebel + +
+ + +
+ + +
+ + +
+
+ + + + joshgoebel + + + + + reviewed + + + + + Sep 23, 2020 + + +
+ + +
+ + +
+
+ + + +
+
+ + + + +
+ +
+ + + src/languages/mathematica.js + + + + Outdated + + + Show resolved + Hide resolved +
+
+
+ + + + + + + + + +
+
+ + + + +
+
+ + +
+ +
+ + + +
+ + + +
+ +
+ +
+ +
+ @joshgoebel + +
+ + +
+ + +
+ + +
+
+ + + + joshgoebel + + + + + reviewed + + + + + Sep 23, 2020 + + +
+ + +
+ + +
+
+ + + +
+
+ + + + +
+ +
+ + + src/languages/mathematica.js + + + + Outdated + + + Show resolved + Hide resolved +
+
+
+ + + + + + + + + +
+
+ + + + +
+
+ + +
+ +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 23, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+

Instead of using lists of keywords for the built-in symbols, I created a trie to compress the regex. This basically shaves 30kB from the regex of the built-in symbols.

+
+

30kb measured how? Raw size is irrelevant. What matters (some) is Brotli or gzip size.

+
+

Is it OK to use such a trie in favor of a human readable list of keywords?

+
+

Off the top of my head: no. This is pretty much unmaintainable - unless there was a whole build system internal to Highlight.js that took a human readable list, built the trie by hand, etc... and I don't think adding such a complex beast to core (just to support a single large language) makes sense. Out of curiosity what does that code to build the regex look like? Is this a 5-10 line snippet or a huge processing library?

+

Although if you wanted to maintain your own 3rd party grammar module then you could do it however you wanted.

+
+

Does anyone have experience regarding the performance of such a trie? In theory, it should be faster but I was never really able to measure this. On the other hand, I'm a JS noob and I'd like to keep it that way :)

+
+

No idea, that's something we'd likely want some real numbers on before even considering something like this for core (if we were ever going to).

+
+

Am I doing this right by using begin: and specifying the regex there? I mean, it works but what do I know?
+Are there formatting guide-lines? Specifically, can I leave these long regex on one line?

+
+

begin works if there is a single thing to match... but I dropped a comment on NUMBER already... generally if a regex can be split into smaller pieces (for readability/maintainable) it should be... so if there are 8 variants of numbers they each should really have their own regex using variants rather than one complex | that's impossible to read.

+

There are helpers in regex.js for some common things...

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 23, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

It seems Travis fails because now many languages are detected as Mathematica. Is there a way I can provide further meta-data that defines how to detect it correctly?

+
+

You have to look at what it's matching and tone down the match count or the relevance... relevance: 10 is probably wrong...

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 23, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+

@joshgoebel Thanks for your very thorough replies. Let me answer some of your comments:

+
+

30kb measured how? Raw size is irrelevant. What matters (some) is Brotli or gzip size.

+
+

Gzipping the trie regex results in 43kB while the list of keywords would be 47kB. Not sure if this difference justifies the usage of a trie.

+
+

Off the top of my head: no. This is pretty much unmaintainable - unless there was a whole build system internal to Highlight.js that took a human readable list, built the trie by hand, etc...

+
+

The human-readable list is not going to be maintainable either. No one is editing 6659 keywords by hand and for all highlighters I've worked on (intellij, google-prettify, rogue), the list was always created automatically through Mathematica. Everything else required for creating the regex is small, open-source and well documented. I've put the exact commands used in the comments. It only relies on a package of mine that I use for the Wolfram Language IntelliJ plugin, where the code for creating the trie is written in Kotlin.

+

The gist is that you will need Mathematica either way to maintain the list of keywords, and the rest is open-source.

+
+

Although if you wanted to maintain your own 3rd party grammar module then you could do it however you wanted.

+
+

The reason I'm working on it is StackOverflow/StackExchange :)
+I've written the google-prettify support for Mathematica that was used there but now we're moving to highlight.js.
+The goal is to bring the Mathematica highlighter to a reasonable state to get it eventually merged to StackOverflow.

+
+

but I dropped a comment on NUMBER already... generally if a regex can be split into smaller pieces (for readability/maintainable) it should be

+
+

I understand that. However, for Mathematica we're not really having different "variants". It's more like that you can prepend or append different things, e.g. if you have 123 and you can say it should be in base 8 and you get 8^^123. My major point however is that I linked a document in the comment above the NUMBER regex that goes into great detail about numbers. Everyone who's considering to work on this needs to read it anyway and it carefully lays out why and how things are matched. But I could try to split the regex in more readable chunks.

+
+

You have to look at what it's matching and tone down the match count or the relevance... relevance: 10 is probably wrong...

+
+

Alright, I'll check the API.

+

Question:

+

How should I move on regarding the large list of keywords? Should I use a simple list or the trie regex? I'd also try to bring some more people to this PR to get different opinions because in the end, it should be nice for the users. @CarlQLange already agreed to discuss matters.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 23, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+

Gzipping the trie regex results in 43kB while the list of keywords would be 47kB. Not sure if this difference justifies the usage of a trie.

+
+

Yeah for such a tiny difference we definitely want a clean inline list of keywords.

+
+

The human-readable list is not going to be maintainable either. No one is editing 6659 keywords by hand and for all highlighters I've worked on, the list was always created automatically through Mathematica.

+
+

Typically for longer lists (that are easily generated by the host language or script) we'll inline that code in the grammar itself (as a comment - usually these are only a few short lines of code)... so that the keywords are still listed and readable in the source, but maintainers can easily run the snippet themselves, generate a new list, copy and paste that into the file. Then we have clear readable diffs for what has changed from one release to another.

+
+

The reason I'm working on it is StackOverflow/StackExchange :)

+
+

Nothing prevents them from using 3rd party grammar modules. :-)

+
+

The goal is bring the Mathematica highlighter to a reasonable state to get it eventually merged to StackOverflow.

+
+

I don't know what this means... doesn't it automatically get included with the switch to Highlight.js... just it's being "loaded as needed" rather than bundled - which makes sense if most sites aren't going to use it (only the Mathematica Stack). What am I missing here? I mean I obviously get the desire to improve it... but you lost me with "merged".

+
+

I understand that. However, for Mathematica we're not really having different "variants". ... But I could try to split the regex in more readable chunks.

+
+

If there aren't truly variants then yes we'd split it up into easier to read (and maintain) chunks so that reading it at a glance it's obvious what the component parts are, etc... rather than just a wall of regex as it stands now.

+
+

How should I move on regarding the large list of keywords? Should I use a simple list or the trie regex? I'd also try bring some more people to this PR to get different opinions because in the end, it should be nice for the users. @CarlQLange already agreed to discuss matters.

+
+

Simple list, and it should originally be an array, one word per line (for maintainability/git diffs/etc - see ecmascript.js for an example)... then as necessary you can String#join it... it's going to be long but should be easier to see what changes in the future. Hopefully the process to generate the list can be documented in the comments as done with other languages and mentioned above.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 23, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

Am I doing this right by using begin: and specifying the regex there? I mean, it works but what do I know?
+Are there formatting guide-lines? Specifically, can I leave these long regex on one line?

+
+

And once we fall back to using a simple list you should be able to use the built in keyword functionality (as the grammar uses now) instead of needing to use a complex regex at all....

+

There is an eslint file...

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 23, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

Great! Then I'll know how to move and hope I'll find some time.

+
+

I don't know what this means... doesn't it automatically get included with the switch to Highlight.js

+
+

StackOverflow is still in the process of deciding how regularly they will update their version of highlight.js. The most upvoted user-wish on the official announcement however is that they do this more regularly. So I guess they will pull new versions of highlight.js to their side. I don't know specifics but here is the comment of the mod regarding the question "can the highlighter be updated more often?":

+
+

This is a good question. I'll bring it up internally to formally decide as to what team will be responsible for keeping this up to date as well as how we're going to ensure that we're alerted when a new version comes out. As it currently stands, I have a few OSS side projects relying on highlightjs that get weekly notifications when dependency updates hit npm, so at the very least, I'll be aware of them. Thanks for bringing this to my top of mind. This will be very important moving forward, because, as you mentioned, highlightjs is being actively maintained.

+
+

The only thing I do know is that SO will not provide Mathematica highlighting for the whole network but only for our StackExchange site because even in google-prettify days, our highlighter was huge compared to others. Here is the semi-official statement about this.

+
+

And once we fall back to using a simple list you should be able to use the built in keyword functionality

+
+

Yep, I got that. Use an array. Each keyword a line for diffing and joining it to a string in the code.

+

Thanks again, I really appreciate your input.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+
+ +
+
+ + halirutan + + added 5 commits + + Sep 28, 2020 + +
+
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 1b5a3ae + +
+
+
+
+
For matching built-in symbols, a sorted list of all names in the System
+context is now provided in a separate file which can be automatically
+recreated. The required Mathematica code is given in the comment.
+
+As suggested by @joshgoebel, the regular expression for matching
+Mathematica's numbers is now broken into (slightly more) readable chunks.
+
+To provide features we had on StackExchange with google-prettify, more rules
+were added. This includes
+
+- explicit matching of operators and braces
+- matching of patterns and slots
+- matching of message names aka func::usage
+
+This implementation requires additional CSS classes, but looks reasonable
+on the standard styles.
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 2c01097 + +
+
+
+
+
…can be fixed.
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + b78e9fc + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+
+ + +
+ + 6216916 + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 0437cd8 + +
+
+
+
+
…olfram_Language
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + + +
+ +
+ +
+ +
+ @joshgoebel + +
+ + +
+ + +
+ + +
+
+ + + + joshgoebel + + + + + requested changes + + + + + Sep 28, 2020 + + +
+ + +
+ + +
+
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + + Copy link + + + + + + +
+ +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + left a comment + + + + + + +

+
+ + +
+ + + + + + + + + +
+

Overall this is awesome! :-)

+
+
+ + + +
+ + +
+ +
+ + +
+
+ +
+
+ + + +
+
+ + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ import * as regex from '../lib/regex.js'; + +
+
+ +
+ // @ts-ignore + +
+ export default function(highlightJS) { + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Please revert this to match every other grammer, hjls, etc...

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

OK. It wasn't a (camel-case) word and was flagged by my spell-check.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+
+

other grammer, hjls, etc

+
+

This is why I renamed it to something readable.. even you mistyped it 😂

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Legacy, consistency, etc... and it's to match the fact that hljs is our global. :-) Not something we're going to change at this time. :)

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ /* + +
+ This dangerously looking beast of a regex was carefully assembled by Robert Jacobson. + +
+ See: https://wltools.github.io/LanguageSpec/Specification/Syntax/Number-representations/ + +
+ This rather scary looking matching of Mathematica numbers is carefully explained by Robert Jacobson here: + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Awesome. Much nicer.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ const number_re = /(\d*\.\d+|\d+\.\d*|\d+)/; + +
+ const base_number_re = regex.either(regex.concat(base_re, base_digits_re), number_re); + +
+
+ +
+ const accuracy_re = /``[+-]?(\d*\.\d+|\d+\.\d*|\d+)/; + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Please use ALL_CAPS_CASE for constants like this.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + + + + + + +
+ + + + Member + + +

+ + + + + +
+

Hmmm... so there is actually a pattern here... you're using lowercase for the building blocks but then cap case for the modes? Could you explain the thinking? I just realized it was mixed and now I'm trying to understand it. :)

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

I don't have a preference here. You guessed the idea correctly: lowercase for building blocks and upper case for the things that go into the final return value. I can make all uppercase if this is the convention for JS.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + + + + + + +
+ + + + Member + + +

+ + + + + +
+
+

I can make all uppercase if this is the convention for JS.

+
+

I think it's a common convention and definitely one we typically follow. I like it now that I get it, but sadly it's inconsistent with every other grammar so I think we could make them all caps. Some grammars do end up with their own "micro-conventions" but overall I try and keep the style the same as they are part of a collection.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ $pattern: symbol_re, + +
+ keyword: Mathematica.SYSTEM_SYMBOLS.join(" ") + +
+ }, + +
+ end: /[a-zA-Z0-9$]*/ + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Why wouldn't this all be a single match with begin? I see that the first character can be different but traditionally we'd love this with a single match regex in begin to make it clear we're matching a single term and that this isn't a block match with nested rules, etc.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

I will give a more elaborate explanation in a separate comment. The summary is that I constantly ran into two issues:

+
    +
  • When I define keywords like all languages do at the top-level and all other symbols (that have the same regex) in the contains: section, I didn't get highlighting for the keywords because the "other symbols matching" took precedence.
  • +
  • I ran into "0 match" exceptions more than I'd like to admit
  • +
+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + + + + + + +
+ + + + Member + + +

+ + + + + +
+
+

When I define keywords like all languages do at the top-level and all other symbols (that have the same regex) in the contains: section, I didn't get highlighting for the keywords because the "other symbols matching" took precedence.

+
+

I think a specific example would help... but each level has its own keywords so once you drop into a contains you no longer have keywords from the parent so if you ALSO wanted the keywords to kick in you'd need to re-include them at that level.

+

It's possible you're inventing a new useful pattern, but I'd have to see the exact problem you're trying to solve to comment further - so a more detailed example would be appreciated. :)

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+
+

I ran into "0 match" exceptions more than I'd like to admit

+
+

Just avoid any regex that can make a 0 width match. :-) That means complex regex with 20 optional parts (maybe digit) need to be pinned down to ONE core match... (like ALWAYS matching \d somewhere, etc)... often this can result in variants. ie\d(\.\d)? and .\d vs [+-]?(\d)?(\.\d). This problem originally dates back to a complex digit match with all optional regex groups.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ className: 'symbol', + +
+ begin: new RegExp("\\\\\\[(?:A(?:Acute|Bar|Cup|DoubleDot|E|Grave|Hat|Ring|Tilde|kuz|l(?:eph|i(?:as(?:Delimiter|Indicator)|gnmentMarker)|pha|tKey)|n(?:d(?:y)?|g(?:le|strom))|quariusSign|riesSign|scendingEllipsis|uto(?:LeftMatch|Operand|Placeholder|RightMatch|Space))|B(?:ackslash|e(?:amed(?:EighthNote|SixteenthNote)|cause|t(?:a)?)|lack(?:Bishop|K(?:ing|night)|Pawn|Queen|Rook)|reve|ullet)|C(?:Acute|Cedilla|Hacek|a(?:ncerSign|p(?:ital(?:A(?:Acute|Bar|Cup|DoubleDot|E|Grave|Hat|Ring|Tilde|lpha)|Beta|C(?:Acute|Cedilla|Hacek|hi)|D(?:Hacek|elta|i(?:fferentialD|gamma))|E(?:Acute|Bar|Cup|DoubleDot|Grave|Ha(?:cek|t)|psilon|t[ah])|Gamma|I(?:Acute|Cup|DoubleDot|Grave|Hat|ota)|K(?:appa|oppa)|L(?:Slash|ambda)|Mu|N(?:Hacek|Tilde|u)|O(?:Acute|Double(?:Acute|Dot)|E|Grave|Hat|Slash|Tilde|m(?:ega|icron))|P(?:hi|i|si)|R(?:Hacek|ho)|S(?:Hacek|ampi|igma|tigma)|T(?:Hacek|au|h(?:eta|orn))|U(?:Acute|Double(?:Acute|Dot)|Grave|Hat|Ring|psilon)|Xi|YAcute|Z(?:Hacek|eta))|ricornSign)?)|e(?:dilla|nt(?:er(?:Dot|Ellipsis))?)|h(?:eck(?:edBox|mark(?:edBox)?)|i)|ircle(?:Dot|Minus|Plus|Times)|l(?:o(?:ckwiseContourIntegral|seCurly(?:DoubleQuote|Quote)|verLeaf)|ubSuit)|o(?:lon|mmandKey|n(?:ditioned|gruent|jugate(?:Transpose)?|stantC|t(?:inu(?:ation|edFractionK)|ourIntegral|rolKey))|p(?:roduct|yright)|unterClockwiseContourIntegral)|ross|u(?:p(?:Cap)?|r(?:l(?:y(?:CapitalUpsilon|Epsilon|Kappa|P(?:hi|i)|Rho|Theta))?|rency)))|D(?:Hacek|a(?:gger|let|sh)|e(?:gree|l(?:eteKey|ta)?|scendingEllipsis)|i(?:am(?:eter|ond(?:Suit)?)|fferen(?:ceDelta|tialD)|gamma|rectedEdge|s(?:cret(?:e(?:Ratio|Shift)|ionary(?:Hyphen|LineSeparator|Pa(?:geBreak(?:Above|Below)|ragraphSeparator)))|tributed)|v(?:ergence|i(?:de(?:s)?|sionSlash)))|o(?:t(?:Equal|less[IJ]|tedSquare)|uble(?:ContourIntegral|D(?:agger|o(?:t|wnArrow))|L(?:eft(?:Arrow|RightArrow|Tee)|ong(?:Left(?:Arrow|RightArrow)|RightArrow))|Prime|Right(?:Arrow|Tee)|Struck(?:A|B|C(?:apital[ABCDEFGHIJKLMNOPQRSTUVWXYZ])?|D|E(?:ight)?|F(?:ive|our)?|G|H|I|J|K|L|M|N(?:ine)?|O(?:ne)?|P|Q
|R|S(?:even|ix)?|T(?:hree|wo)?|U|V|W|X|Y|Z(?:ero)?)|Up(?:Arrow|DownArrow)|VerticalBar|d(?:Gamma|Pi))|wn(?:Arrow(?:Bar|UpArrow)?|Breve|Exclamation|Left(?:RightVector|TeeVector|Vector(?:Bar)?)|Pointer|Question|Right(?:TeeVector|Vector(?:Bar)?)|Tee(?:Arrow)?)))|E(?:Acute|Bar|Cup|DoubleDot|Grave|Ha(?:cek|t)|arth|ighthNote|l(?:ement|lipsis)|mpty(?:Circle|D(?:iamond|ownTriangle)|Rectangle|S(?:et|mall(?:Circle|Square)|quare)|UpTriangle|VerySmallSquare)|nt(?:erKey|ity(?:End|Start))|psilon|qu(?:al(?:Tilde)?|i(?:librium|valent))|rrorIndicator|scapeKey|t[ah]|uro|x(?:ists|p(?:ectationE|onentialE)))|F(?:i(?:Ligature|lled(?:Circle|D(?:iamond|ownTriangle)|LeftTriangle|R(?:ectangle|ightTriangle)|S(?:mall(?:Circle|Square)|quare)|UpTriangle|VerySmallSquare)|nalSigma|rstPage|vePointedStar)|l(?:Ligature|at|orin)|or(?:All|mal(?:A(?:lpha)?|B(?:eta)?|C(?:apital(?:A(?:lpha)?|B(?:eta)?|C(?:hi)?|D(?:elta|igamma)?|E(?:psilon|ta)?|F|G(?:amma)?|H|I(?:ota)?|J|K(?:appa|oppa)?|L(?:ambda)?|M(?:u)?|N(?:u)?|O(?:m(?:ega|icron))?|P(?:hi|i|si)?|Q|R(?:ho)?|S(?:ampi|igma|tigma)?|T(?:au|heta)?|U(?:psilon)?|V|W|X(?:i)?|Y|Z(?:eta)?)|hi|urly(?:CapitalUpsilon|Epsilon|Kappa|P(?:hi|i)|Rho|Theta))?|D(?:elta|igamma)?|E(?:psilon|ta)?|F(?:inalSigma)?|G(?:amma)?|H|I(?:ota)?|J|K(?:appa|oppa)?|L(?:ambda)?|M(?:u)?|N(?:u)?|O(?:m(?:ega|icron))?|P(?:hi|i|si)?|Q|R(?:ho)?|S(?:ampi|igma|tigma)?|T(?:au|heta)?|U(?:psilon)?|V|W|X(?:i)?|Y|Z(?:eta)?))|re(?:akedSmiley|eformPrompt)|unction)|G(?:amma|eminiSign|imel|othic(?:A|B|C(?:apital[ABCDEFGHIJKLMNOPQRSTUVWXYZ])?|D|E(?:ight)?|F(?:ive|our)?|G|H|I|J|K|L|M|N(?:ine)?|O(?:ne)?|P|Q|R|S(?:even|ix)?|T(?:hree|wo)?|U|V|W|X|Y|Z(?:ero)?)|r(?:a(?:dient|y(?:Circle|Square))|eater(?:Equal(?:Less)?|FullEqual|Greater|Less|SlantEqual|Tilde)))|H(?:Bar|a(?:cek|ppySmiley)|e(?:artSuit|rmitianConjugate)|orizontalLine|ump(?:DownHump|Equal)|yphen)|I(?:Acute|Cup|DoubleDot|Grave|Hat|m(?:aginary[IJ]|pli(?:citPlus|es))|n(?:dentingNewLine|finity|linePart|te(?:gral|rsection)|visible(?:Application|Comma|P(?:ostf
ixScriptBase|refixScriptBase)|Space|Times))|ota)|Jupiter|K(?:appa|e(?:rnelIcon|yBar)|oppa)|L(?:Slash|a(?:mbda|placian|stPage)|e(?:ft(?:A(?:ngleBracket|rrow(?:Bar|RightArrow)?|ssociation)|BracketingBar|Ceiling|Do(?:ubleBracket(?:ingBar)?|wn(?:TeeVector|Vector(?:Bar)?))|Floor|Guillemet|Modified|Pointer|Right(?:Arrow|Vector)|Skeleton|T(?:ee(?:Arrow|Vector)?|riangle(?:Bar|Equal)?)|Up(?:DownVector|TeeVector|Vector(?:Bar)?)|Vector(?:Bar)?)|oSign|ss(?:Equal(?:Greater)?|FullEqual|Greater|Less|SlantEqual|Tilde)|tterSpace)|i(?:braSign|ghtBulb|mit|neSeparator)|o(?:ng(?:Dash|Equal|Left(?:Arrow|RightArrow)|RightArrow)|wer(?:LeftArrow|RightArrow)))|M(?:a(?:rs|thematicaIcon|xLimit)|e(?:asuredAngle|diumSpace|rcury)|ho|i(?:cro|n(?:Limit|us(?:Plus)?))|o(?:d(?:1Key|2Key)|on)|u)|N(?:Hacek|Tilde|a(?:nd|tural)|e(?:gative(?:MediumSpace|Thi(?:ckSpace|nSpace)|VeryThinSpace)|ptune|sted(?:GreaterGreater|LessLess)|utralSmiley)|o(?:Break|nBreakingSpace|r|t(?:C(?:ongruent|upCap)|DoubleVerticalBar|E(?:lement|qual(?:Tilde)?|xists)|Greater(?:Equal|FullEqual|Greater|Less|SlantEqual|Tilde)?|Hump(?:DownHump|Equal)|Le(?:ftTriangle(?:Bar|Equal)?|ss(?:Equal|FullEqual|Greater|Less|SlantEqual|Tilde)?)|Nested(?:GreaterGreater|LessLess)|Precedes(?:Equal|SlantEqual|Tilde)?|R(?:everseElement|ightTriangle(?:Bar|Equal)?)|S(?:quareSu(?:bset(?:Equal)?|perset(?:Equal)?)|u(?:bset(?:Equal)?|cceeds(?:Equal|SlantEqual|Tilde)?|perset(?:Equal)?))|Tilde(?:Equal|FullEqual|Tilde)?|VerticalBar)?)|u(?:ll|mberSign)?)|O(?:Acute|Double(?:Acute|Dot)|E|Grave|Hat|Slash|Tilde|m(?:ega|icron)|p(?:enCurly(?:DoubleQuote|Quote)|tionKey)|r|ver(?:Brac(?:e|ket)|Parenthesis))|P(?:a(?:geBreak(?:Above|Below)|r(?:agraph(?:Separator)?|tialD))|er(?:mutationProduct|pendicular)|hi|i(?:ecewise|scesSign)?|l(?:aceholder|u(?:sMinus|to))|r(?:ecedes(?:Equal|SlantEqual|Tilde)?|ime|o(?:babilityPr|duct|portion(?:al)?))|si)|QuarterNote|R(?:Hacek|awEscape|e(?:gisteredTrademark|turn(?:Indicator|Key)|verse(?:DoublePrime|E(?:lement|quilibrium)|Prime|UpEquilibriu
m))|ho|ight(?:A(?:ngle(?:Bracket)?|rrow(?:Bar|LeftArrow)?|ssociation)|BracketingBar|Ceiling|Do(?:ubleBracket(?:ingBar)?|wn(?:TeeVector|Vector(?:Bar)?))|Floor|Guillemet|Modified|Pointer|Skeleton|T(?:ee(?:Arrow|Vector)?|riangle(?:Bar|Equal)?)|Up(?:DownVector|TeeVector|Vector(?:Bar)?)|Vector(?:Bar)?)|ound(?:Implies|SpaceIndicator)|u(?:le(?:Delayed)?|pee))|S(?:Hacek|Z|a(?:dSmiley|gittariusSign|mpi|turn)|c(?:orpioSign|ript(?:A|B|C(?:apital[ABCDEFGHIJKLMNOPQRSTUVWXYZ])?|D(?:otless[IJ])?|E(?:ight)?|F(?:ive|our)?|G|H|I|J|K|L|M|N(?:ine)?|O(?:ne)?|P|Q|R|S(?:even|ix)?|T(?:hree|wo)?|U|V|W|X|Y|Z(?:ero)?))|e(?:ction|lectionPlaceholder)|h(?:a(?:h|rp)|iftKey|ort(?:DownArrow|LeftArrow|RightArrow|UpArrow))|i(?:gma|xPointedStar)|keletonIndicator|mallCircle|p(?:a(?:ce(?:Indicator|Key)|deSuit|nFrom(?:Above|Both|Left))|hericalAngle|ooky)|q(?:rt|uare(?:Intersection|Su(?:bset(?:Equal)?|perset(?:Equal)?)|Union)?)|t(?:ar|e(?:pper(?:Down|Left|Right|Up)|rling)|igma)|u(?:bset(?:Equal)?|c(?:ceeds(?:Equal|SlantEqual|Tilde)?|hThat)|m|n|perset(?:Equal)?)|ystem(?:EnterKey|sModelDelay))|T(?:Hacek|a(?:bKey|u(?:rusSign)?)|ensor(?:Product|Wedge)|h(?:e(?:refore|ta)|i(?:ckSpace|nSpace)|orn)|i(?:lde(?:Equal|FullEqual|Tilde)?|mes)|r(?:a(?:demark|nspose)|ipleDot)|woWayRule)|U(?:Acute|Double(?:Acute|Dot)|Grave|Hat|Ring|n(?:d(?:er(?:Brac(?:e|ket)|Parenthesis)|irectedEdge)|ion(?:Plus)?|knownGlyph)|p(?:Arrow(?:Bar|DownArrow)?|DownArrow|Equilibrium|Pointer|Tee(?:Arrow)?|per(?:LeftArrow|RightArrow)|silon)|ranus)|V(?:e(?:ctor(?:Greater(?:Equal)?|Less(?:Equal)?)|e|nus|r(?:tical(?:Bar|Ellipsis|Line|Separator|Tilde)|yThinSpace))|i(?:lla|rgoSign))|W(?:a(?:rningSign|tchIcon)|e(?:dge|ierstrassP)|hite(?:Bishop|K(?:ing|night)|Pawn|Queen|Rook)|olf(?:ram(?:AlphaPrompt|LanguageLogo(?:Circle)?))?)|X(?:i|nor|or)|Y(?:Acute|DoubleDot|en)|Z(?:Hacek|eta))]") + +
+ begin: /\\\[/, + +
+ end: /[$a-zA-Z][$a-zA-Z0-9]+]/ + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

See note above regarding using just begin for simple matches.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

Again, it was the version that worked and I'm happy to get a helping hand if this can be simplified.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

It's possible I don't understand what you're matching but if it's a SINGLE unit then:

+
    begin: /\\\[[$a-zA-Z][$a-zA-Z0-9]+]/,
+
+

Should work equally well.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

I think I know what you're trying to solve now, but just give me a code example and I'll take a closer look.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ begin: /([a-zA-Z$][a-zA-Z0-9$]*)?_+([a-zA-Z$][a-zA-Z0-9$]*)?/, + +
+ keywords: { + +
+ $pattern: symbol_re, + +
+ strong: Mathematica.SYSTEM_SYMBOLS.join(" ") + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

Please explain this variant and why its keywords would be "strong".... typically we try to use semantic use of classes, not visual... so this seems strange or else I simply don't understand what is happening here.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Sep 28, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

Yes, I'll post a screenshot. Basically, when we have a parameter of a function, Mathematica users love to see this in green. When this parameter has a type-specification like Integer which is a keyword, it should stay green but bold like keywords. This decision is not final and I'd like to discuss this with the community. It's quite possible that the keywords section in this goes away entirely.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Sep 28, 2020 + + + + + + + + + +
+ + + + Member + + +

+ + + + + +
+
+

When this parameter has a type-specification like Integer which is a keyword, it should stay green but bold like keywords.

+
+

But that's why we have "keyword" and variants - if it's a special type of keyword then we need to find the proper variant, not overload strong. strong simply means "strong/bold text" (as in something like Markdown or perhaps Latex)... also it's not always rendered bold. Themes can choose to render it however, and sometimes just use a color. That's what I mean when I say that these styles are semantic, not visual.

+

Your job (as the grammar) is to describe what something IS semantically, not describe how it should appear on the screen.

+

Perhaps semantically it's an 'important keyword' or something... and that's something I'm open to (see the issue for more nuanced styling), but we wouldn't do it by overloading "strong"...

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + +
+
+ + +
+ +
+ + + +
+ +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+
+ + +
+ + 8d61d15 + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 28, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@joshgoebel Let me try to explain the high-level problem that led to the awkward begin matching rules you commented on. In Mathematica, we have two types of symbols

+
    +
  1. Usual symbol names: Start with [a-zA-Z$] and can be followed by [a-zA-Z0-9$]. Note the missing _ in the regex which is an operator in Mathematica. Therefore, the usual word boundary \b cannot be used since it, AFAIK, allows _ to be part of the word.
  2. +
  3. Special characters like α which are represented as \[Alpha]. So it starts with a \[ and ends with a ] and in between, you have the symbols from 1. Examples are \[Gamma], \[VeryThinSpace], \[ScriptCapitalA], .. you get the idea.
  4. +
+

These two forms can be mixed. So you can have a variable myAwesome\[Gamma] and it's one single symbol. Some of the forms from 2. are actually operators and e.g. a\[ImplicitPlus]b is really just a sum of a and b. But that doesn't concern the highlighter which can't solve the problem anyway without parsing.

+

So what I want to have is:

+
    +
  1. All symbols that are possibly a mix of form 1 and 2 should be highlighted as symbol/variable
  2. +
  3. All symbols that exactly match one of the keywords should be highlighted as such.
  4. +
+

I implemented all your suggestions on a separate branch and, for the love of God, it works. So in particular, I put the whole regex in the begin section.

+

One thing I don't understand: I still need the $pattern here and the reason is that otherwise my keywords that start with a $ like $RecursionLimit would not be highlighted like here:

+

image

+

Do you have an idea, why this happens? I know that $ is a special char in regexes but even escaping them (several times) in the list of keywords did not help yesterday.

+

Your other question was concerning the usage of strong. This is for such a case:

+

image

+

So the parameter x1 has a "type" of Integer which is a keyword and it should be highlighted as such. When I use the keyword class, it would also set the color which is not what we want in this situation. Do you have a better way of expressing this without introducing another style class?

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+

I implemented all your suggestions on a separate branch and, for the love of God, it works.

+
+

Glad to hear it. :)

+
+

One thing I don't understand: I still need the $pattern here and the reason is that otherwise my keywords that start with a $ like $RecursionLimit would not be highlighted like here:

+
+

The default keyword pattern doesn't include $ I don't think and pattern isn't magically recursive... so anywhere you have a keyword list you're going to need to pass $pattern also. Keywords are found by FIRST executing a match with $pattern and THEN seeing if any of those matches are in the keyword list. So getting $pattern correct is very important for languages with special characters in their keywords.

+
+

I know that $ is a special char in regexes but even escaping them (several times) in the list of keywords did not help yesterday.

+
+

Lists of keywords (as passed to keywords) are plain text.. the only "magic" is relevancy: a|0 meaning keyword a has a relevancy of 0. There is no regex going on in normal keyword lists (other than the special $pattern key).

+
+

So the parameter x1 has a "type" of Integer which is a keyword and it should be highlighted as such. When I use the keyword class, it would also set the color which is not what we want in this situation. Do you have a better way of expressing this without introducing another style class?

+
+

I know what you're getting at but I have a very hard time even parsing this - because the parsing step is VERY different from the highlighting.

+
+

has a "type" of Integer which is a keyword and it should be highlighted as such.

+
+

Right - and it would be, and if a theme says "keywords" are orange, then it would be orange. That means some themes MIGHT be friendlier to some languages than other themes... simply because of language semantics. Since we must support an arbitrary number of themes (including user defined ones) the only way we can solve this properly is with proper semantic tags - and then themes being aware of them.

+

It's possible what's needed here semantically is an arguments.name.keyword "class" (we don't support that yet)... but themes can already do this via the simple nesting that we already provide within match blocks. I think perhaps Stack Overflow wants to add a rule like .hljs-mathematica .arguments .keyword (or something) to their custom theme that applies font-weight:bold.

+

Ie, this is a theme problem, not a grammar problem.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

The closest comparison (I can think of) would be to Fortran where we have highlighting like:

+
<span class="hljs-number">6.666666666666666_DBL</span>
+
+

The type simply being part of the number, not trying to apply a different style to it.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+

I think perhaps Stack Overflow wants to add a rule like .hljs-mathematica .arguments .keyword (or something) to their custom theme that applies font-weight:bold.

+
+

Of course that only solves your problem with Stack Overflow... if the general parsing looks "ugly/wrong" for most themes, then that's a separate issue and probably means the grammar may have to be artificially "crippled" (vs what might be considered perfection) to better fit with the larger world of themes.

+

I'd suggest trying:

+
    +
  • Don't highlight these keywords at all (let the whole argument be green - in your example)
  • +
  • Don't highlight the _ at all (to provide separation) and then just let the name and the type be highlighted as the theme desires.
  • +
+

The latter would probably be most similar to many other languages (where the _ would simply not be present and hence both "words" would stand alone as "identifier" and "keyword" separately).

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

OK, you convinced me. I'm opting for this solution

+
+

Don't highlight these keywords at all (let the whole argument be green - in your example)

+
+

because the underscore must be green. You know how users are.

+
+
+ + + +
+ +
+ + +
+ +
+
+ +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @b3m2a1 + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + + + +
+ +

+ + + @b3m2a1 + + + + + b3m2a1 + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

OK, you convinced me. I'm opting for this solution

+
+

Don't highlight these keywords at all (let the whole argument be green - in your example)

+
+

because the underscore must be green. You know how users are.

+
+

This is the option I would go for, personally. I'd also note that inside the front-end the pattern spec (e.g. the Integer) is displayed the same for known symbols as for unassigned ones.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

There might also need to be a larger discussion here if you're inventing new css classes that are not documented in css-classes-reference... generally that's not something core grammars do. For example (upon taking a closer look) I have no idea what brace and pattern are... I'm not opposed to figuring something out here, but we can't just add them randomly without looking at the bigger picture (whether using an existing css class would not be preferable to support existing themes).

+

We may have to fall back to generics and then look at expanding them along with the other issue relating to more complex grammar support.

+

To start, could you make a list of any custom "classes" you have, what they are (short explanation), and the closest thing that they would match with in our existing set of semantic classes?

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

because the underscore must be green.

+
+

Even this type of talking/thinking is problematic. :-) For Stack Overflow it may be green (if you're ultimately in charge of the theming)... but it will definitely not be green as a general rule... it'll be whatever color the theme decides arguments are, etc.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

Don't highlight these keywords at all (let the whole argument be green - in your example)

+
+

I'm fine with that for now. Though it's possible someone raises an issue in the future and it gets changed (and I would probably support that)... that's why if a site wants a very specific look it's best that the grammar handles the semantics and the site handles the theming. Ie, it might be more "future proof" for the grammar to flag it as what it is and let site owners decide how they want to theme it... but I'm happy revisiting that on a different day. :)

+

I just tried it as "keyword" with several themes and it looked a bit strange, but not terrible... but also hard to say since I already find the code very strange and weird to begin with. :-) If I was more familiar with Mathematica I might have a stronger opinion here. :-)

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @b3m2a1 + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + + + +
+ +

+ + + @b3m2a1 + + + + + b3m2a1 + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+
+

because the underscore must be green.

+
+

Even this type of talking/thinking is problematic. :-) For Stack Overflow it may be green (if you're ultimately in charge of the theming)... but it will definitely not be green as a general rule... it'll be whatever color the theme decides arguments are, etc.

+
+

We understand that, of course, but 90% of Mathematica users don't. It's a weird programming community composed of the usual SWE types, but also a majority of people who use it as a fancy graphing program and who only ever see it in Wolfram's front-end. This means that to the best of our ability we want to get Stack Overflow to display it the way those users would see it so that we can have a maximally useful forum for that.

+

If someone somewhere else decides to change the theme, who cares, but we want to make it possible for it to look right if we can convince SO of the utility.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

FYI: I type a lot because I'm verbose and try to be thorough, it isn't an attempt to overwhelm with more words than you. :-) I've been told sometimes it comes off that way.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Sep 29, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+

This means that to the best of our ability we want to get Stack Overflow to display it the way those users would see it so that we can have a maximally useful forum for that.

+
+

Sure, I think I understand the GOAL. :-) It just doesn't fit with HLJS as a generic highlighter for 200+ languages... to get what you want really involves controlling the grammar and theme tightly.

+
+

If someone somewhere else decides to change the theme, who cares, but we want to make it possible for it to look right if we can convince SO of the utility.

+
+

In this case perhaps you should consider #2 then:

+
    +
  1. The grammar is very precise in its parsing.
  2. +
  3. StackOverflow's rendering is "spot on" (via your control of the theme)
  4. +
+

And if someone finds that it looks "weird" with other themes, then we revisit THAT in the future. :) I found it "tolerable"...

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @b3m2a1 + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + + + +
+ +

+ + + @b3m2a1 + + + + + b3m2a1 + + + + + + commented + + + Sep 29, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+
+
+

Don't highlight these keywords at all (let the whole argument be green - in your example)

+
+

I'm fine with that for now. Though it's possible someone raises an issue in the future and it gets changed (and I would probably support that)... that's why if a site wants a very specific look it's best that the grammar handles the semantics and the site handles the theming. Ie, it might be more "future proof" for the grammar to flag it as what it is and let site owners decide how they want to theme it... but I'm happy revisiting that on a different day. :)

+

I just tried it as "keyword" with several themes and it looked a bit strange, but not terrible... but also hard to say since I already find the code very strange and weird to begin with. :-) If I was more familiar with Mathematica I might have a stronger opinion here. :-)

+
+

BTW from a pure semantic perspective when we have an expression like

+
myFunc[i_, int_Integer, r_Real, q_?(Internal`RealValuedNumericQ)]:=...;
+

that int_Integer actually parses out like

+
Pattern[int, Blank[Integer]]
+

and so the _ should be at minimum attached to the Integer argument, i.e. they should be a semantic unit. Most Mathematica users, though, would likely argue that the entire expression should be a semantic unit named pattern, since _Integer, even though it's a semantic unit, is never styled.

+

From a formal grammar perspective the entire pattern expression syntax can be given as

+
name:pat:default -> Pattern[name, Optional[pat, default]]
+

and pat can be any matchable pattern, most commonly things that look like _Type, __Type, (_Type)..., etc. which are all discrete semantic units and hence in the spirit of specifying a grammar should be contained in a single pattern class.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + +
+
+
+
+ + +
+
+
+
+ + + +
+ +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 1fa1040 + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

Well, this is the point, where the "theme reality" hits. Many of the themes have built_in in the same style class as number which makes absolutely no sense in our case.

+
+

By my count:

+
    +
  • Themes where they are the same: 40
  • +
  • Themes where they are different 51
  • +
+

Without looking it's easy to find themes that blend type and keyword also (15 at a quick count)... many themes are "quirky". I personally find many unattractive - but that's just me - there is no accounting for taste. Since we currently don't have any objective standards on what makes a "good" theme (or what themes we include)... my bent leans heavily towards theme autonomy - letting themes determine their own look based on the semantic meaning of the terms - rather than allowing grammars one at a time to "cheat" themes because they disagree with theme choices.

+

I expect most sites/implementors typically use a single theme (or two) and if they have issues/disagreements with that theme then they can customize it. (using CSS or not aliases)

+
+

So while I agree that built_in should fit better here

+
+

This kind of seals the deal for me... these are indeed more built-ins than they are keywords. You're reasoning from a purely visual perspective.

+
+

using keyword will give much better results in many themes.

+
+

I think this is very dependent on how you define "better results". When your results become "the opposite of what the theme author intended" that is not better. I get that your heart is in the right place here, but I feel this is short-term thinking... perhaps it does make your theme slightly better today but it costs the whole library tomorrow in terms of muddier semantics, theme authors not being certain of what things mean what in which grammars, etc...

+

...now a theme author who already clearly expressed "really i want numbers and built-ins to look the same" (for whatever reason) has to add a special case CSS to handle Mathematica because you pulled the rug right out from underneath them by calling your built-ins keywords.

+

If there is a true issue with our themes here then we need to work with the theme authors (or a visual designer) to actually fix the themes, not just let grammars one at a time redefine the semantic meaning of the terms in order to achieve a certain "look". This is the road to insanity. This probably also touches on the broader long-term initiative of higher fidelity grammars.

+

We could surely use some help here, but the solution isn't grammars just going rogue like this. I will spin this off into a new issue.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+ +
+ +
+
+ @joshgoebel +joshgoebel + + + + mentioned this pull request + + + Nov 2, 2020 + +
+ + + + + + + + +
+
+ + + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@halirutan Let's focus on the remaining technical issues here (if any). Are there any? I'm still not sure what your reference was to regex.js but if you don't think it belongs in this PR anyways then maybe it's simply not relevant here. :-)

+

If you want to respond regarding the theming (built_in vs keyword) please continue that discussion over in the new thread I started.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + + +
+ +
+ +
+ +
+ @halirutan + +
+ + +
+ + +
+ + +
+
+ + + + halirutan + + + + + reviewed + + + + + Nov 2, 2020 + + +
+ + +
+ + +
+
+ + + +
+
+ + + + +
+ +
+ + + types/index.d.ts + + + + Outdated + + + Show resolved + Hide resolved +
+
+
+ + + + + + + + + +
+
+ + + + +
+
+ + +
+ +
+ + + +
+ +
+
+
+ +
+
+ + joshgoebel + and others + added 2 commits + + Nov 2, 2020 + +
+
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 67ceb03 + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 92b413a + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

@halirutan Let's focus on the remaining technical issues here (if any). Are there any?

+
+

@joshgoebel I think it's good to go now.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@joshgoebel Ah, didn't you want to turn on autodetect? I tested it locally already and it seems that it doesn't break anything.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+
+ +
+
+ + joshgoebel + + added 2 commits + + Nov 2, 2020 + +
+
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + +
+ + final cleanups + + + +
+ +
+ + +
+ +
+
+ + +
+ + c4838b6 + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + ad71815 + +
+
+
+
+
….js into WIP_Wolfram_Language
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@halirutan Thanks for all the great work on this!!! :-) Hopefully it serves the SE community (and others) well.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+ +
+
+ + + + @joshgoebel +joshgoebel + + + + + + + removed + the + + 0-WIP + + label + + + Nov 2, 2020 + +
+
+ + + + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 2, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

This was before our new PR template... could you update the changelog (this might be worthy of several bullets if you wanted, you decide)... And if you wanted to provide me a short summary for the squashed commit that'd be great also.

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + + +
+ +
+ +
+ +
+ @joshgoebel + +
+ + +
+ + +
+ + +
+
+ + + + joshgoebel + + + + + reviewed + + + + + Nov 2, 2020 + + +
+ + +
+ + +
+
+ + + +
+
+ + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ A list of additional names (besides the canonical one given by the filename) that can be used to identify a language in HTML classes and in a call to :ref:`getLanguage <getLanguage>`. + +
+
+ +
+
+ +
+ classNameAliases + +
+
+ +
+
+
+ + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Nov 2, 2020 + + + + +
+ + + + Member + + +

+ + + + + +
+

@allejo Any thoughts on this naming? It seems clear to me... Other ideas were nesting, but that seems more complex:

+
themes: { aliases: {}}
+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @halirutan + + +

+ + + + halirutan + + + + + + + Nov 2, 2020 + + + + +
+ + Author + + + + + Contributor + + +

+ + + + + +
+

I'd be fine with classNameAliases. Another suggestion would be styleClassAliases (or even styleAliases) which makes it a bit clearer what we're talking about. You should decide this having the newbie user in mind.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+
+ + + + +
+ +
+

+
+ +
+
+ This comment has been minimized. + +
+

+ +
+
+
+ + +
+ +
+
+
+
+ + + + + + + + +
+
+ + +
+ + @joshgoebel + + +

+ + + + joshgoebel + + + + + + + Nov 2, 2020 + + + + + + + + + +
+ + + + Member + + +

+ + + + + +
+

className has meaning though because it's the key we use to specify such things already... making className and classNameAliases consistent.

+
+
+ + + +
+ + +
+ +
+
+ +
+
+ + + +
+ +
+ + +
+ + + + +
+ + + + +
+
+ + +
+ +
+ + + +
+ +
+
+
+ +
+
+ + joshgoebel + + added 2 commits + + Nov 2, 2020 + +
+
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 0653dcd + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + a178a71 + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Nov 3, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

This was before our new PR template... could you update the changelog (this might be worthy of several bullets if you wanted, you decide)... And if you wanted to provide me with a short summary for the squashed commit that'd be great also.

+
+

@joshgoebel Would something like this work?

+
- enh(mathematica) Rework entire implementation [Patrick Scheibe][]
+    - Correct matching of the many variations of Mathematica's numbers
+    - Matching of named-characters aka special symbols like `\[Gamma]`
+    - Updated list of version 12.1 built-in symbols
+    - Matching of patterns, slots, message-names and braces
+

How does the linking of author name to their GitHub page work? I guess it requires that the PR is merged first and then it automatically retrieves it from people who have committed in the repo? Never seen this before.

+

For squashed commit, you could use something like this which details the technical points more:

+
Fix several issues and implement additional features for the Wolfram Language (Mathematica)
+
+- Include an up-to-date list of built-in symbols in a separate `lib/mathematica.js` file. It's one keyword per line and easier to maintain.
+- Fix regexp to identify symbols/variables which requires special treatment and does not follow the common `IDENT_RE` matching.
+- Replace generic `C_NUMBER_MODE` matching with dedicated regular expressions for all possible numbers in Mathematica.
+- Include named-characters in the matching of symbols.
+- Allow for dedicated styling of
+    - pattern-like forms, e.g. `par_String`
+    - slots of anonymous functions, e.g. `##3`
+    - message names, e.g. `myFunc::usage`
+    - braces, curly braces and brackets
+- Introduce `classNameAliases` to map specific styles to general styles used by all themes. This allows for using built-in themes and writing sophisticated Mathematica themes.
+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 3, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+
+

How does the linking of author name to their GitHub page work?

+
+

No magic, it's just a footnote link... see lines 35-41, etc... I'm gonna noodle on this a bit more to see if I come up with a better name for the alias stuff (or anyone comments). This should get merged in the next day or two though! :-)

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @halirutan +
+
+ + + + +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 6d240a7 + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + + +
+ +
+ +
+ +
+ @joshgoebel + +
+ + +
+ + +
+ + +
+
+ + + + joshgoebel + + + + + approved these changes + + + + + Nov 3, 2020 + + +
+ + +
+ + +
+
+ + + + + +
+ +
+ + + +
+ +
+
+
+ +
+
+ + joshgoebel + + added 2 commits + + Nov 6, 2020 + +
+
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + +
+ + tweak changelog + + + +
+ +
+ + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + c20aa2a + +
+
+
+
+ + +
+
+
+
+ +
+
+ + +
+
+
+ +
+
+ + @joshgoebel +
+
+ + + + +
+ + + +
+ +
+ +
+ + + + +
+ +
+ + +
+ + 5d2d68a + +
+
+
+
+ + +
+
+
+
+ + +
+ + + +
+ + +
+
+ +
+
+ + + + @joshgoebel +joshgoebel + + + + + merged commit ff6df77 + into + + + highlightjs:master + + + Nov 6, 2020 + +
+ 10 checks passed +
+ +
+
+ 10 checks passed +
+
+
+
+ +
+
+ + build (12.x, node) + + +
+ + Details +
+
+
+ +
+
+ + build (12.x, browser) + + +
+ + Details +
+
+
+ +
+
+ + build (12.x, browser -n) + + +
+ + Details +
+
+
+ +
+
+ + build (14.x, node) + + +
+ + Details +
+
+
+ +
+
+ + build (14.x, browser) + + +
+ + Details +
+
+
+ +
+
+ + build (14.x, browser -n) + + +
+ + Details +
+
+
+ +
+
+ + build (15.x, node) + + +
+ + Details +
+
+
+ +
+
+ + build (15.x, browser) + + +
+ + Details +
+
+
+ +
+
+ + build (15.x, browser -n) + + +
+ + Details +
+
+
+ +
+
+ + security/snyk (joshgoebel) + + No manifest changes detected in 1 project +
+ + Details +
+
+
+
+
+ +
+ + + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 6, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@halirutan 🚀

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @CarlQLange + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + + + +
+ +

+ + + @CarlQLange + + + + + CarlQLange + + + + + + commented + + + Nov 6, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+

Huge thanks to both of you! 👏👏👏👏

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ + +
+ + +
+ @halirutan + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Contributor + + + + + Author + + + +
+ +

+ + + @halirutan + + + + + halirutan + + + + + + commented + + + Nov 7, 2020 + + + + +

+
+ + +
+ + + + + + + + + +
+

@joshgoebel Thanks a bunch and thanks for guiding me so well along the way. I really enjoyed our discussions!

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + +
+ +
+
+ +
+ +
+
+ @halirutan +halirutan + + + + mentioned this pull request + + + Nov 7, 2020 + +
+ + + + + + + + +
+
+ + + +
+ + + +
+ + +
+ + +
+ @joshgoebel + +
+ + +
+
+ + + +
+
+ + + + +
+ + + + + + Copy link + + +
+ + +
+ +
+ + + + + + + + Member + + + + + +
+ +

+ + + @joshgoebel + + + + + joshgoebel + + + + + + commented + + + Nov 22, 2020 + + + + + + + + + +

+
+ + +
+ + + + + + + + + +
+

Are there any common patterns from other languages that are ILLEGAL in Mathematica so we can add an illegal key? (which greatly speeds up auto-detect) Things that would let us know for sure that a given snippet is NOT Mathematica? Or just any normal characters that are simply entirely disallowed in the code?

+
# ruby comment
+// JS comment
+/* C++ comment */
+
+

Oftentimes comment patterns can be good illegals for this...

+
+
+ + + +
+ + +
+ +
+ + +
+
+ + +
+ + +
+ + + + + + +
+
+ +
+ + +
+ +
+
+
+ Sign up for free + to join this conversation on GitHub. + Already have an account? + Sign in to comment +
+ + +
+
+
+ +
+
+ + +
+ + + + + + + + + + + + + + +
+
+ +
+ Projects +
+ + + + None yet + + +
+ + + + + + + +
+
+ +
+ Linked issues +
+ + + +

Successfully merging this pull request may close these issues.

+ +

None yet

+ +
+
+ + + + +
+
+
+ 5 participants +
+ +
+
+ + + + + + + + + + +
+ + + +
+
+
+ + + +
+ + +
+
+ + +
+
+ +
+ + + + + + +
+ + + You can’t perform that action at this time. +
+ + + + + + + + + + + + + diff --git a/test/index.js b/test/index.js index ef6c36c..598f84e 100644 --- a/test/index.js +++ b/test/index.js @@ -1,5 +1,5 @@ var assert = require('assert') -var escapeHtml = require('..') +var escapeHtml = require('..').escapeHtmlFast describe('escapeHtml(string)', function () { describe('when string is undefined', function () {