Merge pull request #1147 from savetheclocktower/spell-check-scope-gra…

…nularity [spell-check] Allow the user to whitelist sections of a buffer…
pulsar-edit · Dec 16, 2024 · b04e997 · b04e997
2 parents 7ab0460 + 8d6e677
commit b04e997
Show file tree

Hide file tree

Showing 5 changed files with 397 additions and 30 deletions.
diff --git a/packages/spell-check/lib/main.js b/packages/spell-check/lib/main.js
@@ -42,23 +42,9 @@ module.exports = {
         // Hook up changes to the configuration settings.
         this.excludedScopeRegexLists = [];
         this.subs.add(
-            atom.config.observe(
-                'spell-check.excludedScopes',
-                (excludedScopes) => {
-                    this.excludedScopeRegexLists = excludedScopes.map(
-                        (excludedScope) =>
-                            excludedScope
-                                .split(/\s+/)[0]
-                                .split('.')
-                                .filter((className) => className)
-                                .map(
-                                    (className) =>
-                                        new RegExp(`\\b${className}\\b`)
-                                )
-                    );
-                    return this.updateViews();
-                }
-            )
+            atom.config.onDidChange('spell-check.excludedScopes', () => {
+                return this.updateViews();
+            })
         );
 
         this.subs.add(

diff --git a/packages/spell-check/lib/scope-helper.js b/packages/spell-check/lib/scope-helper.js
@@ -0,0 +1,155 @@
+function normalizeSegment(segment) {
+    if (!segment.startsWith('.')) return segment;
+    return segment.substring(1);
+}
+
+function segmentsMatch(
+    descriptorSegment,
+    selectorSegment,
+    { enforceSegmentOrder = false } = {}
+) {
+    let descriptorParts = normalizeSegment(descriptorSegment).split('.');
+    let selectorParts = normalizeSegment(selectorSegment).split('.');
+
+    if (selectorParts.length > descriptorParts.length) {
+        return false;
+    }
+
+    // Remove all parts from the descriptor scope name that aren't present in the
+    // selector scope name.
+    for (let i = descriptorParts.length - 1; i >= 0; i--) {
+        let part = descriptorParts[i];
+        if (!selectorParts.includes(part)) {
+            descriptorParts.splice(i, 1);
+        }
+    }
+    // Does order matter? It would if this were a TextMate scope, but Atom has
+    // broadly treated `.function.entity` as equivalent to `.entity.function`,
+    // even though it causes headaches in some places.
+    //
+    // We'll assume that order doesn't matter, but the user can opt into strict
+    // ordering if they want.
+    if (!enforceSegmentOrder) {
+        descriptorParts.sort();
+        selectorParts.sort();
+    }
+    return descriptorParts.join('.') === selectorParts.join('.');
+}
+
+class ScopeSelector {
+    static create(stringOrScopeSelector) {
+        if (typeof stringOrScopeSelector === 'string') {
+            return new ScopeSelector(stringOrScopeSelector);
+        } else if (stringOrScopeSelector instanceof ScopeSelector) {
+            return stringOrScopeSelector;
+        } else {
+            throw new TypeError(`Invalid argument`);
+        }
+    }
+
+    constructor(selectorString) {
+        this.selectorString = selectorString;
+        this.variations = selectorString.split(/,\s*/);
+    }
+
+    matches(scopeDescriptorOrArray, rawOptions = {}) {
+        let options = {
+            // Whether to treat (e.g.) `.function.entity` as distinct from
+            // `.entity.function`. Defaults to `false` to match prevailing Atom
+            // behavior.
+            enforceSegmentOrder: false,
+            ...rawOptions,
+        };
+        console.log(this, 'matches', scopeDescriptorOrArray);
+        let scopeList;
+        if (Array.isArray(scopeDescriptorOrArray)) {
+            scopeList = scopeDescriptorOrArray;
+        } else {
+            scopeList = scopeDescriptorOrArray.getScopesArray();
+        }
+
+        return this.variations.some((variation) =>
+            this.matchesVariation(scopeList, variation, options)
+        );
+    }
+
+    matchesVariation(scopeList, selectorString, options) {
+        let parts = selectorString.split(/\s+/);
+        if (parts.length > scopeList.length) return false;
+
+        let lastIndex = -1;
+
+        outer: for (let selectorPart of parts) {
+            // Find something in the descriptor that matches this selector part.
+            for (let [i, descriptorPart] of scopeList.entries()) {
+                // Ignore everything before our index cursor; this is what enforces the
+                // ordering of the scope selector.
+                if (i <= lastIndex) continue;
+                let doesMatch = segmentsMatch(
+                    descriptorPart,
+                    selectorPart,
+                    options
+                );
+                if (doesMatch) {
+                    lastIndex = i;
+                    continue outer;
+                }
+            }
+            // If we get this far, we searched the entire descriptor list for a
+            // selector part and failed to find it; hence this variation doesn't
+            // match.
+            return false;
+        }
+        // If we get this far, we made it through the entire gauntlet without
+        // hitting the early return. This variation matches!
+        return true;
+    }
+}
+
+// Private: A candidate for possible addition to the {ScopeDescriptor} API.
+//
+// Tests whether the given scope descriptor matches the given scope selector.
+//
+// A subset of the full TextMate scope selector syntax is supported:
+//
+// * Descendant scopes (e.g., `source.python string`); this function will
+//   enforce the ordering of segments.
+// * Variations (e.g., `comment.block, string.quoted`); this function will
+//   return true if any of the variations match.
+//
+// Not supported:
+//
+// * Subtraction syntax (e.g., `comment - block`).
+// * Left/right edge syntax (e.g., `L:comment.block`).
+//
+// For example: given the scope descriptor…
+//
+// [
+//   'source.js',
+//   'meta.block.function.js',
+//   'string.quoted.single.js'
+// ]
+//
+// …here are outcomes of various tests:
+//
+// scopeDescriptorMatchesSelector(descriptor, `source`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `text`) // -> false
+// scopeDescriptorMatchesSelector(descriptor, `source.js`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `source.python`) // -> false
+// scopeDescriptorMatchesSelector(descriptor, `source.js meta.block.function.js`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `source meta`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `source meta.block.class`) // -> false
+// scopeDescriptorMatchesSelector(descriptor, `source meta string`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `source string`) // -> true
+// scopeDescriptorMatchesSelector(descriptor, `source string meta`) // -> false
+//
+// - `scopeDescriptor` A {ScopeDescriptor} or a scope descriptor {Array}.
+// - `selector` A {String} representing a scope selector.
+function scopeDescriptorMatchesSelector(scopeDescriptor, selector) {
+    let scopeSelector = ScopeSelector.create(selector);
+    return scopeSelector.matches(scopeDescriptor);
+}
+
+module.exports = {
+    scopeDescriptorMatchesSelector,
+};
diff --git a/packages/spell-check/lib/spell-check-view.js b/packages/spell-check/lib/spell-check-view.js
@@ -2,6 +2,18 @@ let SpellCheckView;
 const _ = require('underscore-plus');
 const { CompositeDisposable } = require('atom');
 const SpellCheckTask = require('./spell-check-task');
+const { scopeDescriptorMatchesSelector } = require('./scope-helper');
+
+// Tests whether a grammar's root scope matches a scope specified in the
+// `grammars` setting. Allows for a more generic name in the setting (e.g.,
+// `source` to match all `source.[x]` grammars).
+function topLevelScopeMatches(grammar, scope) {
+    if (scope === grammar) return true;
+    if (grammar.startsWith(`${scope}.`)) {
+        return true;
+    }
+    return false;
+}
 
 let CorrectionsView = null;
 
@@ -73,6 +85,15 @@ module.exports = SpellCheckView = class SpellCheckView {
             })
         );
 
+        this.disposables.add(
+            atom.config.observe(
+                'spell-check.excludedScopes',
+                (excludedScopes) => {
+                    this.excludedScopes = excludedScopes;
+                }
+            )
+        );
+
         this.subscribeToBuffer();
 
         this.disposables.add(this.editor.onDidDestroy(this.destroy.bind(this)));
@@ -118,6 +139,7 @@ module.exports = SpellCheckView = class SpellCheckView {
         this.unsubscribeFromBuffer();
 
         if (this.spellCheckCurrentGrammar()) {
+            this.scopesToSpellCheck = this.getSpellCheckScopesForCurrentGrammar();
             this.buffer = this.editor.getBuffer();
             this.bufferDisposable = new CompositeDisposable(
                 this.buffer.onDidStopChanging(
@@ -131,7 +153,69 @@ module.exports = SpellCheckView = class SpellCheckView {
 
     spellCheckCurrentGrammar() {
         const grammar = this.editor.getGrammar().scopeName;
-        return _.contains(atom.config.get('spell-check.grammars'), grammar);
+        let grammars = atom.config.get('spell-check.grammars');
+        let topLevelScopes = grammars.map((rawScope) => {
+            if (!rawScope.includes(' ')) return rawScope;
+            return rawScope.substring(0, rawScope.indexOf(' '));
+        });
+        return topLevelScopes.some((scope) => {
+            return topLevelScopeMatches(grammar, scope);
+        });
+        // return topLevelScopes.includes(grammar);
+    }
+
+    // Returns:
+    //
+    // * `true` if the entire buffer should be checked;
+    // * `false` if none of the buffer should be checked; or, if only certain
+    //   parts of the buffer should be checked,
+    // * an {Array} of scope names matching regions of the buffer that should
+    //   be checked.
+    getSpellCheckScopesForCurrentGrammar() {
+        const grammar = this.editor.getGrammar().scopeName;
+        let grammars = atom.config.get('spell-check.grammars');
+        let scopeList = [];
+        // Despite the name of this setting, spell-checking is no longer all or
+        // nothing on a per-grammar basis; we now allow users to opt into
+        // checking subsections of a buffer by adding descendant scopes. Each
+        // segment must begin with all or part of a root scope name (e.g.,
+        // `source.js`, `text.html`, but otherwise any valid scope selector is
+        // accepted here.)
+        //
+        // Examples:
+        //
+        // * `source.js comment.block`
+        // * `source comment, source string.quoted`
+        // * `text`
+        //
+        // The first example targets just JS block comments; the second targets
+        // all comments and quoted strings in _all_ source files; and the third
+        // targets any text format, whether HTML or Markdown or plaintext.
+        //
+        // This allows for more granular spell-checking than was possible
+        // before, even if the `excludeScopes` setting was utilized.
+        for (let rawScope of grammars) {
+            if (!rawScope.includes(' ')) {
+                // Any value that's just the bare root scope of the language
+                // (like `source.python`) means that we're spell-checking the
+                // entire buffer. This applies even if there's a later match
+                // for this grammar that's more restrictive.
+                if (topLevelScopeMatches(grammar, rawScope)) {
+                    return true;
+                }
+            } else {
+                // If the value also includes a descendant scope, it means we're
+                // spell-checking some subset of the buffer.
+                let index = rawScope.indexOf(' ');
+                let rootScope = rawScope.substring(0, index);
+                if (topLevelScopeMatches(grammar, rootScope)) {
+                    // There could be multiple of these — e.g., `source.python string,
+                    // source.python comment` — so we won't return early.
+                    scopeList.push(rawScope);
+                }
+            }
+        }
+        return scopeList.length > 0 ? scopeList : false;
     }
 
     destroyMarkers() {
@@ -147,6 +231,9 @@ module.exports = SpellCheckView = class SpellCheckView {
                 const scope = this.editor.scopeDescriptorForBufferPosition(
                     misspelling[0]
                 );
+                // Under the hood, we spell-check the entire document; but we
+                // might end up ignoring some of the misspellings based on
+                // their scope descriptor.
                 if (!this.scopeIsExcluded(scope)) {
                     result.push(
                         this.markerLayer.markBufferRange(misspelling, {
@@ -308,11 +395,31 @@ module.exports = SpellCheckView = class SpellCheckView {
         return (this.spellCheckModule.contextMenuEntries = []);
     }
 
-    scopeIsExcluded(scopeDescriptor, excludedScopes) {
-        return this.spellCheckModule.excludedScopeRegexLists.some((regexList) =>
-            scopeDescriptor.scopes.some((scopeName) =>
-                regexList.every((regex) => regex.test(scopeName))
-            )
-        );
+    scopeIsExcluded(scopeDescriptor) {
+        // Practically speaking, `this.scopesToSpellCheck` will either be `true`
+        // or an array of scope selectors. If it's the latter, then we should
+        // apply whitelisting and exclude anything that doesn't match.
+        if (Array.isArray(this.scopesToSpellCheck)) {
+            // If we know none of the subscopes match this region, we can
+            // exclude it even before we get to the `excludedScopes` setting.
+            let someMatch = this.scopesToSpellCheck.some(
+                (scopeToSpellCheck) => {
+                    return scopeDescriptorMatchesSelector(
+                        scopeDescriptor,
+                        scopeToSpellCheck
+                    );
+                }
+            );
+            if (!someMatch) return true;
+        }
+        // Whether or not we applied whitelisting above, excluded scopes take
+        // precedence; anything that doesn't make it through this gauntlet
+        // gets excluded.
+        return this.excludedScopes.some((excludedScope) => {
+            return scopeDescriptorMatchesSelector(
+                scopeDescriptor,
+                excludedScope
+            );
+        });
     }
 };
diff --git a/packages/spell-check/package.json b/packages/spell-check/package.json
@@ -20,7 +20,8 @@
   "repository": "https://github.com/pulsar-edit/pulsar",
   "license": "MIT",
   "engines": {
-    "atom": "*"
+    "atom": "*",
+    "node": ">=14"
   },
   "scripts": {
     "format": "prettier --write \"spec/*.js\" \"lib/**/*.js\" \"script/*.js\" --loglevel warn"
@@ -29,6 +30,7 @@
     "grammars": {
       "type": "array",
       "default": [
+        "source comment",
         "source.asciidoc",
         "source.gfm",
         "text.git-commit",
@@ -37,7 +39,7 @@
         "source.rst",
         "text.restructuredtext"
       ],
-      "description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language.",
+      "description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language. If you want to spell-check only parts of some languages, you may specify a more detailed scope selector (like `source.js comment.block` or `source string`); but the first part of the selector must match the language scope.",
       "order": 1
     },
     "excludedScopes": {