From c4ce2a5c4c3726d6bdd79ab2836204d5cf14738b Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Sat, 30 Nov 2024 16:29:21 -0800 Subject: [PATCH] =?UTF-8?q?[spell-check]=20Allow=20the=20user=20to=20white?= =?UTF-8?q?list=20sections=20of=20a=20buffer=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …for spellchecking on a per-language basis. This opens up things that aren't really possible with the `excludedScopes` setting. For instance: you can specify `source.python comment, source.python string, source.js comment`; this will enable spellchecking in Python files for both comments and strings, while enabling spellchecking in JavaScript files for _only_ comments. Closes #1135. --- packages/spell-check/lib/main.js | 20 +-- packages/spell-check/lib/scope-helper.js | 155 ++++++++++++++++++ packages/spell-check/lib/spell-check-view.js | 121 +++++++++++++- packages/spell-check/package.json | 5 +- packages/spell-check/spec/spell-check-spec.js | 125 +++++++++++++- 5 files changed, 396 insertions(+), 30 deletions(-) create mode 100644 packages/spell-check/lib/scope-helper.js diff --git a/packages/spell-check/lib/main.js b/packages/spell-check/lib/main.js index df66c98767..8803a23c25 100644 --- a/packages/spell-check/lib/main.js +++ b/packages/spell-check/lib/main.js @@ -42,23 +42,9 @@ module.exports = { // Hook up changes to the configuration settings. this.excludedScopeRegexLists = []; this.subs.add( - atom.config.observe( - 'spell-check.excludedScopes', - (excludedScopes) => { - this.excludedScopeRegexLists = excludedScopes.map( - (excludedScope) => - excludedScope - .split(/\s+/)[0] - .split('.') - .filter((className) => className) - .map( - (className) => - new RegExp(`\\b${className}\\b`) - ) - ); - return this.updateViews(); - } - ) + atom.config.onDidChange('spell-check.excludedScopes', () => { + return this.updateViews(); + }) ); this.subs.add( diff --git a/packages/spell-check/lib/scope-helper.js b/packages/spell-check/lib/scope-helper.js new file mode 100644 index 0000000000..f43135e607 --- /dev/null +++ b/packages/spell-check/lib/scope-helper.js @@ -0,0 +1,155 @@ +function normalizeSegment(segment) { + if (!segment.startsWith('.')) return segment; + return segment.substring(1); +} + +function segmentsMatch( + descriptorSegment, + selectorSegment, + { enforceSegmentOrder = false } = {} +) { + let descriptorParts = normalizeSegment(descriptorSegment).split('.'); + let selectorParts = normalizeSegment(selectorSegment).split('.'); + + if (selectorParts.length > descriptorParts.length) { + return false; + } + + // Remove all parts from the descriptor scope name that aren't present in the + // selector scope name. + for (let i = descriptorParts.length - 1; i >= 0; i--) { + let part = descriptorParts[i]; + if (!selectorParts.includes(part)) { + descriptorParts.splice(i, 1); + } + } + // Does order matter? It would if this were a TextMate scope, but Atom has + // broadly treated `.function.entity` as equivalent to `.entity.function`, + // even though it causes headaches in some places. + // + // We'll assume that order doesn't matter, but the user can opt into strict + // ordering if they want. + if (!enforceSegmentOrder) { + descriptorParts.sort(); + selectorParts.sort(); + } + return descriptorParts.join('.') === selectorParts.join('.'); +} + +class ScopeSelector { + static create(stringOrScopeSelector) { + if (typeof stringOrScopeSelector === 'string') { + return new ScopeSelector(stringOrScopeSelector); + } else if (stringOrScopeSelector instanceof ScopeSelector) { + return stringOrScopeSelector; + } else { + throw new TypeError(`Invalid argument`); + } + } + + constructor(selectorString) { + this.selectorString = selectorString; + this.variations = selectorString.split(/,\s*/); + } + + matches(scopeDescriptorOrArray, rawOptions = {}) { + let options = { + // Whether to treat (e.g.) `.function.entity` as distinct from + // `.entity.function`. Defaults to `false` to match prevailing Atom + // behavior. + enforceSegmentOrder: false, + ...rawOptions, + }; + console.log(this, 'matches', scopeDescriptorOrArray); + let scopeList; + if (Array.isArray(scopeDescriptorOrArray)) { + scopeList = scopeDescriptorOrArray; + } else { + scopeList = scopeDescriptorOrArray.getScopesArray(); + } + + return this.variations.some((variation) => + this.matchesVariation(scopeList, variation, options) + ); + } + + matchesVariation(scopeList, selectorString, options) { + let parts = selectorString.split(/\s+/); + if (parts.length > scopeList.length) return false; + + let lastIndex = -1; + + outer: for (let selectorPart of parts) { + // Find something in the descriptor that matches this selector part. + for (let [i, descriptorPart] of scopeList.entries()) { + // Ignore everything before our index cursor; this is what enforces the + // ordering of the scope selector. + if (i <= lastIndex) continue; + let doesMatch = segmentsMatch( + descriptorPart, + selectorPart, + options + ); + if (doesMatch) { + lastIndex = i; + continue outer; + } + } + // If we get this far, we searched the entire descriptor list for a + // selector part and failed to find it; hence this variation doesn't + // match. + return false; + } + // If we get this far, we made it through the entire gauntlet without + // hitting the early return. This variation matches! + return true; + } +} + +// Private: A candidate for possible addition to the {ScopeDescriptor} API. +// +// Tests whether the given scope descriptor matches the given scope selector. +// +// A subset of the full TextMate scope selector syntax is supported: +// +// * Descendant scopes (e.g., `source.python string`); this function will +// enforce the ordering of segments. +// * Variations (e.g., `comment.block, string.quoted`); this function will +// return true if any of the variations match. +// +// Not supported: +// +// * Subtraction syntax (e.g., `comment - block`). +// * Left/right edge syntax (e.g., `L:comment.block`). +// +// For example: given the scope descriptor… +// +// [ +// 'source.js', +// 'meta.block.function.js', +// 'string.quoted.single.js' +// ] +// +// …here are outcomes of various tests: +// +// scopeDescriptorMatchesSelector(descriptor, `source`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `text`) // -> false +// scopeDescriptorMatchesSelector(descriptor, `source.js`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `source.python`) // -> false +// scopeDescriptorMatchesSelector(descriptor, `source.js meta.block.function.js`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `source meta`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `source meta.block.class`) // -> false +// scopeDescriptorMatchesSelector(descriptor, `source meta string`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `source string`) // -> true +// scopeDescriptorMatchesSelector(descriptor, `source string meta`) // -> false +// +// - `scopeDescriptor` A {ScopeDescriptor} or a scope descriptor {Array}. +// - `selector` A {String} representing a scope selector. +function scopeDescriptorMatchesSelector(scopeDescriptor, selector) { + let scopeSelector = ScopeSelector.create(selector); + return scopeSelector.matches(scopeDescriptor); +} + +module.exports = { + scopeDescriptorMatchesSelector, +}; diff --git a/packages/spell-check/lib/spell-check-view.js b/packages/spell-check/lib/spell-check-view.js index c6545bf810..f995c5e8e5 100644 --- a/packages/spell-check/lib/spell-check-view.js +++ b/packages/spell-check/lib/spell-check-view.js @@ -2,6 +2,18 @@ let SpellCheckView; const _ = require('underscore-plus'); const { CompositeDisposable } = require('atom'); const SpellCheckTask = require('./spell-check-task'); +const { scopeDescriptorMatchesSelector } = require('./scope-helper'); + +// Tests whether a grammar's root scope matches a scope specified in the +// `grammars` setting. Allows for a more generic name in the setting (e.g., +// `source` to match all `source.[x]` grammars). +function topLevelScopeMatches(grammar, scope) { + if (scope === grammar) return true; + if (grammar.startsWith(`${scope}.`)) { + return true; + } + return false; +} let CorrectionsView = null; @@ -73,6 +85,15 @@ module.exports = SpellCheckView = class SpellCheckView { }) ); + this.disposables.add( + atom.config.observe( + 'spell-check.excludedScopes', + (excludedScopes) => { + this.excludedScopes = excludedScopes; + } + ) + ); + this.subscribeToBuffer(); this.disposables.add(this.editor.onDidDestroy(this.destroy.bind(this))); @@ -118,6 +139,7 @@ module.exports = SpellCheckView = class SpellCheckView { this.unsubscribeFromBuffer(); if (this.spellCheckCurrentGrammar()) { + this.scopesToSpellCheck = this.getSpellCheckScopesForCurrentGrammar(); this.buffer = this.editor.getBuffer(); this.bufferDisposable = new CompositeDisposable( this.buffer.onDidStopChanging( @@ -131,7 +153,69 @@ module.exports = SpellCheckView = class SpellCheckView { spellCheckCurrentGrammar() { const grammar = this.editor.getGrammar().scopeName; - return _.contains(atom.config.get('spell-check.grammars'), grammar); + let grammars = atom.config.get('spell-check.grammars'); + let topLevelScopes = grammars.map((rawScope) => { + if (!rawScope.includes(' ')) return rawScope; + return rawScope.substring(0, rawScope.indexOf(' ')); + }); + return topLevelScopes.some((scope) => { + return topLevelScopeMatches(grammar, scope); + }); + // return topLevelScopes.includes(grammar); + } + + // Returns: + // + // * `true` if the entire buffer should be checked; + // * `false` if none of the buffer should be checked; or, if only certain + // parts of the buffer should be checked, + // * an {Array} of scope names matching regions of the buffer that should + // be checked. + getSpellCheckScopesForCurrentGrammar() { + const grammar = this.editor.getGrammar().scopeName; + let grammars = atom.config.get('spell-check.grammars'); + let scopeList = []; + // Despite the name of this setting, spell-checking is no longer all or + // nothing on a per-grammar basis; we now allow users to opt into + // checking subsections of a buffer by adding descendant scopes. Each + // segment must begin with all or part of a root scope name (e.g., + // `source.js`, `text.html`, but otherwise any valid scope selector is + // accepted here.) + // + // Examples: + // + // * `source.js comment.block` + // * `source comment, source string.quoted` + // * `text` + // + // The first example targets just JS block comments; the second targets + // all comments and quoted strings in _all_ source files; and the third + // targets any text format, whether HTML or Markdown or plaintext. + // + // This allows for more granular spell-checking than was possible + // before, even if the `excludeScopes` setting was utilized. + for (let rawScope of grammars) { + if (!rawScope.includes(' ')) { + // Any value that's just the bare root scope of the language + // (like `source.python`) means that we're spell-checking the + // entire buffer. This applies even if there's a later match + // for this grammar that's more restrictive. + if (topLevelScopeMatches(grammar, rawScope)) { + return true; + } + } else { + // If the value also includes a descendant scope, it means we're + // spell-checking some subset of the buffer. + let index = rawScope.indexOf(' '); + let rootScope = rawScope.substring(0, index); + if (topLevelScopeMatches(grammar, rootScope)) { + // There could be multiple of these — e.g., `source.python string, + // source.python comment` — so we won't return early. + scopeList.push(rawScope); + } + } + } + return scopeList.length > 0 ? scopeList : false; } destroyMarkers() { @@ -147,6 +231,9 @@ module.exports = SpellCheckView = class SpellCheckView { const scope = this.editor.scopeDescriptorForBufferPosition( misspelling[0] ); + // Under the hood, we spell-check the entire document; but we + // might end up ignoring some of the misspellings based on + // their scope descriptor. if (!this.scopeIsExcluded(scope)) { result.push( this.markerLayer.markBufferRange(misspelling, { @@ -308,11 +395,31 @@ module.exports = SpellCheckView = class SpellCheckView { return (this.spellCheckModule.contextMenuEntries = []); } - scopeIsExcluded(scopeDescriptor, excludedScopes) { - return this.spellCheckModule.excludedScopeRegexLists.some((regexList) => - scopeDescriptor.scopes.some((scopeName) => - regexList.every((regex) => regex.test(scopeName)) - ) - ); + scopeIsExcluded(scopeDescriptor) { + // Practically speaking, `this.scopesToSpellCheck` will either be `true` + // or an array of scope selectors. If it's the latter, then we should + // apply whitelisting and exclude anything that doesn't match. + if (Array.isArray(this.scopesToSpellCheck)) { + // If we know none of the subscopes match this region, we can + // exclude it even before we get to the `excludedScopes` setting. + let someMatch = this.scopesToSpellCheck.some( + (scopeToSpellCheck) => { + return scopeDescriptorMatchesSelector( + scopeDescriptor, + scopeToSpellCheck + ); + } + ); + if (!someMatch) return true; + } + // Whether or not we applied whitelisting above, excluded scopes take + // precedence; anything that doesn't make it through this gauntlet + // gets excluded. + return this.excludedScopes.some((excludedScope) => { + return scopeDescriptorMatchesSelector( + scopeDescriptor, + excludedScope + ); + }); } }; diff --git a/packages/spell-check/package.json b/packages/spell-check/package.json index 61ac11aa6d..862fb9f1b4 100644 --- a/packages/spell-check/package.json +++ b/packages/spell-check/package.json @@ -20,7 +20,8 @@ "repository": "https://github.com/pulsar-edit/pulsar", "license": "MIT", "engines": { - "atom": "*" + "atom": "*", + "node": ">=14" }, "scripts": { "format": "prettier --write \"spec/*.js\" \"lib/**/*.js\" \"script/*.js\" --loglevel warn" @@ -37,7 +38,7 @@ "source.rst", "text.restructuredtext" ], - "description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language.", + "description": "List of scopes for languages which will be checked for misspellings. See [the README](https://github.com/pulsar-edit/pulsar/blob/master/packages/spell-check/README.md) for more information on finding the correct scope for a specific language. If you want to spell-check only parts of some languages, you may specify a more detailed scope selector (like `source.js comment.block` or `source string`); but the first part of the selector must match the language scope.", "order": 1 }, "excludedScopes": { diff --git a/packages/spell-check/spec/spell-check-spec.js b/packages/spell-check/spec/spell-check-spec.js index cdc32c18f9..4c74200dfd 100644 --- a/packages/spell-check/spec/spell-check-spec.js +++ b/packages/spell-check/spec/spell-check-spec.js @@ -47,8 +47,8 @@ describe('Spell check', function () { }); afterEach(async () => { - await languageMode.atTransactionEnd(); - SpellCheckTask.clear(); + await languageMode.atTransactionEnd(); + SpellCheckTask.clear(); }); it('decorates all misspelled words', async function () { @@ -77,9 +77,124 @@ describe('Spell check', function () { expect(textForMarker(misspellingMarkers[1])).toEqual('bok'); }); + it('allows certain sub-scopes to be whitelisted into spell checking, implicitly excluding anything that does not match', async () => { + editor.setText( + `speledWrong = 5; +function speledWrong() {} +// We only care about mispelings in comments and strings! +let foo = "this is speled wrong" +class SpeledWrong {}` + ); + + atom.config.set('spell-check.useLocales', true); + atom.config.set('spell-check.grammars', [ + 'source.js comment', + 'source.js string', + 'text.plain.null-grammar', + ]); + + { + await conditionPromise(() => getMisspellingMarkers().length > 0); + const markers = getMisspellingMarkers(); + expect(markers.map((marker) => marker.getBufferRange())).toEqual([ + [ + [2, 22], + [2, 32], + ], + [ + [3, 19], + [3, 25], + ], + ]); + } + }); + + it('interprets a bare root scope as opting out of scope whitelisting, even when other more specific segments are present', async () => { + editor.setText( + `speledWrong = 5; +function speledWrong() {} +// We only care about mispelings in comments and strings! +let foo = "this is speled wrong" +class SpeledWrong {}` + ); + + atom.config.set('spell-check.useLocales', true); + atom.config.set('spell-check.grammars', [ + // Exactly as above, but with an extra `'source.js'` listing; this will + // supersede the more specific settings below. + 'source.js', + 'source.js comment', + 'source.js string', + 'text.plain.null-grammar', + ]); + + { + await conditionPromise(() => getMisspellingMarkers().length > 0); + const markers = getMisspellingMarkers(); + expect(markers.map((marker) => marker.getBufferRange())).toEqual([ + [ + [0, 0], + [0, 11], + ], + [ + [1, 9], + [1, 20], + ], + [ + [2, 22], + [2, 32], + ], + [ + [3, 4], + [3, 7], + ], + [ + [3, 19], + [3, 25], + ], + [ + [4, 6], + [4, 17], + ], + ]); + } + }); + + it('allows a generic root scope like "source"', async () => { + editor.setText( + `speledWrong = 5; +function speledWrong() {} +// We only care about mispelings in comments and strings! +let foo = "this is speled wrong" +class SpeledWrong {}` + ); + + atom.config.set('spell-check.useLocales', true); + atom.config.set('spell-check.grammars', [ + 'source comment', + 'source string', + 'text.plain.null-grammar', + ]); + + { + await conditionPromise(() => getMisspellingMarkers().length > 0); + const markers = getMisspellingMarkers(); + expect(markers.map((marker) => marker.getBufferRange())).toEqual([ + [ + [2, 22], + [2, 32], + ], + [ + [3, 19], + [3, 25], + ], + ]); + } + }); + it('allows certain scopes to be excluded from spell checking', async function () { editor.setText( -`speledWrong = 5; + `speledWrong = 5; function speledWrong() {} class SpeledWrong {}` ); @@ -126,7 +241,9 @@ class SpeledWrong {}` } { - atom.config.set('spell-check.excludedScopes', ['.entity.name.type.class']); + atom.config.set('spell-check.excludedScopes', [ + '.entity.name.type.class', + ]); await conditionPromise(() => getMisspellingMarkers().length === 2); const markers = getMisspellingMarkers(); expect(markers.map((marker) => marker.getBufferRange())).toEqual([