From b59868cfa0a2cf9ae59ecc6eb33e992a50d09a72 Mon Sep 17 00:00:00 2001 From: Zhiming Ma Date: Fri, 26 Apr 2024 21:57:19 +0800 Subject: [PATCH] fix(vscode): use SemanticTokensProvider to filter words before extracting declaration snippets. --- clients/vscode/src/CodeSearchEngine.ts | 8 +-- clients/vscode/src/TabbyCompletionProvider.ts | 34 ++++++--- clients/vscode/src/utils.ts | 70 ++++++++++++++++--- 3 files changed, 90 insertions(+), 22 deletions(-) diff --git a/clients/vscode/src/CodeSearchEngine.ts b/clients/vscode/src/CodeSearchEngine.ts index d964f4b82a63..625cf54e9668 100644 --- a/clients/vscode/src/CodeSearchEngine.ts +++ b/clients/vscode/src/CodeSearchEngine.ts @@ -1,6 +1,6 @@ import * as Engine from "@orama/orama"; -import { Range, Position, TextDocument } from "vscode"; -import { extractSematicSymbols } from "./utils"; +import { Position, Range, TextDocument } from "vscode"; +import { extractNonReservedWordList } from "./utils"; export type DocumentRange = { document: TextDocument; @@ -16,7 +16,7 @@ export type CodeSnippet = { fullText: string; // The code language id of the snippet language: string; - // The sematic symbols extracted from the snippet + // The semantic symbols extracted from the snippet symbols: string; }; @@ -102,7 +102,7 @@ export class CodeSearchEngine { offset: document.offsetAt(positionStart), fullText: text, language: document.languageId, - symbols: extractSematicSymbols(text), + symbols: extractNonReservedWordList(text), }); } diff --git a/clients/vscode/src/TabbyCompletionProvider.ts b/clients/vscode/src/TabbyCompletionProvider.ts index edd88b50a1ef..aaefd6aa8f2c 100644 --- a/clients/vscode/src/TabbyCompletionProvider.ts +++ b/clients/vscode/src/TabbyCompletionProvider.ts @@ -22,7 +22,7 @@ import { API as GitAPI } from "./types/git"; import { logger } from "./logger"; import { agent } from "./agent"; import { RecentlyChangedCodeSearch } from "./RecentlyChangedCodeSearch"; -import { getWordStartIndices, extractSematicSymbols } from 
"./utils"; +import { extractSemanticSymbols, extractNonReservedWordList } from "./utils"; type DisplayedCompletion = { id: string; @@ -342,19 +342,33 @@ export class TabbyCompletionProvider extends EventEmitter implements InlineCompl position.line, position.character, ); - const prefixText = document.getText(prefixRange); - const prefixRangeStartOffset = document.offsetAt(prefixRange.start); - const symbolPositions = getWordStartIndices(prefixText).map((offset) => - document.positionAt(prefixRangeStartOffset + offset), - ); - this.logger.trace("Found symbol positions in prefix text", { prefixText, symbolPositions }); + const allowedSymbolTypes = [ + "class", + "decorator", + "enum", + "function", + "interface", + "macro", + "method", + "namespace", + "struct", + "type", + "typeParameter", + ]; + const allSymbols = await extractSemanticSymbols(document, prefixRange); + if (!allSymbols) { + this.logger.trace("End collectDeclarationSnippets early, symbols provider not available."); + return undefined; + } + const symbols = allSymbols.filter((symbol) => allowedSymbolTypes.includes(symbol.type)); + this.logger.trace("Found symbols in prefix text", { symbols }); // Loop through the symbol positions backwards - for (let symbolIndex = symbolPositions.length - 1; symbolIndex >= 0; symbolIndex--) { + for (let symbolIndex = symbols.length - 1; symbolIndex >= 0; symbolIndex--) { if (snippets.length >= config.fillDeclarations.maxSnippets) { // Stop collecting snippets if the max number of snippets is reached break; } - const symbolPosition = symbolPositions[symbolIndex]; + const symbolPosition = symbols[symbolIndex]!.position; const declarationLinks = await commands.executeCommand( "vscode.executeDefinitionProvider", document.uri, @@ -425,7 +439,7 @@ export class TabbyCompletionProvider extends EventEmitter implements InlineCompl position.character, ); const prefixText = document.getText(prefixRange); - const query = extractSematicSymbols(prefixText); + const query = 
extractNonReservedWordList(prefixText); const snippets = await this.recentlyChangedCodeSearch.collectRelevantSnippets( query, document, diff --git a/clients/vscode/src/utils.ts b/clients/vscode/src/utils.ts index 81bb30b63d12..0cd9eb308e70 100644 --- a/clients/vscode/src/utils.ts +++ b/clients/vscode/src/utils.ts @@ -1,11 +1,65 @@ -export function getWordStartIndices(text: string): number[] { - const indices: number[] = []; - const re = /\b\w/g; - let match; - while ((match = re.exec(text)) != null) { - indices.push(match.index); +import { commands, Position, Range, SemanticTokens, SemanticTokensLegend, TextDocument } from "vscode"; + +export type SemanticSymbolInfo = { + position: Position; + type: string; +}; + +// reference: https://code.visualstudio.com/api/language-extensions/semantic-highlight-guide +export async function extractSemanticSymbols( + document: TextDocument, + range: Range, +): Promise { + const providedTokens = await commands.executeCommand( + "vscode.provideDocumentRangeSemanticTokens", + document.uri, + range, + ); + if ( + typeof providedTokens === "object" && + providedTokens !== null && + "resultId" in providedTokens && + "data" in providedTokens + ) { + const tokens = providedTokens as SemanticTokens; + const providedLegend = await commands.executeCommand( + "vscode.provideDocumentRangeSemanticTokensLegend", + document.uri, + range, + ); + if ( + typeof providedLegend === "object" && + providedLegend !== null && + "tokenTypes" in providedLegend && + "tokenModifiers" in providedLegend + ) { + const legend = providedLegend as SemanticTokensLegend; + + const semanticSymbols: SemanticSymbolInfo[] = []; + let line = 0; + let char = 0; + for (let i = 0; i + 4 < tokens.data.length; i += 5) { + const deltaLine = tokens.data[i]!; + const deltaChar = tokens.data[i + 1]!; + // i + 2 is token length, not used here + const type = legend.tokenTypes[tokens.data[i + 3]!] ?? 
""; + // i + 4 is type modifiers, not used here + + line += deltaLine; + if (deltaLine > 0) { + char = deltaChar; + } else { + char += deltaChar; + } + semanticSymbols.push({ + position: new Position(line, char), + type, + }); + } + return semanticSymbols; + } } - return indices; + return undefined; } // Keywords appear in the code everywhere, but we don't want to use them for @@ -73,7 +127,7 @@ const reservedKeywords = [ "with", "yield", ]; -export function extractSematicSymbols(text: string): string { +export function extractNonReservedWordList(text: string): string { const re = /\w+/g; return [ ...new Set(text.match(re)?.filter((symbol) => symbol.length > 2 && !reservedKeywords.includes(symbol))).values(),