From 0676cae6f3325ec469d7bf8f523ef6ba27c48cdc Mon Sep 17 00:00:00 2001 From: xzb <2598514867@qq.com> Date: Sat, 18 Jan 2025 14:31:48 +0800 Subject: [PATCH] semtok --- gopls/doc/release/v0.18.0.md | 15 +++- gopls/doc/semantictokens.md | 11 +-- gopls/internal/golang/highlight.go | 11 ++- gopls/internal/golang/semtok.go | 70 ++++++++++++++++--- gopls/internal/protocol/semtok/semtok.go | 24 ++++--- .../marker/testdata/token/placeholder.txt | 26 +++++++ internal/astutil/util.go | 14 ++++ 7 files changed, 139 insertions(+), 32 deletions(-) create mode 100644 gopls/internal/test/marker/testdata/token/placeholder.txt diff --git a/gopls/doc/release/v0.18.0.md b/gopls/doc/release/v0.18.0.md index e2b730052bc..9bb1c735659 100644 --- a/gopls/doc/release/v0.18.0.md +++ b/gopls/doc/release/v0.18.0.md @@ -98,7 +98,9 @@ The Definition query now supports additional locations: When invoked on a return statement, hover reports the types of the function's result variables. -## Improvements to "DocumentHighlight" +## UX improvements to format strings + +### "DocumentHighlight" When your cursor is inside a printf-like function, gopls now highlights the relationship between formatting verbs and arguments as visual cues to differentiate how operands are used in the format string. @@ -109,3 +111,14 @@ fmt.Printf("Hello %s, you scored %d", name, score) If the cursor is either on `%s` or `name`, gopls will highlight `%s` as a write operation, and `name` as a read operation. + +### "SemanticHighlight" + +Similar to the improvements to DocumentHighlight, gopls also reports formatting verbs +with the "placeholder" modifier on token type "string" to better distinguish them from other parts of the format string. + +```go +fmt.Printf("Hello %s, you scored %d", name, score) +``` + +`%s` and `%d` will have token type "string" and modifier "placeholder". 
diff --git a/gopls/doc/semantictokens.md b/gopls/doc/semantictokens.md index f17ea7f06d8..fefbab2f954 100644 --- a/gopls/doc/semantictokens.md +++ b/gopls/doc/semantictokens.md @@ -54,14 +54,15 @@ and change over time. (Nonetheless, a minimal implementation would not return `k `number`, `comment`, or `string`.) The maximal position isn't particularly well-specified either. To chose one example, a -format string might have formatting codes (`%[4]-3.6f`), escape sequences (`\U00010604`), and regular +format string might have formatting codes (`%-[4].6f`), escape sequences (`\U00010604`), and regular characters. Should these all be distinguished? One could even imagine distinguishing different runes by their Unicode language assignment, or some other Unicode property, such as -being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt). +being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt). While gopls does not fully adhere to such distinctions, +it does recognize formatting directives within strings, reporting them as "string" tokens with the "placeholder" modifier, +providing more precise semantic highlighting in format strings. -Gopls does not come close to either of these principles. Semantic tokens are returned for -identifiers, keywords, operators, comments, and literals. (Semantic tokens do not -cover the file. They are not returned for +Semantic tokens are returned for identifiers, keywords, operators, comments, and literals. +(Semantic tokens do not cover the file. They are not returned for white space or punctuation, and there is no semantic token for labels.) The following describes more precisely what gopls does, with a few notes on possible alternative choices. 
diff --git a/gopls/internal/golang/highlight.go b/gopls/internal/golang/highlight.go index a4f81e35153..ee82b622a71 100644 --- a/gopls/internal/golang/highlight.go +++ b/gopls/internal/golang/highlight.go @@ -17,6 +17,7 @@ import ( "golang.org/x/tools/gopls/internal/cache" "golang.org/x/tools/gopls/internal/file" "golang.org/x/tools/gopls/internal/protocol" + goplsastutil "golang.org/x/tools/gopls/internal/util/astutil" internalastutil "golang.org/x/tools/internal/astutil" "golang.org/x/tools/internal/event" "golang.org/x/tools/internal/fmtstr" @@ -210,11 +211,7 @@ func highlightPrintf(call *ast.CallExpr, idx int, cursorPos token.Pos, lit *ast. // highlightPair highlights the operation and its potential argument pair if the cursor is within either range. highlightPair := func(rang fmtstr.Range, argIndex int) { - rangeStart, err := internalastutil.PosInStringLiteral(lit, rang.Start) - if err != nil { - return - } - rangeEnd, err := internalastutil.PosInStringLiteral(lit, rang.End) + rangeStart, rangeEnd, err := internalastutil.RangeInStringLiteral(lit, rang.Start, rang.End) if err != nil { return } @@ -226,9 +223,9 @@ func highlightPrintf(call *ast.CallExpr, idx int, cursorPos token.Pos, lit *ast. } // cursorPos can't equal to end position, otherwise the two - // neighborhood such as (%[2]*d) are both highlighted if cursor in "*" (ending of [2]*). + // neighborhood such as (%[2]*d) are both highlighted if cursor in "d" (ending of [2]*). 
if rangeStart <= cursorPos && cursorPos < rangeEnd || - arg != nil && arg.Pos() <= cursorPos && cursorPos < arg.End() { + arg != nil && goplsastutil.NodeContains(arg, cursorPos) { highlightRange(result, rangeStart, rangeEnd, protocol.Write) if arg != nil { succeededArg = argIndex diff --git a/gopls/internal/golang/semtok.go b/gopls/internal/golang/semtok.go index 84fad43a47f..4bc7d0b1226 100644 --- a/gopls/internal/golang/semtok.go +++ b/gopls/internal/golang/semtok.go @@ -17,6 +17,7 @@ import ( "log" "path/filepath" "regexp" + "strconv" "strings" "time" @@ -28,7 +29,9 @@ import ( "golang.org/x/tools/gopls/internal/protocol/semtok" "golang.org/x/tools/gopls/internal/util/bug" "golang.org/x/tools/gopls/internal/util/safetoken" + "golang.org/x/tools/internal/astutil" "golang.org/x/tools/internal/event" + "golang.org/x/tools/internal/fmtstr" ) // semDebug enables comprehensive logging of decisions @@ -323,16 +326,17 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) { case *ast.AssignStmt: tv.token(n.TokPos, len(n.Tok.String()), semtok.TokOperator) case *ast.BasicLit: - if strings.Contains(n.Value, "\n") { - // has to be a string. - tv.multiline(n.Pos(), n.End(), semtok.TokString) - break - } - what := semtok.TokNumber if n.Kind == token.STRING { - what = semtok.TokString + if strings.Contains(n.Value, "\n") { + // has to be a string. + tv.multiline(n.Pos(), n.End(), semtok.TokString) + } else if !tv.formatString(n) { + // not a format string, color the whole as a TokString. 
+ tv.token(n.Pos(), len(n.Value), semtok.TokString) + } + } else { + tv.token(n.Pos(), len(n.Value), semtok.TokNumber) } - tv.token(n.Pos(), len(n.Value), what) case *ast.BinaryExpr: tv.token(n.OpPos, len(n.Op.String()), semtok.TokOperator) case *ast.BlockStmt: @@ -461,6 +465,56 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) { return true } +// formatString tries to report directives and string literals +// inside a (possible) printf-like call, it returns false and does nothing +// if the string is not a format string. +func (tv *tokenVisitor) formatString(lit *ast.BasicLit) bool { + if len(tv.stack) <= 1 { + return false + } + call, ok := tv.stack[len(tv.stack)-2].(*ast.CallExpr) + if !ok { + return false + } + lastNonVariadic, idx := formatStringAndIndex(tv.info, call) + if idx == -1 || lit != lastNonVariadic { + return false + } + format, err := strconv.Unquote(lit.Value) + if err != nil { + return false + } + if !strings.Contains(format, "%") { + return false + } + operations, err := fmtstr.Parse(format, idx) + if err != nil { + return false + } + + // It's a format string, compute interleaved sub range of directives and literals. + // litPos tracks literal substring position within the overall BasicLit. + litPos := lit.ValuePos + for _, op := range operations { + // Skip "%%". + if op.Verb.Verb == '%' { + continue + } + rangeStart, rangeEnd, err := astutil.RangeInStringLiteral(lit, op.Range.Start, op.Range.End) + if err != nil { + return false + } + // Report formatting directive. + tv.token(rangeStart, int(rangeEnd-rangeStart), semtok.TokString, semtok.ModPlaceHolder) + // Report literal substring. + tv.token(litPos, int(rangeStart-litPos), semtok.TokString) + litPos = rangeEnd + } + // Report remaining literal substring. 
+ tv.token(litPos, int(lit.End()-litPos), semtok.TokString) + return true +} + func (tv *tokenVisitor) appendObjectModifiers(mods []semtok.Modifier, obj types.Object) (semtok.Type, []semtok.Modifier) { if obj.Pkg() == nil { mods = append(mods, semtok.ModDefaultLibrary) diff --git a/gopls/internal/protocol/semtok/semtok.go b/gopls/internal/protocol/semtok/semtok.go index a40f2b5482f..ae24218638b 100644 --- a/gopls/internal/protocol/semtok/semtok.go +++ b/gopls/internal/protocol/semtok/semtok.go @@ -99,17 +99,18 @@ const ( // Since the type of a symbol is orthogonal to its kind, // (e.g. a variable can have function type), // we use modifiers for the top-level type constructor. - ModArray Modifier = "array" - ModBool Modifier = "bool" - ModChan Modifier = "chan" - ModInterface Modifier = "interface" - ModMap Modifier = "map" - ModNumber Modifier = "number" - ModPointer Modifier = "pointer" - ModSignature Modifier = "signature" // for function types - ModSlice Modifier = "slice" - ModString Modifier = "string" - ModStruct Modifier = "struct" + ModArray Modifier = "array" + ModBool Modifier = "bool" + ModChan Modifier = "chan" + ModInterface Modifier = "interface" + ModMap Modifier = "map" + ModNumber Modifier = "number" + ModPointer Modifier = "pointer" + ModSignature Modifier = "signature" // for function types + ModSlice Modifier = "slice" + ModString Modifier = "string" + ModStruct Modifier = "struct" + ModPlaceHolder Modifier = "placeholder" // for format string placeholders such as "%s" ) // TokenModifiers is a slice of modifiers gopls will return as its server @@ -131,6 +132,7 @@ var TokenModifiers = []Modifier{ ModSlice, ModString, ModStruct, + ModPlaceHolder, } // Encode returns the LSP encoding of a sequence of tokens. 
diff --git a/gopls/internal/test/marker/testdata/token/placeholder.txt b/gopls/internal/test/marker/testdata/token/placeholder.txt new file mode 100644 index 00000000000..4761afd2927 --- /dev/null +++ b/gopls/internal/test/marker/testdata/token/placeholder.txt @@ -0,0 +1,26 @@ +This test checks semanticTokens for format string placeholders. + +-- settings.json -- +{ + "semanticTokens": true +} + +-- flags -- +-ignore_extra_diags + +-- placeholder.go -- +package placeholder + +import "fmt" + +func PrintfTests() { + var i int + var x float64 + fmt.Printf("%b %d %f", 3, i, x) //@ token("%b", "string", "placeholder"), token("%d", "string", "placeholder"),token("%f", "string", "placeholder"), + fmt.Printf("lit1%blit2%dlit3%flit4", 3, i, x) //@ token("%b", "string", "placeholder"), token("%d", "string", "placeholder"),token("%f", "string", "placeholder"),token("lit1", "string", ""),token("lit2", "string", ""),token("lit3", "string", ""), + fmt.Printf("%% %d lit2", 3, i, x) //@ token("%d", "string", "placeholder"),token("%%", "string", ""),token("lit2", "string", ""), + fmt.Printf("Hello %% \n %s, you \t%% \n have %d new m%%essages!", "Alice", 5) //@ token("%s", "string", "placeholder"),token("%d", "string", "placeholder") + fmt.Printf("%d \nss \x25[2]d", 234, 123) //@ token("%d", "string", "placeholder"),token("\\x25[2]d", "string", "placeholder") + fmt.Printf("start%[2]*.[1]*[3]dmiddle%send", 4, 5, 6) //@ token("%[2]*.[1]*[3]d", "string", "placeholder"),token("start", "string", ""),token("%s", "string", "placeholder"),token("middle", "string", ""),token("end", "string", "") +} + diff --git a/internal/astutil/util.go b/internal/astutil/util.go index 3b3c6259568..849d45d8539 100644 --- a/internal/astutil/util.go +++ b/internal/astutil/util.go @@ -12,6 +12,20 @@ import ( "unicode/utf8" ) +// RangeInStringLiteral calculates the positional range within a string literal +// corresponding to the specified start and end byte offsets within the logical string. 
+func RangeInStringLiteral(lit *ast.BasicLit, start, end int) (token.Pos, token.Pos, error) { + startPos, err := PosInStringLiteral(lit, start) + if err != nil { + return 0, 0, fmt.Errorf("start: %v", err) + } + endPos, err := PosInStringLiteral(lit, end) + if err != nil { + return 0, 0, fmt.Errorf("end: %v", err) + } + return startPos, endPos, nil +} + // PosInStringLiteral returns the position within a string literal // corresponding to the specified byte offset within the logical // string that it denotes.