Skip to content

Commit

Permalink
semtok
Browse files Browse the repository at this point in the history
  • Loading branch information
xzbdmw committed Jan 21, 2025
1 parent 1261a24 commit 0676cae
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 32 deletions.
15 changes: 14 additions & 1 deletion gopls/doc/release/v0.18.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ The Definition query now supports additional locations:
When invoked on a return statement, hover reports the types of
the function's result variables.

## Improvements to "DocumentHighlight"
## UX improvements to format strings

### "DocumentHighlight"

When your cursor is inside a printf-like function, gopls now highlights the relationship between
formatting verbs and arguments as visual cues to differentiate how operands are used in the format string.
Expand All @@ -109,3 +111,14 @@ fmt.Printf("Hello %s, you scored %d", name, score)

If the cursor is either on `%s` or `name`, gopls will highlight `%s` as a write operation,
and `name` as a read operation.

### "SemanticHighlight"

Similar to the improvements to DocumentHighlight, gopls also reports formatting verbs
with the "placeholder" modifier on token type "string" to better distinguish them from other parts of the format string.

```go
fmt.Printf("Hello %s, you scored %d", name, score)
```

`%s` and `%d` will have token type "string" and modifier "placeholder".
11 changes: 6 additions & 5 deletions gopls/doc/semantictokens.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,15 @@ and change over time. (Nonetheless, a minimal implementation would not return `k
`number`, `comment`, or `string`.)

The maximal position isn't particularly well-specified either. To choose one example, a
format string might have formatting codes (`%[4]-3.6f`), escape sequences (`\U00010604`), and regular
format string might have formatting codes (`%-[4].6f`), escape sequences (`\U00010604`), and regular
characters. Should these all be distinguished? One could even imagine distinguishing
different runes by their Unicode language assignment, or some other Unicode property, such as
being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt).
being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt). While gopls does not fully adhere to such distinctions,
it does recognize formatting directives within strings, reporting them as "string" tokens with the "placeholder" modifier,
providing more precise semantic highlighting in format strings.

Gopls does not come close to either of these principles. Semantic tokens are returned for
identifiers, keywords, operators, comments, and literals. (Semantic tokens do not
cover the file. They are not returned for
Semantic tokens are returned for identifiers, keywords, operators, comments, and literals.
(Semantic tokens do not cover the file. They are not returned for
white space or punctuation, and there is no semantic token for labels.)
The following describes more precisely what gopls
does, with a few notes on possible alternative choices.
Expand Down
11 changes: 4 additions & 7 deletions gopls/internal/golang/highlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"golang.org/x/tools/gopls/internal/cache"
"golang.org/x/tools/gopls/internal/file"
"golang.org/x/tools/gopls/internal/protocol"
goplsastutil "golang.org/x/tools/gopls/internal/util/astutil"
internalastutil "golang.org/x/tools/internal/astutil"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/fmtstr"
Expand Down Expand Up @@ -210,11 +211,7 @@ func highlightPrintf(call *ast.CallExpr, idx int, cursorPos token.Pos, lit *ast.

// highlightPair highlights the operation and its potential argument pair if the cursor is within either range.
highlightPair := func(rang fmtstr.Range, argIndex int) {
rangeStart, err := internalastutil.PosInStringLiteral(lit, rang.Start)
if err != nil {
return
}
rangeEnd, err := internalastutil.PosInStringLiteral(lit, rang.End)
rangeStart, rangeEnd, err := internalastutil.RangeInStringLiteral(lit, rang.Start, rang.End)
if err != nil {
return
}
Expand All @@ -226,9 +223,9 @@ func highlightPrintf(call *ast.CallExpr, idx int, cursorPos token.Pos, lit *ast.
}

// cursorPos can't equal to end position, otherwise the two
// neighborhood such as (%[2]*d) are both highlighted if cursor in "*" (ending of [2]*).
// neighborhood such as (%[2]*d) are both highlighted if cursor in "d" (ending of [2]*).
if rangeStart <= cursorPos && cursorPos < rangeEnd ||
arg != nil && arg.Pos() <= cursorPos && cursorPos < arg.End() {
arg != nil && goplsastutil.NodeContains(arg, cursorPos) {
highlightRange(result, rangeStart, rangeEnd, protocol.Write)
if arg != nil {
succeededArg = argIndex
Expand Down
70 changes: 62 additions & 8 deletions gopls/internal/golang/semtok.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"log"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"

Expand All @@ -28,7 +29,9 @@ import (
"golang.org/x/tools/gopls/internal/protocol/semtok"
"golang.org/x/tools/gopls/internal/util/bug"
"golang.org/x/tools/gopls/internal/util/safetoken"
"golang.org/x/tools/internal/astutil"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/fmtstr"
)

// semDebug enables comprehensive logging of decisions
Expand Down Expand Up @@ -323,16 +326,17 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) {
case *ast.AssignStmt:
tv.token(n.TokPos, len(n.Tok.String()), semtok.TokOperator)
case *ast.BasicLit:
if strings.Contains(n.Value, "\n") {
// has to be a string.
tv.multiline(n.Pos(), n.End(), semtok.TokString)
break
}
what := semtok.TokNumber
if n.Kind == token.STRING {
what = semtok.TokString
if strings.Contains(n.Value, "\n") {
// has to be a string.
tv.multiline(n.Pos(), n.End(), semtok.TokString)
} else if !tv.formatString(n) {
// not a format string, color the whole as a TokString.
tv.token(n.Pos(), len(n.Value), semtok.TokString)
}
} else {
tv.token(n.Pos(), len(n.Value), semtok.TokNumber)
}
tv.token(n.Pos(), len(n.Value), what)
case *ast.BinaryExpr:
tv.token(n.OpPos, len(n.Op.String()), semtok.TokOperator)
case *ast.BlockStmt:
Expand Down Expand Up @@ -461,6 +465,56 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) {
return true
}

// formatString reports semantic tokens for lit when it is the format
// string of a (possible) printf-like call: each formatting directive is
// reported as a "string" token with the "placeholder" modifier, and the
// literal text between directives as plain "string" tokens.
//
// It returns false, having reported nothing, if lit is not a format
// string; the caller is then expected to report the literal as a whole.
// It returns true once every sub-range of lit has been reported.
func (tv *tokenVisitor) formatString(lit *ast.BasicLit) bool {
	// The candidate call is the node just below the top of the stack
	// (presumably lit's parent; verify against how tv.stack is maintained).
	if len(tv.stack) <= 1 {
		return false
	}
	call, ok := tv.stack[len(tv.stack)-2].(*ast.CallExpr)
	if !ok {
		return false
	}
	// Only the expression that formatStringAndIndex identifies as the
	// format argument qualifies; idx == -1 means there is none.
	lastNonVariadic, idx := formatStringAndIndex(tv.info, call)
	if idx == -1 || lit != lastNonVariadic {
		return false
	}
	format, err := strconv.Unquote(lit.Value)
	if err != nil {
		return false
	}
	// Without a '%' there cannot be any directive to highlight.
	if !strings.Contains(format, "%") {
		return false
	}
	operations, err := fmtstr.Parse(format, idx)
	if err != nil {
		return false
	}

	// Resolve the source range of every directive before reporting
	// anything. If a lookup fails we must not leave behind a partial set
	// of tokens, because the caller reacts to a false result by reporting
	// the entire literal, which would overlap them.
	type span struct{ start, end token.Pos }
	directives := make([]span, 0, len(operations))
	for _, op := range operations {
		// Skip "%%": it stays part of the surrounding literal text.
		if op.Verb.Verb == '%' {
			continue
		}
		start, end, err := astutil.RangeInStringLiteral(lit, op.Range.Start, op.Range.End)
		if err != nil {
			return false
		}
		directives = append(directives, span{start, end})
	}

	// It's a format string: report interleaved literal and directive
	// sub-ranges in source order. litPos tracks the start of the pending
	// literal substring within the overall BasicLit (initially the
	// opening quote).
	litPos := lit.ValuePos
	for _, d := range directives {
		// Report literal substring preceding the directive.
		tv.token(litPos, int(d.start-litPos), semtok.TokString)
		// Report formatting directive.
		tv.token(d.start, int(d.end-d.start), semtok.TokString, semtok.ModPlaceHolder)
		litPos = d.end
	}
	// Report remaining literal substring (through the closing quote).
	tv.token(litPos, int(lit.End()-litPos), semtok.TokString)
	return true
}

func (tv *tokenVisitor) appendObjectModifiers(mods []semtok.Modifier, obj types.Object) (semtok.Type, []semtok.Modifier) {
if obj.Pkg() == nil {
mods = append(mods, semtok.ModDefaultLibrary)
Expand Down
24 changes: 13 additions & 11 deletions gopls/internal/protocol/semtok/semtok.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,18 @@ const (
// Since the type of a symbol is orthogonal to its kind,
// (e.g. a variable can have function type),
// we use modifiers for the top-level type constructor.
ModArray Modifier = "array"
ModBool Modifier = "bool"
ModChan Modifier = "chan"
ModInterface Modifier = "interface"
ModMap Modifier = "map"
ModNumber Modifier = "number"
ModPointer Modifier = "pointer"
ModSignature Modifier = "signature" // for function types
ModSlice Modifier = "slice"
ModString Modifier = "string"
ModStruct Modifier = "struct"
ModArray Modifier = "array"
ModBool Modifier = "bool"
ModChan Modifier = "chan"
ModInterface Modifier = "interface"
ModMap Modifier = "map"
ModNumber Modifier = "number"
ModPointer Modifier = "pointer"
ModSignature Modifier = "signature" // for function types
ModSlice Modifier = "slice"
ModString Modifier = "string"
ModStruct Modifier = "struct"
ModPlaceHolder Modifier = "placeholder" // for format string placeholders such as "%s"
)

// TokenModifiers is a slice of modifiers gopls will return as its server
Expand All @@ -131,6 +132,7 @@ var TokenModifiers = []Modifier{
ModSlice,
ModString,
ModStruct,
ModPlaceHolder,
}

// Encode returns the LSP encoding of a sequence of tokens.
Expand Down
26 changes: 26 additions & 0 deletions gopls/internal/test/marker/testdata/token/placeholder.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
This test checks semanticTokens for format string placeholders.

-- settings.json --
{
"semanticTokens": true
}

-- flags --
-ignore_extra_diags

-- placeholder.go --
package placeholder

import "fmt"

func PrintfTests() {
var i int
var x float64
fmt.Printf("%b %d %f", 3, i, x) //@ token("%b", "string", "placeholder"), token("%d", "string", "placeholder"),token("%f", "string", "placeholder"),
fmt.Printf("lit1%blit2%dlit3%flit4", 3, i, x) //@ token("%b", "string", "placeholder"), token("%d", "string", "placeholder"),token("%f", "string", "placeholder"),token("lit1", "string", ""),token("lit2", "string", ""),token("lit3", "string", ""),
fmt.Printf("%% %d lit2", 3, i, x) //@ token("%d", "string", "placeholder"),token("%%", "string", ""),token("lit2", "string", ""),
fmt.Printf("Hello %% \n %s, you \t%% \n have %d new m%%essages!", "Alice", 5) //@ token("%s", "string", "placeholder"),token("%d", "string", "placeholder")
fmt.Printf("%d \nss \x25[2]d", 234, 123) //@ token("%d", "string", "placeholder"),token("\\x25[2]d", "string", "placeholder")
fmt.Printf("start%[2]*.[1]*[3]dmiddle%send", 4, 5, 6) //@ token("%[2]*.[1]*[3]d", "string", "placeholder"),token("start", "string", ""),token("%s", "string", "placeholder"),token("middle", "string", ""),token("end", "string", "")
}

14 changes: 14 additions & 0 deletions internal/astutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@ import (
"unicode/utf8"
)

// RangeInStringLiteral returns the positions within the string literal lit
// that correspond to the start and end byte offsets within the logical
// string that it denotes.
//
// Each offset is resolved independently by PosInStringLiteral. If either
// lookup fails, both returned positions are token.NoPos and the returned
// error wraps the underlying one, prefixed with "start: " or "end: " to
// identify which offset was rejected.
func RangeInStringLiteral(lit *ast.BasicLit, start, end int) (token.Pos, token.Pos, error) {
	startPos, err := PosInStringLiteral(lit, start)
	if err != nil {
		// %w (rather than %v) keeps the message identical while
		// preserving the error chain for errors.Is / errors.As.
		return token.NoPos, token.NoPos, fmt.Errorf("start: %w", err)
	}
	endPos, err := PosInStringLiteral(lit, end)
	if err != nil {
		return token.NoPos, token.NoPos, fmt.Errorf("end: %w", err)
	}
	return startPos, endPos, nil
}

// PosInStringLiteral returns the position within a string literal
// corresponding to the specified byte offset within the logical
// string that it denotes.
Expand Down

0 comments on commit 0676cae

Please sign in to comment.