Skip to content

Commit

Permalink
semtok
Browse files Browse the repository at this point in the history
  • Loading branch information
xzbdmw committed Jan 18, 2025
1 parent 1261a24 commit 9db16d1
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 15 deletions.
15 changes: 14 additions & 1 deletion gopls/doc/release/v0.18.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ The Definition query now supports additional locations:
When invoked on a return statement, hover reports the types of
the function's result variables.

## Improvements to "DocumentHighlight"
## UX improvements to format strings

### "DocumentHighlight"

When your cursor is inside a printf-like function, gopls now highlights the relationship between
formatting verbs and arguments as visual cues to differentiate how operands are used in the format string.
Expand All @@ -109,3 +111,14 @@ fmt.Printf("Hello %s, you scored %d", name, score)

If the cursor is either on `%s` or `name`, gopls will highlight `%s` as a write operation,
and `name` as a read operation.

### "SemanticHighlight"

Similar to the improvements to DocumentHighlight, gopls also reports formatting verbs
with the "macro" token type to better distinguish them from other parts of the format string.

```go
fmt.Printf("Hello %s, you scored %d", name, score)
```

`%s` and `%d` will have token type "macro".
9 changes: 5 additions & 4 deletions gopls/doc/semantictokens.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,12 @@ The maximal position isn't particularly well-specified either. To chose one exam
format string might have formatting codes (`%[4]-3.6f`), escape sequences (`\U00010604`), and regular
characters. Should these all be distinguished? One could even imagine distinguishing
different runes by their Unicode language assignment, or some other Unicode property, such as
being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt).
being [confusable](http://www.unicode.org/Public/security/10.0.0/confusables.txt). While gopls does not fully adhere to such distinctions,
it does recognize formatting directives within strings, treating them as "macro" tokens,
providing more precise semantic highlighting in format strings.

Gopls does not come close to either of these principles. Semantic tokens are returned for
identifiers, keywords, operators, comments, and literals. (Semantic tokens do not
cover the file. They are not returned for
Semantic tokens are returned for identifiers, keywords, operators, comments, and literals.
(Semantic tokens do not cover the file. They are not returned for
white space or punctuation, and there is no semantic token for labels.)
The following describes more precisely what gopls
does, with a few notes on possible alternative choices.
Expand Down
4 changes: 2 additions & 2 deletions gopls/internal/golang/highlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ func highlightPrintf(call *ast.CallExpr, idx int, cursorPos token.Pos, lit *ast.
}

// cursorPos can't equal to end position, otherwise the two
// neighborhood such as (%[2]*d) are both highlighted if cursor in "*" (ending of [2]*).
// neighborhood such as (%[2]*d) are both highlighted if cursor in "d" (ending of [2]*).
if rangeStart <= cursorPos && cursorPos < rangeEnd ||
arg != nil && arg.Pos() <= cursorPos && cursorPos < arg.End() {
arg != nil && arg.Pos() <= cursorPos && cursorPos <= arg.End() {
highlightRange(result, rangeStart, rangeEnd, protocol.Write)
if arg != nil {
succeededArg = argIndex
Expand Down
79 changes: 71 additions & 8 deletions gopls/internal/golang/semtok.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"log"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"

Expand All @@ -28,7 +29,9 @@ import (
"golang.org/x/tools/gopls/internal/protocol/semtok"
"golang.org/x/tools/gopls/internal/util/bug"
"golang.org/x/tools/gopls/internal/util/safetoken"
"golang.org/x/tools/internal/astutil"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/fmtstr"
)

// semDebug enables comprehensive logging of decisions
Expand Down Expand Up @@ -323,16 +326,16 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) {
case *ast.AssignStmt:
tv.token(n.TokPos, len(n.Tok.String()), semtok.TokOperator)
case *ast.BasicLit:
if strings.Contains(n.Value, "\n") {
// has to be a string.
tv.multiline(n.Pos(), n.End(), semtok.TokString)
break
}
what := semtok.TokNumber
if n.Kind == token.STRING {
what = semtok.TokString
if strings.Contains(n.Value, "\n") {
// has to be a string.
tv.multiline(n.Pos(), n.End(), semtok.TokString)
} else if !tv.formatString(n) {
tv.token(n.Pos(), len(n.Value), semtok.TokString)
}
} else {
tv.token(n.Pos(), len(n.Value), semtok.TokNumber)
}
tv.token(n.Pos(), len(n.Value), what)
case *ast.BinaryExpr:
tv.token(n.OpPos, len(n.Op.String()), semtok.TokOperator)
case *ast.BlockStmt:
Expand Down Expand Up @@ -461,6 +464,66 @@ func (tv *tokenVisitor) inspect(n ast.Node) (descend bool) {
return true
}

// formatString reports semantic tokens for the formatting directives and
// the literal text of lit when lit is the format string of a (possible)
// printf-like call.
//
// The literal qualifies only if all of the following hold:
//   - its parent on the visitor stack is an *ast.CallExpr;
//   - formatStringAndIndex identifies lit as that call's format argument;
//   - the literal unquotes successfully and contains at least one '%';
//   - fmtstr.Parse accepts the unquoted format.
//
// On success, each directive other than "%%" is reported as a "macro"
// token, the text between directives (including the surrounding quotes and
// any "%%") is reported as "string" tokens, and formatString returns true.
//
// Otherwise it returns false and emits no tokens at all, so the caller can
// safely report the whole literal as a single string token.
func (tv *tokenVisitor) formatString(lit *ast.BasicLit) bool {
	if len(tv.stack) <= 1 {
		return false
	}
	// The literal itself is the top of the stack; its parent must be a call.
	call, ok := tv.stack[len(tv.stack)-2].(*ast.CallExpr)
	if !ok {
		return false
	}
	lastNonVariadic, idx := formatStringAndIndex(tv.info, call)
	if idx == -1 || lit != lastNonVariadic {
		return false
	}
	format, err := strconv.Unquote(lit.Value)
	if err != nil {
		return false
	}
	if !strings.Contains(format, "%") {
		return false
	}
	operations, err := fmtstr.Parse(format, idx)
	if err != nil {
		return false
	}

	// Resolve the source range of every directive before emitting anything.
	// If a position cannot be mapped back into the literal we must bail out
	// without having produced partial tokens; otherwise the caller's
	// fallback whole-literal token would overlap the ones already emitted.
	type span struct{ start, end token.Pos }
	directives := make([]span, 0, len(operations))
	for _, op := range operations {
		// Skip "%%": it is rendered as part of the surrounding literal text.
		if op.Verb.Verb == '%' {
			continue
		}
		rangeStart, err := astutil.PosInStringLiteral(lit, op.Range.Start)
		if err != nil {
			return false
		}
		rangeEnd, err := astutil.PosInStringLiteral(lit, op.Range.End)
		if err != nil {
			return false
		}
		directives = append(directives, span{start: rangeStart, end: rangeEnd})
	}

	// It's a format string: emit interleaved directive and literal tokens.
	// litStart tracks where the next literal substring begins within the
	// overall literal (starting at the opening quote).
	litStart := lit.ValuePos
	for _, d := range directives {
		// Report formatting directive.
		tv.token(d.start, int(d.end-d.start), semtok.TokMacro)

		if litStart < d.start {
			// Report literal substring preceding the directive.
			tv.token(litStart, int(d.start-litStart), semtok.TokString)
		}
		litStart = d.end
	}
	// Report remaining literal substring (up to and including the closing quote).
	if litStart < lit.End() {
		tv.token(litStart, int(lit.End()-litStart), semtok.TokString)
	}
	return true
}

func (tv *tokenVisitor) appendObjectModifiers(mods []semtok.Modifier, obj types.Object) (semtok.Type, []semtok.Modifier) {
if obj.Pkg() == nil {
mods = append(mods, semtok.ModDefaultLibrary)
Expand Down
26 changes: 26 additions & 0 deletions gopls/internal/test/marker/testdata/token/directive.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
This test checks semantic tokens for format string directives.

-- settings.json --
{
"semanticTokens": true
}

-- flags --
-ignore_extra_diags

-- directive.go --
package directive

import "fmt"

// PrintfTests holds fmt.Printf calls whose format strings are annotated
// with the semantic tokens expected for them: directives are expected to be
// "macro" tokens, while literal text and "%%" are expected to be "string" tokens.
func PrintfTests() {
var i int
var x float64
fmt.Printf("%b %d %f", 3, i, x) //@ token("%b", "macro", ""), token("%d", "macro", ""),token("%f", "macro", ""),
fmt.Printf("lit1%blit2%dlit3%flit4", 3, i, x) //@ token("%b", "macro", ""), token("%d", "macro", ""),token("%f", "macro", ""),token("lit1", "string", ""),token("lit2", "string", ""),token("lit3", "string", ""),
fmt.Printf("%% %d lit2", 3, i, x) //@ token("%d", "macro", ""),token("%%", "string", ""),token("lit2", "string", ""),
fmt.Printf("Hello %% \n %s, you \t%% \n have %d new m%%essages!", "Alice", 5) //@ token("%s", "macro", ""),token("%d", "macro", "")
fmt.Printf("%d \nss \x25[2]d", 234, 123) //@ token("%d", "macro", ""),token("\\x25[2]d", "macro", "")
fmt.Printf("start%[2]*.[1]*[3]dmiddle%send", 4, 5, 6) //@ token("%[2]*.[1]*[3]d", "macro", ""),token("start", "string", ""),token("%s", "macro", ""),token("middle", "string", ""),token("end", "string", "")
}

0 comments on commit 9db16d1

Please sign in to comment.