Commit 386541d: semtok
perrinjerome committed Jun 23, 2024
1 parent 52277d1
Showing 5 changed files with 71 additions and 116 deletions.
151 changes: 66 additions & 85 deletions server/buildoutls/semantic_tokens.py
@@ -1,15 +1,13 @@
 from typing import Literal, TypeAlias
 import logging
 from io import StringIO
 import pygments
 
 from lsprotocol.types import SemanticTokens, Range, Position
 import attrs
 import pygments.lexers
 import pygments.token
 
-from .types import SEMANTIC_TOKENS
-from .buildout import BuildoutProfile, BuildoutTemplate
+from .buildout import BuildoutProfile
 from .recipes import RecipeOptionKind


@@ -47,43 +45,50 @@ def token_type(self):
   }.get(self.kind, 0)


-def get_python_tokens(source_code: str) -> list[int]:
-  data = []
-  lexer = pygments.lexers.get_lexer_by_name("python")
-  for token in lexer.get_tokens_unprocessed(source_code):
-    pass
-  return data
+# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.18/specification/#textDocument_semanticTokens
+SEMANTIC_TOKEN_TYPES = [
+  "comment",
+  "string",
+  "number",
+  "keyword",
+  "class",
+  "function",
+  "variable",
+  "type",
+]
 
-def get_token_type(token_pygment_type: pygments.token._TokenType) -> int:
-  from pygments.token import Keyword, Comment, String, Number, Token, Name
+token_type_by_type = {t: SEMANTIC_TOKEN_TYPES.index(t) for t in SEMANTIC_TOKEN_TYPES}
+
+
+def get_token_type(token_pygment_type: pygments.token._TokenType) -> int | None:
+  from pygments.token import Keyword, Comment, String, Number, Name
 
-  if token_pygment_type in Keyword:
-    return 15
   if token_pygment_type in Comment:
-    return 17
+    return token_type_by_type["comment"]
   if token_pygment_type in String:
-    return 18
+    return token_type_by_type["string"]
   if token_pygment_type in Number:
-    return 19
+    return token_type_by_type["number"]
   if token_pygment_type in Name.Class:
-    return 2
+    return token_type_by_type["class"]
   if token_pygment_type in Name.Function:
-    return 12
-  return 0
+    return token_type_by_type["function"]
+  if token_pygment_type in Name.Builtin or token_pygment_type in Keyword.Constant:
+    return token_type_by_type["type"]
+  if token_pygment_type in Name:
+    return token_type_by_type["variable"]
+  if token_pygment_type in Keyword:
+    return token_type_by_type["keyword"]
+  return None
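Editor's note: the hard-coded return values in the old version (15, 17, 18, 19, …) were indices into the SEMANTIC_TOKENS legend that this commit deletes from types.py; token_type_by_type replaces them with indices into the new, shorter SEMANTIC_TOKEN_TYPES legend. As a sketch of how the new mapping behaves (illustration only, not part of the commit; assumes pygments is installed — get_tokens_unprocessed yields (index, token_type, text) tuples):

  import pygments.lexers

  lexer = pygments.lexers.get_lexer_by_name("python")
  for _, ttype, text in lexer.get_tokens_unprocessed("x = 1  # hi"):
      print(repr(text), get_token_type(ttype))
  # 'x'    -> 6 ("variable", since a plain Name token matches the Name check)
  # '1'    -> 2 ("number")
  # '# hi' -> 0 ("comment")
  # '=' and whitespace -> None (no semantic token is emitted for them)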


 def get_semantic_tokens_and_semantic_errors(
-  parsed: BuildoutProfile | BuildoutTemplate | None,
+  parsed: BuildoutProfile,
 ) -> tuple[SemanticTokens, list]:
   data: list[int] = []
   errors: list = []
 
-  if not isinstance(parsed, BuildoutProfile):
-    return SemanticTokens(data=data), errors
-
   prev_line, prev_character = 0, 0
 
   for section_value in parsed.values():
     if recipe := section_value.getRecipe():
       for option_key, option_value in section_value.items():
@@ -92,22 +97,18 @@ def get_semantic_tokens_and_semantic_errors(
         if option_definition.kind == RecipeOptionKind.PythonScript:
           lexer = pygments.lexers.get_lexer_by_name("python")
 
-          source_code = (
-            option_value.value
-          )  # XXX isn't this too much white space normalized ?
-
-          # XXX character_adjust is wrong, we want the indentation
-          character_adjust = option_value.location.range.start.character
-          line_adjust = option_value.location.range.start.line
-          character = character_adjust
-          line = line_adjust
-          prev_line, prev_character = line, character
+          source_code = option_value.value
+          delta_line = option_value.location.range.start.line + 1
+          character_adjust = delta_start = 4  # TODO indentation
 
           for pos, token_pygment_type, token_text in lexer.get_tokens_unprocessed(
             source_code
           ):
-            line = prev_line
-            character = prev_character
-            if token_text in ("\n", "\r", "\r\n"):
-              line += 1
-              character = character_adjust
-            length = len(token_text)
 
             # A specific token i in the file consists of the following array indices:
             #
             # at index 5*i - deltaLine: token line number, relative to the previous token
@@ -119,56 +120,36 @@ def get_semantic_tokens_and_semantic_errors(
             # SemanticTokensLegend.tokenTypes. We currently ask that tokenType < 65536.
             # at index 5*i+4 - tokenModifiers: each set bit will be looked up in
             # SemanticTokensLegend.tokenModifiers
-            tok = [
-              line - prev_line,
-              character,
-              length,
-              get_token_type(token_pygment_type),
-              0,
-            ]
-            logger.info("adding token %s", tok)
-            data.extend(tok)
-            prev_line = line
-            prev_character = character + length
-
-          try:
-            raise SyntaxError("skip")
-            source_code = option_value.value
-            for token in get_python_tokens(source_code):
-              line, character = token.start.line, token.start.character
-
-              if line == prev_line:
-                pos = character - prev_character
-              else:
-                pos = character
-
-              length = len(token.text)
-
-              # A specific token i in the file consists of the following array indices:
-              #
-              # at index 5*i - deltaLine: token line number, relative to the previous token
-              # at index 5*i+1 - deltaStart: token start character, relative to the previous token (relative to 0 or the previous token’s start if they are on the same line)
-              # at index 5*i+2 - length: the length of the token.
-              # at index 5*i+3 - tokenType: will be looked up in SemanticTokensLegend.tokenTypes. We currently ask that tokenType < 65536.
-              # at index 5*i+4 - tokenModifiers: each set bit will be looked up in SemanticTokensLegend.tokenModifiers
-              data.extend(
-                [
-                  line - prev_line,
-                  pos,
-                  length,
-                  token.token_type,
-                  0,
-                ]
-              )
-              prev_line = line
-              prev_character = character + length
-
-              # module = ast.parse(
-              #   source=option_value.value,
-              #   filename="<option>",
-              # )
-          except SyntaxError as e:
-            pass
+            token_type = get_token_type(token_pygment_type)
+            if token_type is not None:
+              # explode a token spanning multiple lines into multiple tokens
+              for token_text_line in token_text.splitlines():
+                tok = [
+                  delta_line,
+                  delta_start,
+                  len(token_text_line),
+                  token_type,
+                  0,
+                ]
+                if 1:
+                  logger.info(
+                    f"🌞 token {delta_line=}, {delta_start=} {tok=} {token_text=} {token_text_line=} {token_pygment_type=}"
+                  )
+                data.extend(tok)
+                delta_line = 1
+                delta_start = character_adjust
+              delta_line = 0
+              delta_start = len(token_text)
+            else:
+              if line_count := (token_text.replace("\r\n", "\n").count("\n")):
+                delta_line += line_count
+                delta_start = character_adjust
+              else:
+                delta_start += len(token_text)
+              if 0:
+                logger.info(
+                  f"skipped token {line_count=} {delta_line=}, {delta_start=} {token_text=} {token_pygment_type=}"
+                )
 
           # prev_line, prev_character = params.prev_line, params.prev_character
           # this_range = Range(
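Editor's note — a worked example of the relative encoding described in the comment above (illustration only, not part of the commit; uses the new legend, where "comment" is 0 and "number" is 2). A comment of length 5 at line 2, column 4, followed by a number of length 3 at line 2, column 12, encodes as:

  data = [2, 4, 5, 0, 0,   # deltaLine 2 from document start, deltaStart 4
          0, 8, 3, 2, 0]   # same line, so deltaStart is 12 - 4 = 8

The per-line reset in the loop above (delta_line = 1, delta_start = character_adjust) applies the same rule when a multi-line token is exploded into one token per line: each continuation piece is one line below the previous one, and its start is measured from column 0.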
6 changes: 4 additions & 2 deletions server/buildoutls/server.py
@@ -771,12 +771,14 @@ async def lsp_document_link(

 @server.feature(
   TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL,
-  SemanticTokensLegend(token_types=types.SEMANTIC_TOKENS, token_modifiers=[]),
+  SemanticTokensLegend(
+    token_types=semantic_tokens.SEMANTIC_TOKEN_TYPES, token_modifiers=[]
+  ),
 )
 async def lsp_semantic_tokens_full(
   ls: LanguageServer, params: SemanticTokensParams
 ) -> SemanticTokens:
-  parsed = await buildout.open(ls, params.text_document.uri)
+  parsed = await buildout.parse(ls, params.text_document.uri)
 
   tokens, errors = semantic_tokens.get_semantic_tokens_and_semantic_errors(
     parsed,
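Editor's note: the legend registered with the feature must list token types in exactly the order used to produce the tokenType indices in the data array, which is why it now points at semantic_tokens.SEMANTIC_TOKEN_TYPES instead of the deleted types.SEMANTIC_TOKENS. A sketch of how a client would decode a response against this legend (illustration only; not part of the commit):

  line = char = 0
  for i in range(0, len(tokens.data), 5):
      delta_line, delta_start, length, token_type, _modifiers = tokens.data[i:i + 5]
      line += delta_line
      # deltaStart is absolute on a new line, relative on the same line
      char = delta_start if delta_line else char + delta_start
      print(f"{line}:{char} len={length} {SEMANTIC_TOKEN_TYPES[token_type]}")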
1 change: 0 additions & 1 deletion server/buildoutls/tests/test_semantic_tokens.py
@@ -1,7 +1,6 @@
 from lsprotocol.types import (
   TextDocumentIdentifier,
   SemanticTokensParams,
-  SemanticTokensRangeParams,
 )
 from pygls.server import LanguageServer

28 changes: 0 additions & 28 deletions server/buildoutls/types.py
@@ -37,31 +37,3 @@ class OpenPypiPageCommandParams(TypedDict):
 class UpdateMD5SumCommandParams(TypedDict):
   document_uri: str
   section_name: str
-
-
-# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.18/specification/#textDocument_semanticTokens
-SEMANTIC_TOKENS: List[str] = [
-  "namespace",
-  "type",
-  "class",
-  "enum",
-  "interface",
-  "struct",  # 5
-  "typeParameter",
-  "parameter",
-  "variable",
-  "property",
-  "enumMember",  # 10
-  "event",
-  "function",
-  "method",
-  "macro",
-  "keyword",  # 15
-  "modifier",
-  "comment",
-  "string",
-  "number",
-  "regexp",
-  "operator",
-  "decorator",
-]
1 change: 1 addition & 0 deletions server/setup.py
@@ -48,6 +48,7 @@
"pytest-cov",
"pytest",
"types-cachetools",
"types-pygments",
"types-setuptools",
"types-toml",
"ruff",
