Add support for always tokenizing async/await as keywords
Fixes psf#593

I looked into this bug with @ambv and @carljm, and we reached the conclusion that it's not possible for the tokenizer to determine whether async/await is a keyword inside all possible generators without breaking the grammar for older versions of Python.

Instead, we introduce a new tokenizer mode for Python 3.7+ that causes all instances of async/await to be tokenized as reserved keywords, which should fix async/await inside generator expressions.
bgw authored and ambv committed May 9, 2019
1 parent 8c8aded commit f8617f9
Showing 8 changed files with 190 additions and 18 deletions.
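
For context, these are the two cases added to tests/data/python37.py below: `await` inside generator expressions in ordinary `def` functions, which only parse once async/await are tokenized as reserved keywords (the new 3.7+ mode):

# From the new tests/data/python37.py fixture.
def awaited_generator_value(n):
    return (await awaitable for awaitable in awaitable_list)


def make_arange(n):
    return (i * 2 for i in range(n) if await wrap(i))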
82 changes: 70 additions & 12 deletions black.py
@@ -48,6 +48,7 @@
from blib2to3.pgen2 import driver, token
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenizerConfig


__version__ = "19.3b0"
@@ -136,33 +137,44 @@ class Feature(Enum):
NUMERIC_UNDERSCORES = 3
TRAILING_COMMA_IN_CALL = 4
TRAILING_COMMA_IN_DEF = 5
# The following two feature-flags are mutually exclusive, and exactly one should be
# set for every version of python.
ASYNC_IS_VALID_IDENTIFIER = 6
ASYNC_IS_RESERVED_KEYWORD = 7


VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
TargetVersion.PY27: set(),
TargetVersion.PY33: {Feature.UNICODE_LITERALS},
TargetVersion.PY34: {Feature.UNICODE_LITERALS},
TargetVersion.PY35: {Feature.UNICODE_LITERALS, Feature.TRAILING_COMMA_IN_CALL},
TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
TargetVersion.PY35: {
Feature.UNICODE_LITERALS,
Feature.TRAILING_COMMA_IN_CALL,
Feature.ASYNC_IS_VALID_IDENTIFIER,
},
TargetVersion.PY36: {
Feature.UNICODE_LITERALS,
Feature.F_STRINGS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.ASYNC_IS_VALID_IDENTIFIER,
},
TargetVersion.PY37: {
Feature.UNICODE_LITERALS,
Feature.F_STRINGS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.ASYNC_IS_RESERVED_KEYWORD,
},
TargetVersion.PY38: {
Feature.UNICODE_LITERALS,
Feature.F_STRINGS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.ASYNC_IS_RESERVED_KEYWORD,
},
}
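
As the new comment on the feature flags notes, every target version should set exactly one of ASYNC_IS_VALID_IDENTIFIER and ASYNC_IS_RESERVED_KEYWORD. A quick way to check that invariant against the table above (a sketch, not part of the commit; it assumes Feature and VERSION_TO_FEATURES are importable from the black module):

from black import Feature, VERSION_TO_FEATURES

# Sketch only: every target version must set exactly one of the two
# mutually exclusive async feature flags.
ASYNC_FLAGS = {Feature.ASYNC_IS_VALID_IDENTIFIER, Feature.ASYNC_IS_RESERVED_KEYWORD}
for version, features in VERSION_TO_FEATURES.items():
    assert len(features & ASYNC_FLAGS) == 1, f"{version} must set exactly one async flag"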

@@ -748,29 +760,75 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
return tiow.read(), encoding, newline


def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
@dataclass(frozen=True)
class ParserConfig:
grammar: Grammar
tokenizer_config: TokenizerConfig = TokenizerConfig()


def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
if not target_versions:
# No target_version specified, so try all grammars.
return [
pygram.python_grammar_no_print_statement_no_exec_statement,
pygram.python_grammar_no_print_statement,
pygram.python_grammar,
# Python 3.7+
ParserConfig(
pygram.python_grammar_no_print_statement_no_exec_statement,
TokenizerConfig(async_is_reserved_keyword=True),
),
# Python 3.0-3.6
ParserConfig(
pygram.python_grammar_no_print_statement_no_exec_statement,
TokenizerConfig(async_is_reserved_keyword=False),
),
# Python 2.7 with future print_function import
ParserConfig(pygram.python_grammar_no_print_statement),
# Python 2.7
ParserConfig(pygram.python_grammar),
]
elif all(version.is_python2() for version in target_versions):
# Python 2-only code, so try Python 2 grammars.
return [pygram.python_grammar_no_print_statement, pygram.python_grammar]
return [
# Python 2.7 with future print_function import
ParserConfig(pygram.python_grammar_no_print_statement),
# Python 2.7
ParserConfig(pygram.python_grammar),
]
else:
# Python 3-compatible code, so only try Python 3 grammar.
return [pygram.python_grammar_no_print_statement_no_exec_statement]
configs = []
# If we have to parse both, try to parse async as a keyword first
if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
# Python 3.7+
configs.append(
ParserConfig(
pygram.python_grammar_no_print_statement_no_exec_statement,
TokenizerConfig(async_is_reserved_keyword=True),
)
)
if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
# Python 3.0-3.6
configs.append(
ParserConfig(
pygram.python_grammar_no_print_statement_no_exec_statement,
TokenizerConfig(async_is_reserved_keyword=False),
)
)
# At least one of the above branches must have been taken, because every Python
# version has exactly one of the two 'ASYNC_IS_*' flags
return configs


def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
"""Given a string with source, return the lib2to3 Node."""
if src_txt[-1:] != "\n":
src_txt += "\n"

for grammar in get_grammars(set(target_versions)):
drv = driver.Driver(grammar, pytree.convert)
for parser_config in get_parser_configs(set(target_versions)):
drv = driver.Driver(
parser_config.grammar,
pytree.convert,
tokenizer_config=parser_config.tokenizer_config,
)
try:
result = drv.parse_string(src_txt, True)
break
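
To make the fallback order concrete, a rough usage sketch (not part of the commit; it assumes the names above are importable from the black module): with both 3.6 and 3.7 targeted, two configs for the same Python 3 grammar are tried, keyword-mode first.

import black

# Sketch: the 3.7-only construct parses because the keyword-mode tokenizer is tried.
src = (
    "def make_arange(n):\n"
    "    return (i * 2 for i in range(n) if await wrap(i))\n"
)
black.lib2to3_parse(src, target_versions={black.TargetVersion.PY37})

# With mixed targets, both Python 3 configs are returned, keyword-mode first.
configs = black.get_parser_configs({black.TargetVersion.PY36, black.TargetVersion.PY37})
assert [c.tokenizer_config.async_is_reserved_keyword for c in configs] == [True, False]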
16 changes: 13 additions & 3 deletions blib2to3/pgen2/driver.py
@@ -29,12 +29,19 @@

class Driver(object):

def __init__(self, grammar, convert=None, logger=None):
def __init__(
self,
grammar,
convert=None,
logger=None,
tokenizer_config=tokenize.TokenizerConfig(),
):
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
self.logger = logger
self.convert = convert
self.tokenizer_config = tokenizer_config

def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
@@ -97,7 +104,7 @@ def parse_tokens(self, tokens, debug=False):

def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline)
tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
return self.parse_tokens(tokens, debug)

def parse_stream(self, stream, debug=False):
@@ -111,7 +118,10 @@ def parse_file(self, filename, encoding=None, debug=False):

def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(io.StringIO(text).readline)
tokens = tokenize.generate_tokens(
io.StringIO(text).readline,
config=self.tokenizer_config,
)
return self.parse_tokens(tokens, debug)

def _partially_consume_prefix(self, prefix, column):
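
Seen from the blib2to3 side, the new parameter is threaded from the Driver into generate_tokens. A minimal sketch mirroring how black.py constructs its driver (assuming the vendored blib2to3 package is importable):

from blib2to3 import pygram, pytree
from blib2to3.pgen2 import driver
from blib2to3.pgen2.tokenize import TokenizerConfig

# Sketch: a driver whose tokenizer always treats async/await as keywords.
drv = driver.Driver(
    pygram.python_grammar_no_print_statement_no_exec_statement,
    convert=pytree.convert,
    tokenizer_config=TokenizerConfig(async_is_reserved_keyword=True),
)
tree = drv.parse_string("async def f():\n    await g()\n")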
9 changes: 8 additions & 1 deletion blib2to3/pgen2/driver.pyi
@@ -8,13 +8,20 @@ from logging import Logger
from blib2to3.pytree import _Convert, _NL
from blib2to3.pgen2 import _Path
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.tokenize import TokenizerConfig


class Driver:
grammar: Grammar
logger: Logger
convert: _Convert
def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
def __init__(
self,
grammar: Grammar,
convert: Optional[_Convert] = ...,
logger: Optional[Logger] = ...,
tokenizer_config: TokenizerConfig = ...
) -> None: ...
def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
12 changes: 10 additions & 2 deletions blib2to3/pgen2/tokenize.py
@@ -31,6 +31,7 @@

import re
from codecs import BOM_UTF8, lookup
from attr import dataclass
from blib2to3.pgen2.token import *

from . import token
@@ -137,6 +138,10 @@ def _combinations(*l):

tabsize = 8

@dataclass(frozen=True)
class TokenizerConfig:
async_is_reserved_keyword: bool = False

class TokenError(Exception): pass

class StopTokenizing(Exception): pass
@@ -334,7 +339,7 @@ def untokenize(iterable):
ut = Untokenizer()
return ut.untokenize(iterable)

def generate_tokens(readline):
def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
@@ -356,6 +361,9 @@ def generate_tokens(readline):
contline = None
indents = [0]

# If we know we're parsing 3.7+, we can unconditionally parse `async` and
# `await` as keywords.
async_is_reserved_keyword = config.async_is_reserved_keyword
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
async_def = False
@@ -506,7 +514,7 @@ def generate_tokens(readline):
yield (STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
if async_def:
if async_is_reserved_keyword or async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
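
A small illustration of the difference at the token level (a sketch, not part of the commit; it assumes blib2to3's token names mirror lib2to3's tok_name table): with the flag set, async/await outside an `async def` come back as ASYNC/AWAIT tokens rather than plain NAME tokens.

import io
from blib2to3.pgen2 import token, tokenize

src = "def f(n):\n    return (i for i in range(n) if await wrap(i))\n"

def async_await_tokens(config):
    # Collect the token type name for every `async`/`await` occurrence.
    gen = tokenize.generate_tokens(io.StringIO(src).readline, config=config)
    return [token.tok_name[typ] for typ, text, *_ in gen if text in ("async", "await")]

print(async_await_tokens(tokenize.TokenizerConfig(async_is_reserved_keyword=False)))  # ['NAME']
print(async_await_tokens(tokenize.TokenizerConfig(async_is_reserved_keyword=True)))   # ['AWAIT']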
4 changes: 4 additions & 0 deletions blib2to3/pgen2/tokenize.pyi
@@ -2,13 +2,17 @@
# NOTE: Only elements from __all__ are present.

from typing import Callable, Iterable, Iterator, List, Text, Tuple
from attr import dataclass
from blib2to3.pgen2.token import * # noqa


_Coord = Tuple[int, int]
_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]

@dataclass(frozen=True)
class TokenizerConfig:
async_is_reserved_keyword: bool = False

class TokenError(Exception): ...
class StopTokenizing(Exception): ...
49 changes: 49 additions & 0 deletions tests/data/async_as_identifier.py
@@ -0,0 +1,49 @@
def async():
pass


def await():
pass


await = lambda: None
async = lambda: None
async()
await()


def sync_fn():
await = lambda: None
async = lambda: None
async()
await()


async def async_fn():
await async_fn()


# output
def async():
pass


def await():
pass


await = lambda: None
async = lambda: None
async()
await()


def sync_fn():
await = lambda: None
async = lambda: None
async()
await()


async def async_fn():
await async_fn()
16 changes: 16 additions & 0 deletions tests/data/python37.py
@@ -14,6 +14,14 @@ async def func():
self.async_inc, arange(8), batch_size=3
)
]

def awaited_generator_value(n):
return (await awaitable for awaitable in awaitable_list)

def make_arange(n):
return (i * 2 for i in range(n) if await wrap(i))


# output


@@ -39,3 +47,11 @@ async def func():
self.async_inc, arange(8), batch_size=3
)
]


def awaited_generator_value(n):
return (await awaitable for awaitable in awaitable_list)


def make_arange(n):
return (i * 2 for i in range(n) if await wrap(i))
20 changes: 20 additions & 0 deletions tests/test_black.py
Expand Up @@ -502,15 +502,35 @@ def test_stub(self) -> None:
self.assertFormatEqual(expected, actual)
black.assert_stable(source, actual, mode)

@patch("black.dump_to_file", dump_to_stderr)
def test_async_as_identifier(self) -> None:
source_path = (THIS_DIR / "data" / "async_as_identifier.py").resolve()
source, expected = read_data("async_as_identifier")
actual = fs(source)
self.assertFormatEqual(expected, actual)
major, minor = sys.version_info[:2]
if major < 3 or (major <= 3 and minor < 7):
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
# ensure black can parse this when the target is 3.6
self.invokeBlack([str(source_path), "--target-version", "py36"])
# but not on 3.7, because async/await is no longer an identifier
self.invokeBlack([str(source_path), "--target-version", "py37"], exit_code=123)

@patch("black.dump_to_file", dump_to_stderr)
def test_python37(self) -> None:
source_path = (THIS_DIR / "data" / "python37.py").resolve()
source, expected = read_data("python37")
actual = fs(source)
self.assertFormatEqual(expected, actual)
major, minor = sys.version_info[:2]
if major > 3 or (major == 3 and minor >= 7):
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
# ensure black can parse this when the target is 3.7
self.invokeBlack([str(source_path), "--target-version", "py37"])
# but not on 3.6, because we use async as a reserved keyword
self.invokeBlack([str(source_path), "--target-version", "py36"], exit_code=123)

@patch("black.dump_to_file", dump_to_stderr)
def test_fmtonoff(self) -> None:
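
End to end, the behavior the two new tests assert can also be reproduced through black's Python API (a sketch, not part of the commit; it assumes the 19.3b0 format_str, FileMode, and InvalidInput names shown elsewhere in this codebase):

import black

src = (
    "def make_arange(n):\n"
    "    return (i * 2 for i in range(n) if await wrap(i))\n"
)

# Targeting 3.7+, async/await are reserved keywords and the source formats fine.
black.format_str(src, mode=black.FileMode(target_versions={black.TargetVersion.PY37}))

# Targeting 3.6, async/await stay identifiers, so parsing fails; this is what
# drives the exit_code=123 assertions above.
try:
    black.format_str(src, mode=black.FileMode(target_versions={black.TargetVersion.PY36}))
except black.InvalidInput as exc:
    print("py36 target cannot parse:", exc)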
