Move tokenizer config onto grammar, rename flag
Based on the feedback in https://github.com/python/black/pull/845#issuecomment-490622711:

- Remove TokenizerConfig, and add a field to Grammar instead.
- Pass the Grammar to the tokenizer.
- Rename `ASYNC_IS_RESERVED_KEYWORD` to `ASYNC_KEYWORDS` and `ASYNC_IS_VALID_IDENTIFIER` to `ASYNC_IDENTIFIERS`.
parent f8617f975d
commit 448885b256
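A minimal sketch of the new shape (not part of the diff; the bare Grammar() and the sample source are illustrative only): the async/await switch now lives on the Grammar object, and the tokenizer reads it from whatever grammar it is handed instead of from a separate TokenizerConfig.

    import io
    from blib2to3.pgen2 import tokenize
    from blib2to3.pgen2.grammar import Grammar

    # Illustrative only: a bare Grammar just to show the new flag; black passes one
    # of the grammars that blib2to3.pygram.initialize() sets up.
    grammar = Grammar()
    grammar.async_keywords = True  # Python 3.7+: async/await are reserved keywords

    source = "async def f():\n    await g()\n"
    for tok in tokenize.generate_tokens(io.StringIO(source).readline, grammar=grammar):
        print(tok[0], repr(tok[1]))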
black.py (75 lines changed)
@@ -48,7 +48,6 @@
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pgen2.parse import ParseError
-from blib2to3.pgen2.tokenize import TokenizerConfig


 __version__ = "19.3b0"
@@ -139,18 +138,18 @@ class Feature(Enum):
     TRAILING_COMMA_IN_DEF = 5
     # The following two feature-flags are mutually exclusive, and exactly one should be
     # set for every version of python.
-    ASYNC_IS_VALID_IDENTIFIER = 6
-    ASYNC_IS_RESERVED_KEYWORD = 7
+    ASYNC_IDENTIFIERS = 6
+    ASYNC_KEYWORDS = 7


 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
-    TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
     TargetVersion.PY35: {
         Feature.UNICODE_LITERALS,
         Feature.TRAILING_COMMA_IN_CALL,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY36: {
         Feature.UNICODE_LITERALS,
@@ -158,7 +157,7 @@ class Feature(Enum):
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY37: {
         Feature.UNICODE_LITERALS,
@@ -166,7 +165,7 @@ class Feature(Enum):
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
     TargetVersion.PY38: {
         Feature.UNICODE_LITERALS,
@@ -174,7 +173,7 @@ class Feature(Enum):
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
 }

@@ -760,62 +759,42 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
     return tiow.read(), encoding, newline


-@dataclass(frozen=True)
-class ParserConfig:
-    grammar: Grammar
-    tokenizer_config: TokenizerConfig = TokenizerConfig()
-
-
-def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
+def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
     if not target_versions:
         # No target_version specified, so try all grammars.
         return [
             # Python 3.7+
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=True),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
             # Python 3.0-3.6
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=False),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement,
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     elif all(version.is_python2() for version in target_versions):
         # Python 2-only code, so try Python 2 grammars.
         return [
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     else:
         # Python 3-compatible code, so only try Python 3 grammar.
-        configs = []
+        grammars = []
         # If we have to parse both, try to parse async as a keyword first
-        if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
+        if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
             # Python 3.7+
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=True),
-                )
-            )
+            grammars.append(
+                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords  # noqa: B950
+            )
-        if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
+        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
             # Python 3.0-3.6
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=False),
-                )
-            )
+            grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
         # At least one of the above branches must have been taken, because every Python
-        # version has exactly one of the two 'ASYNC_IS_*' flags
-        return configs
+        # version has exactly one of the two 'ASYNC_*' flags
+        return grammars


 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
@@ -823,12 +802,8 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
     if src_txt[-1:] != "\n":
         src_txt += "\n"

-    for parser_config in get_parser_configs(set(target_versions)):
-        drv = driver.Driver(
-            parser_config.grammar,
-            pytree.convert,
-            tokenizer_config=parser_config.tokenizer_config,
-        )
+    for grammar in get_grammars(set(target_versions)):
+        drv = driver.Driver(grammar, pytree.convert)
         try:
             result = drv.parse_string(src_txt, True)
             break
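A hedged usage sketch (not part of the diff; the target version and source string below are made up for illustration): lib2to3_parse() now iterates over plain Grammar objects from get_grammars() and builds a Driver directly from each one.

    import black

    src = "async def f():\n    await g()\n"
    # With PY37 as the only target, just the async-keywords grammar is tried.
    node = black.lib2to3_parse(src, {black.TargetVersion.PY37})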
blib2to3/pgen2/driver.py

@@ -34,14 +34,12 @@ def __init__(
         grammar,
         convert=None,
         logger=None,
-        tokenizer_config=tokenize.TokenizerConfig(),
     ):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
         self.logger = logger
         self.convert = convert
-        self.tokenizer_config = tokenizer_config

     def parse_tokens(self, tokens, debug=False):
         """Parse a series of tokens and return the syntax tree."""
@@ -104,7 +102,7 @@ def parse_tokens(self, tokens, debug=False):

     def parse_stream_raw(self, stream, debug=False):
         """Parse a stream and return the syntax tree."""
-        tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
+        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
         return self.parse_tokens(tokens, debug)

     def parse_stream(self, stream, debug=False):
@@ -120,7 +118,7 @@ def parse_string(self, text, debug=False):
         """Parse a string and return the syntax tree."""
         tokens = tokenize.generate_tokens(
             io.StringIO(text).readline,
-            config=self.tokenizer_config,
+            grammar=self.grammar
         )
         return self.parse_tokens(tokens, debug)

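A small sketch of driving the parser after this change (assumes pygram.initialize() has populated the module-level grammars, as black does at import time; the source string is illustrative):

    from blib2to3 import pygram, pytree
    from blib2to3.pgen2 import driver

    pygram.initialize()  # load the packaged grammar tables
    grammar = pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
    drv = driver.Driver(grammar, convert=pytree.convert)
    tree = drv.parse_string("async def f():\n    await g()\n")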
blib2to3/pgen2/driver.pyi

@@ -8,20 +8,13 @@ from logging import Logger
 from blib2to3.pytree import _Convert, _NL
 from blib2to3.pgen2 import _Path
 from blib2to3.pgen2.grammar import Grammar
-from blib2to3.pgen2.tokenize import TokenizerConfig


 class Driver:
     grammar: Grammar
     logger: Logger
     convert: _Convert
-    def __init__(
-        self,
-        grammar: Grammar,
-        convert: Optional[_Convert] = ...,
-        logger: Optional[Logger] = ...,
-        tokenizer_config: TokenizerConfig = ...
-    ) -> None: ...
+    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
     def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
     def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
blib2to3/pgen2/grammar.py

@@ -85,6 +85,8 @@ def __init__(self):
         self.tokens = {}
         self.symbol2label = {}
         self.start = 256
+        # Python 3.7+ parses async as a keyword, not an identifier
+        self.async_keywords = False

     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
@@ -113,6 +115,7 @@ def copy(self):
         new.labels = self.labels[:]
         new.states = self.states[:]
         new.start = self.start
+        new.async_keywords = self.async_keywords
         return new

     def report(self):
blib2to3/pgen2/grammar.pyi

@@ -19,6 +19,7 @@ class Grammar:
     tokens: Dict[int, int]
     symbol2label: Dict[Text, int]
     start: int
+    async_keywords: bool
     def __init__(self) -> None: ...
     def dump(self, filename: _Path) -> None: ...
     def load(self, filename: _Path) -> None: ...
blib2to3/pgen2/tokenize.py

@@ -31,7 +31,6 @@

 import re
 from codecs import BOM_UTF8, lookup
-from attr import dataclass
 from blib2to3.pgen2.token import *

 from . import token
@@ -138,10 +137,6 @@ def _combinations(*l):

 tabsize = 8

-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
-
 class TokenError(Exception): pass

 class StopTokenizing(Exception): pass
@@ -339,7 +334,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)

-def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
+def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
@@ -363,7 +358,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):

     # If we know we're parsing 3.7+, we can unconditionally parse `async` and
     # `await` as keywords.
-    async_is_reserved_keyword = config.async_is_reserved_keyword
+    async_keywords = False if grammar is None else grammar.async_keywords
     # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
     async_def = False
@@ -514,7 +509,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
                        yield (STRING, token, spos, epos, line)
                elif initial.isidentifier():               # ordinary name
                    if token in ('async', 'await'):
-                       if async_is_reserved_keyword or async_def:
+                       if async_keywords or async_def:
                            yield (ASYNC if token == 'async' else AWAIT,
                                   token, spos, epos, line)
                            continue
blib2to3/pgen2/tokenize.pyi

@@ -1,18 +1,15 @@
 # Stubs for lib2to3.pgen2.tokenize (Python 3.6)
 # NOTE: Only elements from __all__ are present.

-from typing import Callable, Iterable, Iterator, List, Text, Tuple
-from attr import dataclass
+from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple
 from blib2to3.pgen2.token import *  # noqa
+from blib2to3.pygram import Grammar


 _Coord = Tuple[int, int]
 _TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
 _TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]

-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
-
 class TokenError(Exception): ...
 class StopTokenizing(Exception): ...
@@ -30,5 +27,6 @@ class Untokenizer:

 def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
 def generate_tokens(
-    readline: Callable[[], Text]
+    readline: Callable[[], Text],
+    grammar: Optional[Grammar] = ...
 ) -> Iterator[_TokenInfo]: ...
blib2to3/pygram.py

@@ -33,6 +33,7 @@ def initialize(cache_dir=None):
     global python_grammar
     global python_grammar_no_print_statement
     global python_grammar_no_print_statement_no_exec_statement
+    global python_grammar_no_print_statement_no_exec_statement_async_keywords
     global python_symbols
     global pattern_grammar
     global pattern_symbols
@@ -47,11 +48,17 @@ def initialize(cache_dir=None):
     python_grammar_no_print_statement = python_grammar.copy()
     del python_grammar_no_print_statement.keywords["print"]

-    # Python 3
+    # Python 3.0-3.6
     python_grammar_no_print_statement_no_exec_statement = python_grammar.copy()
     del python_grammar_no_print_statement_no_exec_statement.keywords["print"]
     del python_grammar_no_print_statement_no_exec_statement.keywords["exec"]

+    # Python 3.7+
+    python_grammar_no_print_statement_no_exec_statement_async_keywords = (
+        python_grammar_no_print_statement_no_exec_statement.copy()
+    )
+    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+
     pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
                                                     cache_dir)
     pattern_symbols = Symbols(pattern_grammar)
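The net effect, as a hedged sketch (the assert lines are illustrative, not from the diff): the new *_async_keywords grammar is a copy of the Python 3 grammar with the flag flipped, and that flag now carries what TokenizerConfig(async_is_reserved_keyword=True) used to express.

    from blib2to3 import pygram

    pygram.initialize()
    py3 = pygram.python_grammar_no_print_statement_no_exec_statement
    py37 = pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
    assert py3.async_keywords is False   # async/await may still be identifiers
    assert py37.async_keywords is True   # async/await are always keywords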
blib2to3/pygram.pyi

@@ -118,6 +118,7 @@ class pattern_symbols(Symbols):
 python_grammar: Grammar
 python_grammar_no_print_statement: Grammar
 python_grammar_no_print_statement_no_exec_statement: Grammar
+python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
 python_grammar_no_exec_statement: Grammar
 pattern_grammar: Grammar

tests/data/python37.py

@@ -1,10 +1,16 @@
 #!/usr/bin/env python3.7

+
 def f():
     return (i * 2 async for i in arange(42))

+
 def g():
-    return (something_long * something_long async for something_long in async_generator(with_an_argument))
+    return (
+        something_long * something_long
+        async for something_long in async_generator(with_an_argument)
+    )

+
 async def func():
     if test:
@@ -15,9 +21,11 @@ async def func():
             )
         ]

+
 def awaited_generator_value(n):
     return (await awaitable for awaitable in awaitable_list)

+
 def make_arange(n):
     return (i * 2 for i in range(n) if await wrap(i))
