Support compilation with mypyc (#1009)

* Make most of blib2to3 directly typed and mypyc-compatible

This used a combination of retype and pytype's merge-pyi to do the
initial merges of the stubs, which then required manual tweaking to
make them actually typecheck and work with mypyc.

Co-authored-by: Sanjit Kalapatapu <sanjitkal@gmail.com>
Co-authored-by: Michael J. Sullivan <sully@msully.net>

* Make black able to compile and run with mypyc

The changes made fall into a couple of categories:
 * Fixing actual type mistakes that slipped through the cracks
 * Working around a couple of mypy bugs (the most annoying being that
   we need to add type annotations in a number of places where
   variables are initialized to None; see the sketch below)
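
A minimal sketch of that workaround, with illustrative names rather than code
copied from this diff: under the settings mypyc needs, mypy cannot infer a
usable type for a variable that starts out as None, so the variable gets an
explicit Optional[...] annotation up front (black.py does this below for
matching_bracket, opening_bracket, first_idx, and others).

    from typing import List, Optional

    class Leaf:  # stand-in for blib2to3's Leaf; illustrative only
        ...

    def find_bracket(leaves: List[Leaf]) -> Optional[Leaf]:
        # The explicit Optional[Leaf] annotation is the workaround described
        # above; without it mypy cannot pin down the variable's type from the
        # None initializer alone.
        matching_bracket: Optional[Leaf] = None
        for leaf in leaves:
            matching_bracket = leaf
        return matching_bracket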

Co-authored-by: Sanjit Kalapatapu <sanjitkal@gmail.com>
Co-authored-by: Michael J. Sullivan <sully@msully.net>
Michael J. Sullivan 2019-10-30 07:29:29 -07:00 committed by Jelle Zijlstra
parent 12826f3c1e
commit 3e60f6d454
30 changed files with 802 additions and 802 deletions


@@ -7,7 +7,7 @@ build: off
 test_script:
 - C:\Python36\python.exe tests/test_black.py
-- C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py
+- C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py blib2to3
 after_test:
 - C:\Python36\python.exe -m pip install pyinstaller


@@ -3,3 +3,7 @@ ignore = E203, E266, E501, W503
 max-line-length = 80
 max-complexity = 18
 select = B,C,E,F,W,T4,B9
+# We need to configure the mypy.ini because the flake8-mypy's default
+# options don't properly override it, so if we don't specify it we get
+# half of the config from mypy.ini and half from flake8-mypy.
+mypy_config = mypy.ini

.gitignore vendored

@@ -12,3 +12,4 @@ pip-wheel-metadata/
 _black_version.py
 .idea
 .eggs
+.dmypy.json


@@ -11,6 +11,8 @@ toml = ">=0.9.4"
 black = {path = ".",extras = ["d"],editable = true}
 aiohttp-cors = "*"
 typed-ast = "==1.4.0"
+typing_extensions = ">=3.7.4"
+mypy_extensions = ">=0.4.3"
 regex = ">=2019.8"
 pathspec = ">=0.6"
 dataclasses = {version = ">=0.6", python_version = "< 3.7"}
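
A hedged note on why these two move into the runtime dependencies: later in
this diff black.py imports both packages at import time, so they are needed
wherever Black runs, not only where it is type checked.

    # As added to black.py further down in this commit:
    from typing_extensions import Final       # typed module-level constants
    from mypy_extensions import mypyc_attr    # e.g. @mypyc_attr(patchable=True)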

Pipfile.lock generated

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "ad54dbd29085bc14caf655456b93d9f09e8556406ef956a5a05c20e30363ffa1" "sha256": "9df9582de1e290f76bd43bbe8dc291bc71e4031517c7e824eb67c65d8e01f78f"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -134,6 +134,14 @@
], ],
"version": "==4.5.2" "version": "==4.5.2"
}, },
"mypy-extensions": {
"hashes": [
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
],
"index": "pypi",
"version": "==0.4.3"
},
"pathspec": { "pathspec": {
"hashes": [ "hashes": [
"sha256:e285ccc8b0785beadd4c18e5708b12bb8fcf529a1e61215b3feff1d1e559ea5c" "sha256:e285ccc8b0785beadd4c18e5708b12bb8fcf529a1e61215b3feff1d1e559ea5c"
@ -194,6 +202,15 @@
"index": "pypi", "index": "pypi",
"version": "==1.4.0" "version": "==1.4.0"
}, },
"typing-extensions": {
"hashes": [
"sha256:091ecc894d5e908ac75209f10d5b4f118fbdb2eb1ede6a63544054bb1edb41f2",
"sha256:910f4656f54de5993ad9304959ce9bb903f90aadc7c67a0bef07e678014e892d",
"sha256:cf8b63fedea4d89bab840ecbb93e75578af28f76f66c35889bd7065f5af88575"
],
"index": "pypi",
"version": "==3.7.4.1"
},
"yarl": { "yarl": {
"hashes": [ "hashes": [
"sha256:024ecdc12bc02b321bc66b41327f930d1c2c543fa9a561b39861da9388ba7aa9", "sha256:024ecdc12bc02b321bc66b41327f930d1c2c543fa9a561b39861da9388ba7aa9",
@ -697,11 +714,12 @@
}, },
"typing-extensions": { "typing-extensions": {
"hashes": [ "hashes": [
"sha256:2ed632b30bb54fc3941c382decfd0ee4148f5c591651c9272473fea2c6397d95", "sha256:091ecc894d5e908ac75209f10d5b4f118fbdb2eb1ede6a63544054bb1edb41f2",
"sha256:b1edbbf0652660e32ae780ac9433f4231e7339c7f9a8057d0f042fcbcea49b87", "sha256:910f4656f54de5993ad9304959ce9bb903f90aadc7c67a0bef07e678014e892d",
"sha256:d8179012ec2c620d3791ca6fe2bf7979d979acdbef1fca0bc56b37411db682ed" "sha256:cf8b63fedea4d89bab840ecbb93e75578af28f76f66c35889bd7065f5af88575"
], ],
"version": "==3.7.4" "index": "pypi",
"version": "==3.7.4.1"
}, },
"urllib3": { "urllib3": {
"hashes": [ "hashes": [


@ -37,6 +37,8 @@
Union, Union,
cast, cast,
) )
from typing_extensions import Final
from mypy_extensions import mypyc_attr
from appdirs import user_cache_dir from appdirs import user_cache_dir
from dataclasses import dataclass, field, replace from dataclasses import dataclass, field, replace
@ -247,6 +249,17 @@ def read_pyproject_toml(
return value return value
def target_version_option_callback(
c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
) -> List[TargetVersion]:
"""Compute the target versions from a --target-version flag.
This is its own function because mypy couldn't infer the type correctly
when it was a lambda, causing mypyc trouble.
"""
return [TargetVersion[val.upper()] for val in v]
@click.command(context_settings=dict(help_option_names=["-h", "--help"])) @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option("-c", "--code", type=str, help="Format the code passed in as a string.") @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
@click.option( @click.option(
@ -261,7 +274,7 @@ def read_pyproject_toml(
"-t", "-t",
"--target-version", "--target-version",
type=click.Choice([v.name.lower() for v in TargetVersion]), type=click.Choice([v.name.lower() for v in TargetVersion]),
callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v], callback=target_version_option_callback,
multiple=True, multiple=True,
help=( help=(
"Python versions that should be supported by Black's output. [default: " "Python versions that should be supported by Black's output. [default: "
@ -388,7 +401,7 @@ def main(
verbose: bool, verbose: bool,
include: str, include: str,
exclude: str, exclude: str,
src: Tuple[str], src: Tuple[str, ...],
config: Optional[str], config: Optional[str],
) -> None: ) -> None:
"""The uncompromising code formatter.""" """The uncompromising code formatter."""
@ -470,7 +483,9 @@ def main(
ctx.exit(report.return_code) ctx.exit(report.return_code)
def path_empty(src: Tuple[str], quiet: bool, verbose: bool, ctx: click.Context) -> None: def path_empty(
src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context
) -> None:
""" """
Exit if there is no `src` provided for formatting Exit if there is no `src` provided for formatting
""" """
@ -585,7 +600,7 @@ async def schedule_formatting(
): src ): src
for src in sorted(sources) for src in sorted(sources)
} }
pending: Iterable[asyncio.Future] = tasks.keys() pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
try: try:
loop.add_signal_handler(signal.SIGINT, cancel, pending) loop.add_signal_handler(signal.SIGINT, cancel, pending)
loop.add_signal_handler(signal.SIGTERM, cancel, pending) loop.add_signal_handler(signal.SIGTERM, cancel, pending)
@ -639,10 +654,10 @@ def format_file_in_place(
except NothingChanged: except NothingChanged:
return False return False
if write_back == write_back.YES: if write_back == WriteBack.YES:
with open(src, "w", encoding=encoding, newline=newline) as f: with open(src, "w", encoding=encoding, newline=newline) as f:
f.write(dst_contents) f.write(dst_contents)
elif write_back == write_back.DIFF: elif write_back == WriteBack.DIFF:
now = datetime.utcnow() now = datetime.utcnow()
src_name = f"{src}\t{then} +0000" src_name = f"{src}\t{then} +0000"
dst_name = f"{src}\t{now} +0000" dst_name = f"{src}\t{now} +0000"
@ -865,8 +880,16 @@ def visit(self, node: LN) -> Iterator[T]:
if node.type < 256: if node.type < 256:
name = token.tok_name[node.type] name = token.tok_name[node.type]
else: else:
name = type_repr(node.type) name = str(type_repr(node.type))
yield from getattr(self, f"visit_{name}", self.visit_default)(node) # We explicitly branch on whether a visitor exists (instead of
# using self.visit_default as the default arg to getattr) in order
# to save needing to create a bound method object and so mypyc can
# generate a native call to visit_default.
visitf = getattr(self, f"visit_{name}", None)
if visitf:
yield from visitf(node)
else:
yield from self.visit_default(node)
def visit_default(self, node: LN) -> Iterator[T]: def visit_default(self, node: LN) -> Iterator[T]:
"""Default `visit_*()` implementation. Recurses to children of `node`.""" """Default `visit_*()` implementation. Recurses to children of `node`."""
@ -911,8 +934,8 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
list(v.visit(code)) list(v.visit(code))
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE} WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
STATEMENT = { STATEMENT: Final = {
syms.if_stmt, syms.if_stmt,
syms.while_stmt, syms.while_stmt,
syms.for_stmt, syms.for_stmt,
@ -922,10 +945,10 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
syms.funcdef, syms.funcdef,
syms.classdef, syms.classdef,
} }
STANDALONE_COMMENT = 153 STANDALONE_COMMENT: Final = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT" token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
LOGIC_OPERATORS = {"and", "or"} LOGIC_OPERATORS: Final = {"and", "or"}
COMPARATORS = { COMPARATORS: Final = {
token.LESS, token.LESS,
token.GREATER, token.GREATER,
token.EQEQUAL, token.EQEQUAL,
@ -933,7 +956,7 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
token.LESSEQUAL, token.LESSEQUAL,
token.GREATEREQUAL, token.GREATEREQUAL,
} }
MATH_OPERATORS = { MATH_OPERATORS: Final = {
token.VBAR, token.VBAR,
token.CIRCUMFLEX, token.CIRCUMFLEX,
token.AMPER, token.AMPER,
@ -949,23 +972,23 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
token.TILDE, token.TILDE,
token.DOUBLESTAR, token.DOUBLESTAR,
} }
STARS = {token.STAR, token.DOUBLESTAR} STARS: Final = {token.STAR, token.DOUBLESTAR}
VARARGS_SPECIALS = STARS | {token.SLASH} VARARGS_SPECIALS: Final = STARS | {token.SLASH}
VARARGS_PARENTS = { VARARGS_PARENTS: Final = {
syms.arglist, syms.arglist,
syms.argument, # double star in arglist syms.argument, # double star in arglist
syms.trailer, # single argument to call syms.trailer, # single argument to call
syms.typedargslist, syms.typedargslist,
syms.varargslist, # lambdas syms.varargslist, # lambdas
} }
UNPACKING_PARENTS = { UNPACKING_PARENTS: Final = {
syms.atom, # single element of a list or set literal syms.atom, # single element of a list or set literal
syms.dictsetmaker, syms.dictsetmaker,
syms.listmaker, syms.listmaker,
syms.testlist_gexp, syms.testlist_gexp,
syms.testlist_star_expr, syms.testlist_star_expr,
} }
TEST_DESCENDANTS = { TEST_DESCENDANTS: Final = {
syms.test, syms.test,
syms.lambdef, syms.lambdef,
syms.or_test, syms.or_test,
@ -982,7 +1005,7 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
syms.term, syms.term,
syms.power, syms.power,
} }
ASSIGNMENTS = { ASSIGNMENTS: Final = {
"=", "=",
"+=", "+=",
"-=", "-=",
@ -998,13 +1021,13 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
"**=", "**=",
"//=", "//=",
} }
COMPREHENSION_PRIORITY = 20 COMPREHENSION_PRIORITY: Final = 20
COMMA_PRIORITY = 18 COMMA_PRIORITY: Final = 18
TERNARY_PRIORITY = 16 TERNARY_PRIORITY: Final = 16
LOGIC_PRIORITY = 14 LOGIC_PRIORITY: Final = 14
STRING_PRIORITY = 12 STRING_PRIORITY: Final = 12
COMPARATOR_PRIORITY = 10 COMPARATOR_PRIORITY: Final = 10
MATH_PRIORITIES = { MATH_PRIORITIES: Final = {
token.VBAR: 9, token.VBAR: 9,
token.CIRCUMFLEX: 8, token.CIRCUMFLEX: 8,
token.AMPER: 7, token.AMPER: 7,
@ -1020,7 +1043,7 @@ def show(cls, code: Union[str, Leaf, Node]) -> None:
token.TILDE: 3, token.TILDE: 3,
token.DOUBLESTAR: 2, token.DOUBLESTAR: 2,
} }
DOT_PRIORITY = 1 DOT_PRIORITY: Final = 1
@dataclass @dataclass
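
A short illustration of what the Final annotations above buy (my reading, not
text from the commit): mypy now flags any rebinding of these module-level
names, and their values can be treated as true constants by mypyc.

    from typing_extensions import Final

    COMMA_PRIORITY: Final = 18
    LOGIC_OPERATORS: Final = {"and", "or"}

    # Rebinding a Final name is a type error:
    # COMMA_PRIORITY = 20  # error: Cannot assign to final name "COMMA_PRIORITY"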
@ -1729,13 +1752,13 @@ def visit_default(self, node: LN) -> Iterator[Line]:
self.current_line.append(node) self.current_line.append(node)
yield from super().visit_default(node) yield from super().visit_default(node)
def visit_INDENT(self, node: Node) -> Iterator[Line]: def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
"""Increase indentation level, maybe yield a line.""" """Increase indentation level, maybe yield a line."""
# In blib2to3 INDENT never holds comments. # In blib2to3 INDENT never holds comments.
yield from self.line(+1) yield from self.line(+1)
yield from self.visit_default(node) yield from self.visit_default(node)
def visit_DEDENT(self, node: Node) -> Iterator[Line]: def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
"""Decrease indentation level, maybe yield a line.""" """Decrease indentation level, maybe yield a line."""
# The current line might still wait for trailing comments. At DEDENT time # The current line might still wait for trailing comments. At DEDENT time
# there won't be any (they would be prefixes on the preceding NEWLINE). # there won't be any (they would be prefixes on the preceding NEWLINE).
@ -2463,7 +2486,7 @@ def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[
body_leaves: List[Leaf] = [] body_leaves: List[Leaf] = []
head_leaves: List[Leaf] = [] head_leaves: List[Leaf] = []
current_leaves = head_leaves current_leaves = head_leaves
matching_bracket = None matching_bracket: Optional[Leaf] = None
for leaf in line.leaves: for leaf in line.leaves:
if ( if (
current_leaves is body_leaves current_leaves is body_leaves
@ -2506,8 +2529,8 @@ def right_hand_split(
body_leaves: List[Leaf] = [] body_leaves: List[Leaf] = []
head_leaves: List[Leaf] = [] head_leaves: List[Leaf] = []
current_leaves = tail_leaves current_leaves = tail_leaves
opening_bracket = None opening_bracket: Optional[Leaf] = None
closing_bracket = None closing_bracket: Optional[Leaf] = None
for leaf in reversed(line.leaves): for leaf in reversed(line.leaves):
if current_leaves is body_leaves: if current_leaves is body_leaves:
if leaf is opening_bracket: if leaf is opening_bracket:
@ -3028,7 +3051,7 @@ def convert_one_fmt_off_pair(node: Node) -> bool:
# That happens when one of the `ignored_nodes` ended with a NEWLINE # That happens when one of the `ignored_nodes` ended with a NEWLINE
# leaf (possibly followed by a DEDENT). # leaf (possibly followed by a DEDENT).
hidden_value = hidden_value[:-1] hidden_value = hidden_value[:-1]
first_idx = None first_idx: Optional[int] = None
for ignored in ignored_nodes: for ignored in ignored_nodes:
index = ignored.remove() index = ignored.remove()
if first_idx is None: if first_idx is None:
@ -3399,8 +3422,8 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
yield omit yield omit
length = 4 * line.depth length = 4 * line.depth
opening_bracket = None opening_bracket: Optional[Leaf] = None
closing_bracket = None closing_bracket: Optional[Leaf] = None
inner_brackets: Set[LeafID] = set() inner_brackets: Set[LeafID] = set()
for index, leaf, leaf_length in enumerate_with_length(line, reversed=True): for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
length += leaf_length length += leaf_length
@ -3797,6 +3820,7 @@ def assert_stable(src: str, dst: str, mode: FileMode) -> None:
) from None ) from None
@mypyc_attr(patchable=True)
def dump_to_file(*output: str) -> str: def dump_to_file(*output: str) -> str:
"""Dump `output` to a temporary file. Return path to the file.""" """Dump `output` to a temporary file. Return path to the file."""
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
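
Why dump_to_file gains @mypyc_attr(patchable=True) is not spelled out in the
diff; a hedged explanation: mypyc normally compiles calls to module-level
functions into direct native calls, so replacing black.dump_to_file at runtime
(as a test might) would be ignored by compiled callers unless the function is
marked patchable.

    from mypy_extensions import mypyc_attr

    @mypyc_attr(patchable=True)
    def dump_to_file(*output: str) -> str:
        ...

    # Hypothetical monkeypatching that the flag keeps working under mypyc:
    # black.dump_to_file = fake_dump_to_file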
@ -3829,7 +3853,7 @@ def diff(a: str, b: str, a_name: str, b_name: str) -> str:
) )
def cancel(tasks: Iterable[asyncio.Task]) -> None: def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
"""asyncio signal handler that cancels all `tasks` and reports to stderr.""" """asyncio signal handler that cancels all `tasks` and reports to stderr."""
err("Aborted!") err("Aborted!")
for task in tasks: for task in tasks:
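
The quoted annotations such as "asyncio.Future[bool]" above and
"asyncio.Task[Any]" here are string (forward-reference) annotations; my
reading is that keeping the subscripted type inside a string stops the
interpreter from evaluating it, since subscripting the asyncio classes was not
supported at runtime on the Python versions Black targeted at the time.

    import asyncio
    from typing import Any, Iterable

    # Only the type checker sees asyncio.Task[Any]; the interpreter never
    # evaluates the subscription because the annotation is a string.
    def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
        for task in tasks:
            task.cancel()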


@ -1 +0,0 @@
# Stubs for lib2to3 (Python 3.6)


@ -1,10 +0,0 @@
# Stubs for lib2to3.pgen2 (Python 3.6)
import os
import sys
from typing import Text, Union
if sys.version_info >= (3, 6):
_Path = Union[Text, os.PathLike]
else:
_Path = Text


@ -1,6 +1,8 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement. # Licensed to PSF under a Contributor Agreement.
# mypy: ignore-errors
"""Convert graminit.[ch] spit out by pgen to Python code. """Convert graminit.[ch] spit out by pgen to Python code.
Pgen is the Python parser generator. It is useful to quickly create a Pgen is the Python parser generator. It is useful to quickly create a


@ -22,20 +22,42 @@
import logging import logging
import pkgutil import pkgutil
import sys import sys
from typing import (
Any,
Callable,
IO,
Iterable,
List,
Optional,
Text,
Tuple,
Union,
Sequence,
)
# Pgen imports # Pgen imports
from . import grammar, parse, token, tokenize, pgen from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar
Path = Union[str, "os.PathLike[str]"]
class Driver(object): class Driver(object):
def __init__(self, grammar, convert=None, logger=None): def __init__(
self,
grammar: Grammar,
convert: Optional[_Convert] = None,
logger: Optional[Logger] = None,
) -> None:
self.grammar = grammar self.grammar = grammar
if logger is None: if logger is None:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
self.logger = logger self.logger = logger
self.convert = convert self.convert = convert
def parse_tokens(self, tokens, debug=False): def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
"""Parse a series of tokens and return the syntax tree.""" """Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize. # XXX Move the prefix computation into a wrapper around tokenize.
p = parse.Parser(self.grammar, self.convert) p = parse.Parser(self.grammar, self.convert)
@ -91,32 +113,36 @@ def parse_tokens(self, tokens, debug=False):
column = 0 column = 0
else: else:
# We never broke out -- EOF is too soon (how can this happen???) # We never broke out -- EOF is too soon (how can this happen???)
assert start is not None
raise parse.ParseError("incomplete input", type, value, (prefix, start)) raise parse.ParseError("incomplete input", type, value, (prefix, start))
assert p.rootnode is not None
return p.rootnode return p.rootnode
def parse_stream_raw(self, stream, debug=False): def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
"""Parse a stream and return the syntax tree.""" """Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar) tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
return self.parse_tokens(tokens, debug) return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False): def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
"""Parse a stream and return the syntax tree.""" """Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug) return self.parse_stream_raw(stream, debug)
def parse_file(self, filename, encoding=None, debug=False): def parse_file(
self, filename: Path, encoding: Optional[Text] = None, debug: bool = False,
) -> NL:
"""Parse a file and return the syntax tree.""" """Parse a file and return the syntax tree."""
with io.open(filename, "r", encoding=encoding) as stream: with io.open(filename, "r", encoding=encoding) as stream:
return self.parse_stream(stream, debug) return self.parse_stream(stream, debug)
def parse_string(self, text, debug=False): def parse_string(self, text: Text, debug: bool = False) -> NL:
"""Parse a string and return the syntax tree.""" """Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens( tokens = tokenize.generate_tokens(
io.StringIO(text).readline, grammar=self.grammar io.StringIO(text).readline, grammar=self.grammar
) )
return self.parse_tokens(tokens, debug) return self.parse_tokens(tokens, debug)
def _partially_consume_prefix(self, prefix, column): def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
lines = [] lines: List[str] = []
current_line = "" current_line = ""
current_column = 0 current_column = 0
wait_for_nl = False wait_for_nl = False
@ -143,7 +169,7 @@ def _partially_consume_prefix(self, prefix, column):
return "".join(lines), current_line return "".join(lines), current_line
def _generate_pickle_name(gt, cache_dir=None): def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
head, tail = os.path.splitext(gt) head, tail = os.path.splitext(gt)
if tail == ".txt": if tail == ".txt":
tail = "" tail = ""
@ -154,14 +180,20 @@ def _generate_pickle_name(gt, cache_dir=None):
return name return name
def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None): def load_grammar(
gt: Text = "Grammar.txt",
gp: Optional[Text] = None,
save: bool = True,
force: bool = False,
logger: Optional[Logger] = None,
) -> Grammar:
"""Load the grammar (maybe from a pickle).""" """Load the grammar (maybe from a pickle)."""
if logger is None: if logger is None:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
gp = _generate_pickle_name(gt) if gp is None else gp gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt): if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt) logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt) g: grammar.Grammar = pgen.generate_grammar(gt)
if save: if save:
logger.info("Writing grammar tables to %s", gp) logger.info("Writing grammar tables to %s", gp)
try: try:
@ -174,7 +206,7 @@ def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None)
return g return g
def _newer(a, b): def _newer(a: Text, b: Text) -> bool:
"""Inquire whether file a was written since file b.""" """Inquire whether file a was written since file b."""
if not os.path.exists(a): if not os.path.exists(a):
return False return False
@ -183,7 +215,9 @@ def _newer(a, b):
return os.path.getmtime(a) >= os.path.getmtime(b) return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(package, grammar_source, cache_dir=None): def load_packaged_grammar(
package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
"""Normally, loads a pickled grammar by doing """Normally, loads a pickled grammar by doing
pkgutil.get_data(package, pickled_grammar) pkgutil.get_data(package, pickled_grammar)
where *pickled_grammar* is computed from *grammar_source* by adding the where *pickled_grammar* is computed from *grammar_source* by adding the
@ -199,18 +233,19 @@ def load_packaged_grammar(package, grammar_source, cache_dir=None):
return load_grammar(grammar_source, gp=gp) return load_grammar(grammar_source, gp=gp)
pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir) pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
data = pkgutil.get_data(package, pickled_name) data = pkgutil.get_data(package, pickled_name)
assert data is not None
g = grammar.Grammar() g = grammar.Grammar()
g.loads(data) g.loads(data)
return g return g
def main(*args): def main(*args: Text) -> bool:
"""Main program, when run as a script: produce grammar pickle files. """Main program, when run as a script: produce grammar pickle files.
Calls load_grammar for each argument, a path to a grammar text file. Calls load_grammar for each argument, a path to a grammar text file.
""" """
if not args: if not args:
args = sys.argv[1:] args = tuple(sys.argv[1:])
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s") logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
for gt in args: for gt in args:
load_grammar(gt, save=True, force=True) load_grammar(gt, save=True, force=True)


@ -1,24 +0,0 @@
# Stubs for lib2to3.pgen2.driver (Python 3.6)
import os
import sys
from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union
from logging import Logger
from blib2to3.pytree import _Convert, _NL
from blib2to3.pgen2 import _Path
from blib2to3.pgen2.grammar import Grammar
class Driver:
grammar: Grammar
logger: Logger
convert: _Convert
def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ...
def parse_string(self, text: Text, debug: bool = ...) -> _NL: ...
def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ...


@ -16,10 +16,17 @@
import os import os
import pickle import pickle
import tempfile import tempfile
from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union
# Local imports # Local imports
from . import token from . import token
_P = TypeVar("_P", bound="Grammar")
Label = Tuple[int, Optional[Text]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
Path = Union[str, "os.PathLike[str]"]
class Grammar(object): class Grammar(object):
"""Pgen parsing tables conversion class. """Pgen parsing tables conversion class.
@ -75,38 +82,51 @@ class Grammar(object):
""" """
def __init__(self): def __init__(self) -> None:
self.symbol2number = {} self.symbol2number: Dict[str, int] = {}
self.number2symbol = {} self.number2symbol: Dict[int, str] = {}
self.states = [] self.states: List[DFA] = []
self.dfas = {} self.dfas: Dict[int, DFAS] = {}
self.labels = [(0, "EMPTY")] self.labels: List[Label] = [(0, "EMPTY")]
self.keywords = {} self.keywords: Dict[str, int] = {}
self.tokens = {} self.tokens: Dict[int, int] = {}
self.symbol2label = {} self.symbol2label: Dict[str, int] = {}
self.start = 256 self.start = 256
# Python 3.7+ parses async as a keyword, not an identifier # Python 3.7+ parses async as a keyword, not an identifier
self.async_keywords = False self.async_keywords = False
def dump(self, filename): def dump(self, filename: Path) -> None:
"""Dump the grammar tables to a pickle file.""" """Dump the grammar tables to a pickle file."""
# mypyc generates objects that don't have a __dict__, but they
# do have __getstate__ methods that will return an equivalent
# dictionary
if hasattr(self, "__dict__"):
d = self.__dict__
else:
d = self.__getstate__() # type: ignore
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
dir=os.path.dirname(filename), delete=False dir=os.path.dirname(filename), delete=False
) as f: ) as f:
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
os.replace(f.name, filename) os.replace(f.name, filename)
def load(self, filename): def _update(self, attrs: Dict[str, Any]) -> None:
for k, v in attrs.items():
setattr(self, k, v)
def load(self, filename: Path) -> None:
"""Load the grammar tables from a pickle file.""" """Load the grammar tables from a pickle file."""
with open(filename, "rb") as f: with open(filename, "rb") as f:
d = pickle.load(f) d = pickle.load(f)
self.__dict__.update(d) self._update(d)
def loads(self, pkl): def loads(self, pkl: bytes) -> None:
"""Load the grammar tables from a pickle bytes object.""" """Load the grammar tables from a pickle bytes object."""
self.__dict__.update(pickle.loads(pkl)) self._update(pickle.loads(pkl))
def copy(self): def copy(self: _P) -> _P:
""" """
Copy the grammar. Copy the grammar.
""" """
@ -126,7 +146,7 @@ def copy(self):
new.async_keywords = self.async_keywords new.async_keywords = self.async_keywords
return new return new
def report(self): def report(self) -> None:
"""Dump the grammar tables to standard output, for debugging.""" """Dump the grammar tables to standard output, for debugging."""
from pprint import pprint from pprint import pprint
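
A compact sketch of the pickling workaround described in the comments above
(helper names are illustrative): compiled Grammar instances have no __dict__,
so state is read through __getstate__() when dumping and written back
attribute by attribute when loading.

    import pickle
    from typing import Any, Dict

    def _state(obj: Any) -> Dict[str, Any]:
        # mypyc-compiled instances lack __dict__ but expose an equivalent
        # dictionary through __getstate__().
        return obj.__dict__ if hasattr(obj, "__dict__") else obj.__getstate__()

    def _restore(obj: Any, blob: bytes) -> None:
        # Mirrors Grammar._update(): set attributes one by one instead of
        # updating a __dict__ that may not exist.
        for key, value in pickle.loads(blob).items():
            setattr(obj, key, value)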


@ -1,30 +0,0 @@
# Stubs for lib2to3.pgen2.grammar (Python 3.6)
from blib2to3.pgen2 import _Path
from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar
_P = TypeVar('_P')
_Label = Tuple[int, Optional[Text]]
_DFA = List[List[Tuple[int, int]]]
_DFAS = Tuple[_DFA, Dict[int, int]]
class Grammar:
symbol2number: Dict[Text, int]
number2symbol: Dict[int, Text]
states: List[_DFA]
dfas: Dict[int, _DFAS]
labels: List[_Label]
keywords: Dict[Text, int]
tokens: Dict[int, int]
symbol2label: Dict[Text, int]
start: int
async_keywords: bool
def __init__(self) -> None: ...
def dump(self, filename: _Path) -> None: ...
def load(self, filename: _Path) -> None: ...
def copy(self: _P) -> _P: ...
def report(self) -> None: ...
opmap_raw: Text
opmap: Dict[Text, Text]


@ -3,9 +3,12 @@
"""Safely evaluate Python string literals without using eval().""" """Safely evaluate Python string literals without using eval()."""
import regex as re import re
simple_escapes = { from typing import Dict, Match, Text
simple_escapes: Dict[Text, Text] = {
"a": "\a", "a": "\a",
"b": "\b", "b": "\b",
"f": "\f", "f": "\f",
@ -19,7 +22,7 @@
} }
def escape(m): def escape(m: Match[Text]) -> Text:
all, tail = m.group(0, 1) all, tail = m.group(0, 1)
assert all.startswith("\\") assert all.startswith("\\")
esc = simple_escapes.get(tail) esc = simple_escapes.get(tail)
@ -41,7 +44,7 @@ def escape(m):
return chr(i) return chr(i)
def evalString(s): def evalString(s: Text) -> Text:
assert s.startswith("'") or s.startswith('"'), repr(s[:1]) assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0] q = s[0]
if s[:3] == q * 3: if s[:3] == q * 3:
@ -52,7 +55,7 @@ def evalString(s):
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
def test(): def test() -> None:
for i in range(256): for i in range(256):
c = chr(i) c = chr(i)
s = repr(c) s = repr(c)


@ -1,9 +0,0 @@
# Stubs for lib2to3.pgen2.literals (Python 3.6)
from typing import Dict, Match, Text
simple_escapes: Dict[Text, Text]
def escape(m: Match) -> Text: ...
def evalString(s: Text) -> Text: ...
def test() -> None: ...


@ -12,12 +12,39 @@
# Local imports # Local imports
from . import token from . import token
from typing import (
Optional,
Text,
Sequence,
Any,
Union,
Tuple,
Dict,
List,
Callable,
Set,
)
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import NL, Context, RawNode, Leaf, Node
Results = Dict[Text, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
assert node[3] is not None
return Node(type=node[0], children=node[3], context=node[2])
class ParseError(Exception): class ParseError(Exception):
"""Exception to signal the parser is stuck.""" """Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context): def __init__(
self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
) -> None:
Exception.__init__( Exception.__init__(
self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context) self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
) )
@ -57,7 +84,7 @@ class Parser(object):
""" """
def __init__(self, grammar, convert=None): def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
"""Constructor. """Constructor.
The grammar argument is a grammar.Grammar instance; see the The grammar argument is a grammar.Grammar instance; see the
@ -87,9 +114,9 @@ def __init__(self, grammar, convert=None):
""" """
self.grammar = grammar self.grammar = grammar
self.convert = convert or (lambda grammar, node: node) self.convert = convert or lam_sub
def setup(self, start=None): def setup(self, start: Optional[int] = None) -> None:
"""Prepare for parsing. """Prepare for parsing.
This *must* be called before starting to parse. This *must* be called before starting to parse.
@ -107,13 +134,13 @@ def setup(self, start=None):
# Each stack entry is a tuple: (dfa, state, node). # Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children), # A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None. # where children is a list of nodes or None, and context may be None.
newnode = (start, None, None, []) newnode: RawNode = (start, None, None, [])
stackentry = (self.grammar.dfas[start], 0, newnode) stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry] self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
self.rootnode = None self.rootnode: Optional[NL] = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop() self.used_names: Set[str] = set()
def addtoken(self, type, value, context): def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool:
"""Add a token; return True iff this is the end of the program.""" """Add a token; return True iff this is the end of the program."""
# Map from token to label # Map from token to label
ilabel = self.classify(type, value, context) ilabel = self.classify(type, value, context)
@ -160,10 +187,11 @@ def addtoken(self, type, value, context):
# No success finding a transition # No success finding a transition
raise ParseError("bad input", type, value, context) raise ParseError("bad input", type, value, context)
def classify(self, type, value, context): def classify(self, type: int, value: Optional[Text], context: Context) -> int:
"""Turn a token into a label. (Internal)""" """Turn a token into a label. (Internal)"""
if type == token.NAME: if type == token.NAME:
# Keep a listing of all used names # Keep a listing of all used names
assert value is not None
self.used_names.add(value) self.used_names.add(value)
# Check for reserved words # Check for reserved words
ilabel = self.grammar.keywords.get(value) ilabel = self.grammar.keywords.get(value)
@ -174,29 +202,35 @@ def classify(self, type, value, context):
raise ParseError("bad token", type, value, context) raise ParseError("bad token", type, value, context)
return ilabel return ilabel
def shift(self, type, value, newstate, context): def shift(
self, type: int, value: Optional[Text], newstate: int, context: Context
) -> None:
"""Shift a token. (Internal)""" """Shift a token. (Internal)"""
dfa, state, node = self.stack[-1] dfa, state, node = self.stack[-1]
newnode = (type, value, context, None) assert value is not None
newnode = self.convert(self.grammar, newnode) assert context is not None
rawnode: RawNode = (type, value, context, None)
newnode = self.convert(self.grammar, rawnode)
if newnode is not None: if newnode is not None:
assert node[-1] is not None
node[-1].append(newnode) node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node) self.stack[-1] = (dfa, newstate, node)
def push(self, type, newdfa, newstate, context): def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
"""Push a nonterminal. (Internal)""" """Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1] dfa, state, node = self.stack[-1]
newnode = (type, None, context, []) newnode: RawNode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node) self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode)) self.stack.append((newdfa, 0, newnode))
def pop(self): def pop(self) -> None:
"""Pop a nonterminal. (Internal)""" """Pop a nonterminal. (Internal)"""
popdfa, popstate, popnode = self.stack.pop() popdfa, popstate, popnode = self.stack.pop()
newnode = self.convert(self.grammar, popnode) newnode = self.convert(self.grammar, popnode)
if newnode is not None: if newnode is not None:
if self.stack: if self.stack:
dfa, state, node = self.stack[-1] dfa, state, node = self.stack[-1]
assert node[-1] is not None
node[-1].append(newnode) node[-1].append(newnode)
else: else:
self.rootnode = newnode self.rootnode = newnode
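
The default converter changes from an inline identity lambda to the typed,
module-level lam_sub defined near the top of this file; a hedged,
self-contained illustration of the general pattern (names below are invented
for the example) of preferring a named, annotated default over a lambda so
both mypy and mypyc see a precise callable type:

    from typing import Callable, Optional

    Handler = Callable[[int], int]

    def default_handler(x: int) -> int:
        return x

    class Processor:
        def __init__(self, handler: Optional[Handler] = None) -> None:
            # "handler or default_handler" keeps the attribute a plain Handler
            # that mypy can check precisely; an untyped lambda default would
            # leave it imprecise.
            self.handler: Handler = handler or default_handler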


@ -1,29 +0,0 @@
# Stubs for lib2to3.pgen2.parse (Python 3.6)
from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple
from blib2to3.pgen2.grammar import Grammar, _DFAS
from blib2to3.pytree import _NL, _Convert, _RawNode
_Context = Sequence[Any]
class ParseError(Exception):
msg: Text
type: int
value: Optional[Text]
context: _Context
def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ...
class Parser:
grammar: Grammar
convert: _Convert
stack: List[Tuple[_DFAS, int, _RawNode]]
rootnode: Optional[_NL]
used_names: Set[Text]
def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ...
def setup(self, start: Optional[int] = ...) -> None: ...
def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ...
def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ...
def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ...
def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ...
def pop(self) -> None: ...


@ -4,13 +4,40 @@
# Pgen imports # Pgen imports
from . import grammar, token, tokenize from . import grammar, token, tokenize
from typing import (
Any,
Dict,
IO,
Iterable,
Iterator,
List,
Optional,
Text,
Tuple,
Union,
Sequence,
NoReturn,
)
from blib2to3.pgen2 import grammar
from blib2to3.pgen2.tokenize import GoodTokenInfo
import os
Path = Union[str, "os.PathLike[str]"]
class PgenGrammar(grammar.Grammar): class PgenGrammar(grammar.Grammar):
pass pass
class ParserGenerator(object): class ParserGenerator(object):
def __init__(self, filename, stream=None):
filename: Path
stream: IO[Text]
generator: Iterator[GoodTokenInfo]
first: Dict[Text, Optional[Dict[Text, int]]]
def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None:
close_stream = None close_stream = None
if stream is None: if stream is None:
stream = open(filename) stream = open(filename)
@ -25,7 +52,7 @@ def __init__(self, filename, stream=None):
self.first = {} # map from symbol name to set of tokens self.first = {} # map from symbol name to set of tokens
self.addfirstsets() self.addfirstsets()
def make_grammar(self): def make_grammar(self) -> PgenGrammar:
c = PgenGrammar() c = PgenGrammar()
names = list(self.dfas.keys()) names = list(self.dfas.keys())
names.sort() names.sort()
@ -50,8 +77,9 @@ def make_grammar(self):
c.start = c.symbol2number[self.startsymbol] c.start = c.symbol2number[self.startsymbol]
return c return c
def make_first(self, c, name): def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]:
rawfirst = self.first[name] rawfirst = self.first[name]
assert rawfirst is not None
first = {} first = {}
for label in sorted(rawfirst): for label in sorted(rawfirst):
ilabel = self.make_label(c, label) ilabel = self.make_label(c, label)
@ -59,7 +87,7 @@ def make_first(self, c, name):
first[ilabel] = 1 first[ilabel] = 1
return first return first
def make_label(self, c, label): def make_label(self, c: PgenGrammar, label: Text) -> int:
# XXX Maybe this should be a method on a subclass of converter? # XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels) ilabel = len(c.labels)
if label[0].isalpha(): if label[0].isalpha():
@ -105,7 +133,7 @@ def make_label(self, c, label):
c.tokens[itoken] = ilabel c.tokens[itoken] = ilabel
return ilabel return ilabel
def addfirstsets(self): def addfirstsets(self) -> None:
names = list(self.dfas.keys()) names = list(self.dfas.keys())
names.sort() names.sort()
for name in names: for name in names:
@ -113,11 +141,11 @@ def addfirstsets(self):
self.calcfirst(name) self.calcfirst(name)
# print name, self.first[name].keys() # print name, self.first[name].keys()
def calcfirst(self, name): def calcfirst(self, name: Text) -> None:
dfa = self.dfas[name] dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion self.first[name] = None # dummy to detect left recursion
state = dfa[0] state = dfa[0]
totalset = {} totalset: Dict[str, int] = {}
overlapcheck = {} overlapcheck = {}
for label, next in state.arcs.items(): for label, next in state.arcs.items():
if label in self.dfas: if label in self.dfas:
@ -128,12 +156,13 @@ def calcfirst(self, name):
else: else:
self.calcfirst(label) self.calcfirst(label)
fset = self.first[label] fset = self.first[label]
assert fset is not None
totalset.update(fset) totalset.update(fset)
overlapcheck[label] = fset overlapcheck[label] = fset
else: else:
totalset[label] = 1 totalset[label] = 1
overlapcheck[label] = {label: 1} overlapcheck[label] = {label: 1}
inverse = {} inverse: Dict[str, str] = {}
for label, itsfirst in overlapcheck.items(): for label, itsfirst in overlapcheck.items():
for symbol in itsfirst: for symbol in itsfirst:
if symbol in inverse: if symbol in inverse:
@ -145,9 +174,9 @@ def calcfirst(self, name):
inverse[symbol] = label inverse[symbol] = label
self.first[name] = totalset self.first[name] = totalset
def parse(self): def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]:
dfas = {} dfas = {}
startsymbol = None startsymbol: Optional[str] = None
# MSTART: (NEWLINE | RULE)* ENDMARKER # MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER: while self.type != token.ENDMARKER:
while self.type == token.NEWLINE: while self.type == token.NEWLINE:
@ -167,9 +196,10 @@ def parse(self):
# print name, oldlen, newlen # print name, oldlen, newlen
if startsymbol is None: if startsymbol is None:
startsymbol = name startsymbol = name
assert startsymbol is not None
return dfas, startsymbol return dfas, startsymbol
def make_dfa(self, start, finish): def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]:
# To turn an NFA into a DFA, we define the states of the DFA # To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some # to correspond to *sets* of states of the NFA. Then do some
# state reduction. Let's represent sets as dicts with 1 for # state reduction. Let's represent sets as dicts with 1 for
@ -177,12 +207,12 @@ def make_dfa(self, start, finish):
assert isinstance(start, NFAState) assert isinstance(start, NFAState)
assert isinstance(finish, NFAState) assert isinstance(finish, NFAState)
def closure(state): def closure(state: NFAState) -> Dict[NFAState, int]:
base = {} base: Dict[NFAState, int] = {}
addclosure(state, base) addclosure(state, base)
return base return base
def addclosure(state, base): def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None:
assert isinstance(state, NFAState) assert isinstance(state, NFAState)
if state in base: if state in base:
return return
@ -193,7 +223,7 @@ def addclosure(state, base):
states = [DFAState(closure(start), finish)] states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating for state in states: # NB states grows while we're iterating
arcs = {} arcs: Dict[str, Dict[NFAState, int]] = {}
for nfastate in state.nfaset: for nfastate in state.nfaset:
for label, next in nfastate.arcs: for label, next in nfastate.arcs:
if label is not None: if label is not None:
@ -208,7 +238,7 @@ def addclosure(state, base):
state.addarc(st, label) state.addarc(st, label)
return states # List of DFAState instances; first one is start return states # List of DFAState instances; first one is start
def dump_nfa(self, name, start, finish): def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None:
print("Dump of NFA for", name) print("Dump of NFA for", name)
todo = [start] todo = [start]
for i, state in enumerate(todo): for i, state in enumerate(todo):
@ -224,14 +254,14 @@ def dump_nfa(self, name, start, finish):
else: else:
print(" %s -> %d" % (label, j)) print(" %s -> %d" % (label, j))
def dump_dfa(self, name, dfa): def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None:
print("Dump of DFA for", name) print("Dump of DFA for", name)
for i, state in enumerate(dfa): for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "") print(" State", i, state.isfinal and "(final)" or "")
for label, next in sorted(state.arcs.items()): for label, next in sorted(state.arcs.items()):
print(" %s -> %d" % (label, dfa.index(next))) print(" %s -> %d" % (label, dfa.index(next)))
def simplify_dfa(self, dfa): def simplify_dfa(self, dfa: List["DFAState"]) -> None:
# This is not theoretically optimal, but works well enough. # This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same # Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and # set of arcs (same labels pointing to the same nodes) and
@ -252,7 +282,7 @@ def simplify_dfa(self, dfa):
changes = True changes = True
break break
def parse_rhs(self): def parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
# RHS: ALT ('|' ALT)* # RHS: ALT ('|' ALT)*
a, z = self.parse_alt() a, z = self.parse_alt()
if self.value != "|": if self.value != "|":
@ -269,7 +299,7 @@ def parse_rhs(self):
z.addarc(zz) z.addarc(zz)
return aa, zz return aa, zz
def parse_alt(self): def parse_alt(self) -> Tuple["NFAState", "NFAState"]:
# ALT: ITEM+ # ALT: ITEM+
a, b = self.parse_item() a, b = self.parse_item()
while self.value in ("(", "[") or self.type in (token.NAME, token.STRING): while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
@ -278,7 +308,7 @@ def parse_alt(self):
b = d b = d
return a, b return a, b
def parse_item(self): def parse_item(self) -> Tuple["NFAState", "NFAState"]:
# ITEM: '[' RHS ']' | ATOM ['+' | '*'] # ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[": if self.value == "[":
self.gettoken() self.gettoken()
@ -298,7 +328,7 @@ def parse_item(self):
else: else:
return a, a return a, a
def parse_atom(self): def parse_atom(self) -> Tuple["NFAState", "NFAState"]:
# ATOM: '(' RHS ')' | NAME | STRING # ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(": if self.value == "(":
self.gettoken() self.gettoken()
@ -315,8 +345,9 @@ def parse_atom(self):
self.raise_error( self.raise_error(
"expected (...) or NAME or STRING, got %s/%s", self.type, self.value "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
) )
assert False
def expect(self, type, value=None): def expect(self, type: int, value: Optional[Any] = None) -> Text:
if self.type != type or (value is not None and self.value != value): if self.type != type or (value is not None and self.value != value):
self.raise_error( self.raise_error(
"expected %s/%s, got %s/%s", type, value, self.type, self.value "expected %s/%s, got %s/%s", type, value, self.type, self.value
@ -325,14 +356,14 @@ def expect(self, type, value=None):
self.gettoken() self.gettoken()
return value return value
def gettoken(self): def gettoken(self) -> None:
tup = next(self.generator) tup = next(self.generator)
while tup[0] in (tokenize.COMMENT, tokenize.NL): while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = next(self.generator) tup = next(self.generator)
self.type, self.value, self.begin, self.end, self.line = tup self.type, self.value, self.begin, self.end, self.line = tup
# print token.tok_name[self.type], repr(self.value) # print token.tok_name[self.type], repr(self.value)
def raise_error(self, msg, *args): def raise_error(self, msg: str, *args: Any) -> NoReturn:
if args: if args:
try: try:
msg = msg % args msg = msg % args
@ -342,17 +373,23 @@ def raise_error(self, msg, *args):
class NFAState(object): class NFAState(object):
def __init__(self): arcs: List[Tuple[Optional[Text], "NFAState"]]
def __init__(self) -> None:
self.arcs = [] # list of (label, NFAState) pairs self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None): def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None:
assert label is None or isinstance(label, str) assert label is None or isinstance(label, str)
assert isinstance(next, NFAState) assert isinstance(next, NFAState)
self.arcs.append((label, next)) self.arcs.append((label, next))
class DFAState(object): class DFAState(object):
def __init__(self, nfaset, final): nfaset: Dict[NFAState, Any]
isfinal: bool
arcs: Dict[Text, "DFAState"]
def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
assert isinstance(nfaset, dict) assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState) assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState) assert isinstance(final, NFAState)
@ -360,18 +397,18 @@ def __init__(self, nfaset, final):
self.isfinal = final in nfaset self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState self.arcs = {} # map from label to DFAState
def addarc(self, next, label): def addarc(self, next: "DFAState", label: Text) -> None:
assert isinstance(label, str) assert isinstance(label, str)
assert label not in self.arcs assert label not in self.arcs
assert isinstance(next, DFAState) assert isinstance(next, DFAState)
self.arcs[label] = next self.arcs[label] = next
def unifystate(self, old, new): def unifystate(self, old: "DFAState", new: "DFAState") -> None:
for label, next in self.arcs.items(): for label, next in self.arcs.items():
if next is old: if next is old:
self.arcs[label] = new self.arcs[label] = new
def __eq__(self, other): def __eq__(self, other: Any) -> bool:
# Equality test -- ignore the nfaset instance variable # Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState) assert isinstance(other, DFAState)
if self.isfinal != other.isfinal: if self.isfinal != other.isfinal:
@ -385,9 +422,9 @@ def __eq__(self, other):
return False return False
return True return True
__hash__ = None # For Py3 compatibility. __hash__: Any = None # For Py3 compatibility.
def generate_grammar(filename="Grammar.txt"): def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
p = ParserGenerator(filename) p = ParserGenerator(filename)
return p.make_grammar() return p.make_grammar()
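
The pgen classes gain class-level attribute annotations (ParserGenerator.first,
NFAState.arcs, DFAState.nfaset, and friends). A hedged reading of why: the
annotations give mypy concrete element types for attributes that begin life as
empty containers, and they declare the attribute set up front for the compiled
classes. Illustrative sketch:

    from typing import List, Optional, Text, Tuple

    class NFAState:
        # Declared here so that `self.arcs = []` below is not an untyped
        # empty list and so the attribute's type is known to mypyc.
        arcs: List[Tuple[Optional[Text], "NFAState"]]

        def __init__(self) -> None:
            self.arcs = []

        def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None:
            self.arcs.append((label, next))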


@ -1,49 +0,0 @@
# Stubs for lib2to3.pgen2.pgen (Python 3.6)
from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple
from mypy_extensions import NoReturn
from blib2to3.pgen2 import _Path, grammar
from blib2to3.pgen2.tokenize import _TokenInfo
class PgenGrammar(grammar.Grammar): ...
class ParserGenerator:
filename: _Path
stream: IO[Text]
generator: Iterator[_TokenInfo]
first: Dict[Text, Dict[Text, int]]
def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ...
def make_grammar(self) -> PgenGrammar: ...
def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ...
def make_label(self, c: PgenGrammar, label: Text) -> int: ...
def addfirstsets(self) -> None: ...
def calcfirst(self, name: Text) -> None: ...
def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ...
def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ...
def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> List[DFAState]: ...
def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ...
def simplify_dfa(self, dfa: List[DFAState]) -> None: ...
def parse_rhs(self) -> Tuple[NFAState, NFAState]: ...
def parse_alt(self) -> Tuple[NFAState, NFAState]: ...
def parse_item(self) -> Tuple[NFAState, NFAState]: ...
def parse_atom(self) -> Tuple[NFAState, NFAState]: ...
def expect(self, type: int, value: Optional[Any] = ...) -> Text: ...
def gettoken(self) -> None: ...
def raise_error(self, msg: str, *args: Any) -> NoReturn: ...
class NFAState:
arcs: List[Tuple[Optional[Text], NFAState]]
def __init__(self) -> None: ...
def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ...
class DFAState:
nfaset: Dict[NFAState, Any]
isfinal: bool
arcs: Dict[Text, DFAState]
def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ...
def addarc(self, next: DFAState, label: Text) -> None: ...
def unifystate(self, old: DFAState, new: DFAState) -> None: ...
def __eq__(self, other: Any) -> bool: ...
def generate_grammar(filename: _Path = ...) -> PgenGrammar: ...


@ -1,86 +1,89 @@
"""Token constants (from "token.h").""" """Token constants (from "token.h")."""
from typing import Dict
from typing_extensions import Final
# Taken from Python (r53757) and modified to include some tokens # Taken from Python (r53757) and modified to include some tokens
# originally monkeypatched in by pgen2.tokenize # originally monkeypatched in by pgen2.tokenize
# --start constants-- # --start constants--
ENDMARKER = 0 ENDMARKER: Final = 0
NAME = 1 NAME: Final = 1
NUMBER = 2 NUMBER: Final = 2
STRING = 3 STRING: Final = 3
NEWLINE = 4 NEWLINE: Final = 4
INDENT = 5 INDENT: Final = 5
DEDENT = 6 DEDENT: Final = 6
LPAR = 7 LPAR: Final = 7
RPAR = 8 RPAR: Final = 8
LSQB = 9 LSQB: Final = 9
RSQB = 10 RSQB: Final = 10
COLON = 11 COLON: Final = 11
COMMA = 12 COMMA: Final = 12
SEMI = 13 SEMI: Final = 13
PLUS = 14 PLUS: Final = 14
MINUS = 15 MINUS: Final = 15
STAR = 16 STAR: Final = 16
SLASH = 17 SLASH: Final = 17
VBAR = 18 VBAR: Final = 18
AMPER = 19 AMPER: Final = 19
LESS = 20 LESS: Final = 20
GREATER = 21 GREATER: Final = 21
EQUAL = 22 EQUAL: Final = 22
DOT = 23 DOT: Final = 23
PERCENT = 24 PERCENT: Final = 24
BACKQUOTE = 25 BACKQUOTE: Final = 25
LBRACE = 26 LBRACE: Final = 26
RBRACE = 27 RBRACE: Final = 27
EQEQUAL = 28 EQEQUAL: Final = 28
NOTEQUAL = 29 NOTEQUAL: Final = 29
LESSEQUAL = 30 LESSEQUAL: Final = 30
GREATEREQUAL = 31 GREATEREQUAL: Final = 31
TILDE = 32 TILDE: Final = 32
CIRCUMFLEX = 33 CIRCUMFLEX: Final = 33
LEFTSHIFT = 34 LEFTSHIFT: Final = 34
RIGHTSHIFT = 35 RIGHTSHIFT: Final = 35
DOUBLESTAR = 36 DOUBLESTAR: Final = 36
PLUSEQUAL = 37 PLUSEQUAL: Final = 37
MINEQUAL = 38 MINEQUAL: Final = 38
STAREQUAL = 39 STAREQUAL: Final = 39
SLASHEQUAL = 40 SLASHEQUAL: Final = 40
PERCENTEQUAL = 41 PERCENTEQUAL: Final = 41
AMPEREQUAL = 42 AMPEREQUAL: Final = 42
VBAREQUAL = 43 VBAREQUAL: Final = 43
CIRCUMFLEXEQUAL = 44 CIRCUMFLEXEQUAL: Final = 44
LEFTSHIFTEQUAL = 45 LEFTSHIFTEQUAL: Final = 45
RIGHTSHIFTEQUAL = 46 RIGHTSHIFTEQUAL: Final = 46
DOUBLESTAREQUAL = 47 DOUBLESTAREQUAL: Final = 47
DOUBLESLASH = 48 DOUBLESLASH: Final = 48
DOUBLESLASHEQUAL = 49 DOUBLESLASHEQUAL: Final = 49
AT = 50 AT: Final = 50
ATEQUAL = 51 ATEQUAL: Final = 51
OP = 52 OP: Final = 52
COMMENT = 53 COMMENT: Final = 53
NL = 54 NL: Final = 54
RARROW = 55 RARROW: Final = 55
AWAIT = 56 AWAIT: Final = 56
ASYNC = 57 ASYNC: Final = 57
ERRORTOKEN = 58 ERRORTOKEN: Final = 58
COLONEQUAL = 59 COLONEQUAL: Final = 59
N_TOKENS = 60 N_TOKENS: Final = 60
NT_OFFSET = 256 NT_OFFSET: Final = 256
# --end constants-- # --end constants--
tok_name = {} tok_name: Final[Dict[int, str]] = {}
for _name, _value in list(globals().items()): for _name, _value in list(globals().items()):
if type(_value) is type(0): if type(_value) is type(0):
tok_name[_value] = _name tok_name[_value] = _name
def ISTERMINAL(x): def ISTERMINAL(x: int) -> bool:
return x < NT_OFFSET return x < NT_OFFSET
def ISNONTERMINAL(x): def ISNONTERMINAL(x: int) -> bool:
return x >= NT_OFFSET return x >= NT_OFFSET
def ISEOF(x): def ISEOF(x: int) -> bool:
return x == ENDMARKER return x == ENDMARKER


@ -1,74 +0,0 @@
# Stubs for lib2to3.pgen2.token (Python 3.6)
import sys
from typing import Dict, Text
ENDMARKER: int
NAME: int
NUMBER: int
STRING: int
NEWLINE: int
INDENT: int
DEDENT: int
LPAR: int
RPAR: int
LSQB: int
RSQB: int
COLON: int
COMMA: int
SEMI: int
PLUS: int
MINUS: int
STAR: int
SLASH: int
VBAR: int
AMPER: int
LESS: int
GREATER: int
EQUAL: int
DOT: int
PERCENT: int
BACKQUOTE: int
LBRACE: int
RBRACE: int
EQEQUAL: int
NOTEQUAL: int
LESSEQUAL: int
GREATEREQUAL: int
TILDE: int
CIRCUMFLEX: int
LEFTSHIFT: int
RIGHTSHIFT: int
DOUBLESTAR: int
PLUSEQUAL: int
MINEQUAL: int
STAREQUAL: int
SLASHEQUAL: int
PERCENTEQUAL: int
AMPEREQUAL: int
VBAREQUAL: int
CIRCUMFLEXEQUAL: int
LEFTSHIFTEQUAL: int
RIGHTSHIFTEQUAL: int
DOUBLESTAREQUAL: int
DOUBLESLASH: int
DOUBLESLASHEQUAL: int
OP: int
COMMENT: int
NL: int
if sys.version_info >= (3,):
RARROW: int
if sys.version_info >= (3, 5):
AT: int
ATEQUAL: int
AWAIT: int
ASYNC: int
ERRORTOKEN: int
COLONEQUAL: int
N_TOKENS: int
NT_OFFSET: int
tok_name: Dict[int, Text]
def ISTERMINAL(x: int) -> bool: ...
def ISNONTERMINAL(x: int) -> bool: ...
def ISEOF(x: int) -> bool: ...

View File

@ -1,6 +1,8 @@
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved. # All rights reserved.
# mypy: allow-untyped-defs, allow-untyped-calls
"""Tokenization help for Python programs. """Tokenization help for Python programs.
generate_tokens(readline) is a generator that breaks a stream of generate_tokens(readline) is a generator that breaks a stream of
@ -25,6 +27,21 @@
function to which the 5 fields described above are passed as 5 arguments, function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found.""" each time a new token is found."""
from typing import (
Callable,
Iterable,
Iterator,
List,
Optional,
Text,
Tuple,
Pattern,
Union,
cast,
)
from blib2to3.pgen2.token import *
from blib2to3.pgen2.grammar import Grammar
__author__ = "Ka-Ping Yee <ping@lfw.org>" __author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro" __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
@ -41,13 +58,6 @@
] ]
del token del token
try:
bytes
except NameError:
# Support bytes type in Python <= 2.5, so 2to3 turns itself into
# valid Python 3 code.
bytes = str
def group(*choices): def group(*choices):
return "(" + "|".join(choices) + ")" return "(" + "|".join(choices) + ")"
@ -181,7 +191,11 @@ def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for
) )
def tokenize(readline, tokeneater=printtoken): Coord = Tuple[int, int]
TokenEater = Callable[[int, Text, Coord, Coord, Text], None]
def tokenize(readline: Callable[[], Text], tokeneater: TokenEater = printtoken) -> None:
""" """
The tokenize() function accepts two parameters: one representing the The tokenize() function accepts two parameters: one representing the
input stream, and one providing an output mechanism for tokenize(). input stream, and one providing an output mechanism for tokenize().
@ -206,25 +220,36 @@ def tokenize_loop(readline, tokeneater):
tokeneater(*token_info) tokeneater(*token_info)
GoodTokenInfo = Tuple[int, Text, Coord, Coord, Text]
TokenInfo = Union[Tuple[int, str], GoodTokenInfo]
class Untokenizer: class Untokenizer:
def __init__(self):
tokens: List[Text]
prev_row: int
prev_col: int
def __init__(self) -> None:
self.tokens = [] self.tokens = []
self.prev_row = 1 self.prev_row = 1
self.prev_col = 0 self.prev_col = 0
def add_whitespace(self, start): def add_whitespace(self, start: Coord) -> None:
row, col = start row, col = start
assert row <= self.prev_row assert row <= self.prev_row
col_offset = col - self.prev_col col_offset = col - self.prev_col
if col_offset: if col_offset:
self.tokens.append(" " * col_offset) self.tokens.append(" " * col_offset)
def untokenize(self, iterable): def untokenize(self, iterable: Iterable[TokenInfo]) -> Text:
for t in iterable: for t in iterable:
if len(t) == 2: if len(t) == 2:
self.compat(t, iterable) self.compat(cast(Tuple[int, str], t), iterable)
break break
tok_type, token, start, end, line = t tok_type, token, start, end, line = cast(
Tuple[int, Text, Coord, Coord, Text], t
)
self.add_whitespace(start) self.add_whitespace(start)
self.tokens.append(token) self.tokens.append(token)
self.prev_row, self.prev_col = end self.prev_row, self.prev_col = end
@ -233,7 +258,7 @@ def untokenize(self, iterable):
self.prev_col = 0 self.prev_col = 0
return "".join(self.tokens) return "".join(self.tokens)
def compat(self, token, iterable): def compat(self, token: Tuple[int, Text], iterable: Iterable[TokenInfo]) -> None:
startline = False startline = False
indents = [] indents = []
toks_append = self.tokens.append toks_append = self.tokens.append
@ -266,7 +291,7 @@ def compat(self, token, iterable):
blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII) blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
def _get_normal_name(orig_enc): def _get_normal_name(orig_enc: str) -> str:
"""Imitates get_normal_name in tokenizer.c.""" """Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters. # Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-") enc = orig_enc[:12].lower().replace("_", "-")
@ -279,7 +304,7 @@ def _get_normal_name(orig_enc):
return orig_enc return orig_enc
def detect_encoding(readline): def detect_encoding(readline: Callable[[], bytes]) -> Tuple[str, List[bytes]]:
""" """
The detect_encoding() function is used to detect the encoding that should The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argument, readline, be used to decode a Python source file. It requires one argument, readline,
@ -301,13 +326,13 @@ def detect_encoding(readline):
encoding = None encoding = None
default = "utf-8" default = "utf-8"
def read_or_stop(): def read_or_stop() -> bytes:
try: try:
return readline() return readline()
except StopIteration: except StopIteration:
return bytes() return bytes()
def find_cookie(line): def find_cookie(line: bytes) -> Optional[str]:
try: try:
line_string = line.decode("ascii") line_string = line.decode("ascii")
except UnicodeDecodeError: except UnicodeDecodeError:
@ -354,7 +379,7 @@ def find_cookie(line):
return default, [first, second] return default, [first, second]
def untokenize(iterable): def untokenize(iterable: Iterable[TokenInfo]) -> Text:
"""Transform tokens back into Python source code. """Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence Each element returned by the iterable must be a token sequence
@ -376,7 +401,9 @@ def untokenize(iterable):
return ut.untokenize(iterable) return ut.untokenize(iterable)
def generate_tokens(readline, grammar=None): def generate_tokens(
readline: Callable[[], Text], grammar: Optional[Grammar] = None
) -> Iterator[GoodTokenInfo]:
""" """
The generate_tokens() generator requires one argument, readline, which The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the must be a callable object which provides the same interface as the
@ -395,7 +422,7 @@ def generate_tokens(readline, grammar=None):
lnum = parenlev = continued = 0 lnum = parenlev = continued = 0
numchars = "0123456789" numchars = "0123456789"
contstr, needcont = "", 0 contstr, needcont = "", 0
contline = None contline: Optional[str] = None
indents = [0] indents = [0]
# If we know we're parsing 3.7+, we can unconditionally parse `async` and # If we know we're parsing 3.7+, we can unconditionally parse `async` and
@ -407,6 +434,9 @@ def generate_tokens(readline, grammar=None):
async_def_indent = 0 async_def_indent = 0
async_def_nl = False async_def_nl = False
strstart: Tuple[int, int]
endprog: Pattern[str]
while 1: # loop over lines in stream while 1: # loop over lines in stream
try: try:
line = readline() line = readline()
@ -416,6 +446,7 @@ def generate_tokens(readline, grammar=None):
pos, max = 0, len(line) pos, max = 0, len(line)
if contstr: # continued string if contstr: # continued string
assert contline is not None
if not line: if not line:
raise TokenError("EOF in multi-line string", strstart) raise TokenError("EOF in multi-line string", strstart)
endmatch = endprog.match(line) endmatch = endprog.match(line)
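
For illustration only (not part of this commit): generate_tokens() now advertises that it yields GoodTokenInfo 5-tuples of the form (type, string, start, end, line), with start/end being (row, col) Coord pairs. A minimal sketch of a consumer, assuming black (and therefore blib2to3) is installed:

import io

from blib2to3.pgen2 import token, tokenize

source = "x = 1\n"
reader = io.StringIO(source).readline
for tok_type, tok_str, start, end, _line in tokenize.generate_tokens(reader):
    # tok_type is an int token number; start and end are (row, col) coordinates
    print(token.tok_name[tok_type], repr(tok_str), start, end)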

View File

@ -1,32 +0,0 @@
# Stubs for lib2to3.pgen2.tokenize (Python 3.6)
# NOTE: Only elements from __all__ are present.
from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple
from blib2to3.pgen2.token import * # noqa
from blib2to3.pygram import Grammar
_Coord = Tuple[int, int]
_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
class TokenError(Exception): ...
class StopTokenizing(Exception): ...
def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ...
class Untokenizer:
tokens: List[Text]
prev_row: int
prev_col: int
def __init__(self) -> None: ...
def add_whitespace(self, start: _Coord) -> None: ...
def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ...
def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ...
def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
def generate_tokens(
readline: Callable[[], Text],
grammar: Optional[Grammar] = ...
) -> Iterator[_TokenInfo]: ...

View File

@ -6,17 +6,23 @@
# Python imports # Python imports
import os import os
from typing import Union
# Local imports # Local imports
from .pgen2 import token from .pgen2 import token
from .pgen2 import driver from .pgen2 import driver
# The grammar file from .pgen2.grammar import Grammar
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt") # Moved into initialize because mypyc can't handle __file__ (XXX bug)
# # The grammar file
# _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
# _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
# "PatternGrammar.txt")
class Symbols(object): class Symbols(object):
def __init__(self, grammar): def __init__(self, grammar: Grammar) -> None:
"""Initializer. """Initializer.
Creates an attribute for each grammar symbol (nonterminal), Creates an attribute for each grammar symbol (nonterminal),
@ -26,7 +32,129 @@ def __init__(self, grammar):
setattr(self, name, symbol) setattr(self, name, symbol)
def initialize(cache_dir=None): class _python_symbols(Symbols):
and_expr: int
and_test: int
annassign: int
arglist: int
argument: int
arith_expr: int
assert_stmt: int
async_funcdef: int
async_stmt: int
atom: int
augassign: int
break_stmt: int
classdef: int
comp_for: int
comp_if: int
comp_iter: int
comp_op: int
comparison: int
compound_stmt: int
continue_stmt: int
decorated: int
decorator: int
decorators: int
del_stmt: int
dictsetmaker: int
dotted_as_name: int
dotted_as_names: int
dotted_name: int
encoding_decl: int
eval_input: int
except_clause: int
exec_stmt: int
expr: int
expr_stmt: int
exprlist: int
factor: int
file_input: int
flow_stmt: int
for_stmt: int
funcdef: int
global_stmt: int
if_stmt: int
import_as_name: int
import_as_names: int
import_from: int
import_name: int
import_stmt: int
lambdef: int
listmaker: int
namedexpr_test: int
not_test: int
old_comp_for: int
old_comp_if: int
old_comp_iter: int
old_lambdef: int
old_test: int
or_test: int
parameters: int
pass_stmt: int
power: int
print_stmt: int
raise_stmt: int
return_stmt: int
shift_expr: int
simple_stmt: int
single_input: int
sliceop: int
small_stmt: int
star_expr: int
stmt: int
subscript: int
subscriptlist: int
suite: int
term: int
test: int
testlist: int
testlist1: int
testlist_gexp: int
testlist_safe: int
testlist_star_expr: int
tfpdef: int
tfplist: int
tname: int
trailer: int
try_stmt: int
typedargslist: int
varargslist: int
vfpdef: int
vfplist: int
vname: int
while_stmt: int
with_item: int
with_stmt: int
with_var: int
xor_expr: int
yield_arg: int
yield_expr: int
yield_stmt: int
class _pattern_symbols(Symbols):
Alternative: int
Alternatives: int
Details: int
Matcher: int
NegatedUnit: int
Repeater: int
Unit: int
python_grammar: Grammar
python_grammar_no_print_statement: Grammar
python_grammar_no_print_statement_no_exec_statement: Grammar
python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
python_grammar_no_exec_statement: Grammar
pattern_grammar: Grammar
python_symbols: _python_symbols
pattern_symbols: _pattern_symbols
def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None:
global python_grammar global python_grammar
global python_grammar_no_print_statement global python_grammar_no_print_statement
global python_grammar_no_print_statement_no_exec_statement global python_grammar_no_print_statement_no_exec_statement
@ -35,10 +163,16 @@ def initialize(cache_dir=None):
global pattern_grammar global pattern_grammar
global pattern_symbols global pattern_symbols
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(
os.path.dirname(__file__), "PatternGrammar.txt"
)
# Python 2 # Python 2
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir) python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)
python_symbols = Symbols(python_grammar) python_symbols = _python_symbols(python_grammar)
# Python 2 + from __future__ import print_function # Python 2 + from __future__ import print_function
python_grammar_no_print_statement = python_grammar.copy() python_grammar_no_print_statement = python_grammar.copy()
@ -60,4 +194,4 @@ def initialize(cache_dir=None):
pattern_grammar = driver.load_packaged_grammar( pattern_grammar = driver.load_packaged_grammar(
"blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
) )
pattern_symbols = Symbols(pattern_grammar) pattern_symbols = _pattern_symbols(pattern_grammar)
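
For illustration only (not part of this commit): declaring every grammar symbol as an int attribute on _python_symbols / _pattern_symbols lets mypy and mypyc check attribute access that Symbols.__init__ otherwise creates dynamically via setattr. A minimal usage sketch, assuming black is installed so the packaged grammar files are available:

from blib2to3 import pygram

pygram.initialize(cache_dir=None)  # loads the grammars and builds the typed symbol tables
print(pygram.python_symbols.funcdef)    # a nonterminal number (>= 256)
print(pygram.python_symbols.expr_stmt)  # attribute access is now statically checked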

View File

@ -1,126 +0,0 @@
# Stubs for lib2to3.pygram (Python 3.6)
import os
from typing import Any, Union
from blib2to3.pgen2.grammar import Grammar
class Symbols:
def __init__(self, grammar: Grammar) -> None: ...
class python_symbols(Symbols):
and_expr: int
and_test: int
annassign: int
arglist: int
argument: int
arith_expr: int
assert_stmt: int
async_funcdef: int
async_stmt: int
atom: int
augassign: int
break_stmt: int
classdef: int
comp_for: int
comp_if: int
comp_iter: int
comp_op: int
comparison: int
compound_stmt: int
continue_stmt: int
decorated: int
decorator: int
decorators: int
del_stmt: int
dictsetmaker: int
dotted_as_name: int
dotted_as_names: int
dotted_name: int
encoding_decl: int
eval_input: int
except_clause: int
exec_stmt: int
expr: int
expr_stmt: int
exprlist: int
factor: int
file_input: int
flow_stmt: int
for_stmt: int
funcdef: int
global_stmt: int
if_stmt: int
import_as_name: int
import_as_names: int
import_from: int
import_name: int
import_stmt: int
lambdef: int
listmaker: int
namedexpr_test: int
not_test: int
old_comp_for: int
old_comp_if: int
old_comp_iter: int
old_lambdef: int
old_test: int
or_test: int
parameters: int
pass_stmt: int
power: int
print_stmt: int
raise_stmt: int
return_stmt: int
shift_expr: int
simple_stmt: int
single_input: int
sliceop: int
small_stmt: int
star_expr: int
stmt: int
subscript: int
subscriptlist: int
suite: int
term: int
test: int
testlist: int
testlist1: int
testlist_gexp: int
testlist_safe: int
testlist_star_expr: int
tfpdef: int
tfplist: int
tname: int
trailer: int
try_stmt: int
typedargslist: int
varargslist: int
vfpdef: int
vfplist: int
vname: int
while_stmt: int
with_item: int
with_stmt: int
with_var: int
xor_expr: int
yield_arg: int
yield_expr: int
yield_stmt: int
class pattern_symbols(Symbols):
Alternative: int
Alternatives: int
Details: int
Matcher: int
NegatedUnit: int
Repeater: int
Unit: int
python_grammar: Grammar
python_grammar_no_print_statement: Grammar
python_grammar_no_print_statement_no_exec_statement: Grammar
python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
python_grammar_no_exec_statement: Grammar
pattern_grammar: Grammar
def initialize(cache_dir: Union[str, os.PathLike, None]) -> None: ...

View File

@ -10,29 +10,56 @@
There's also a pattern matching implementation here. There's also a pattern matching implementation here.
""" """
# mypy: allow-untyped-defs
from typing import (
Any,
Callable,
Dict,
Iterator,
List,
Optional,
Text,
Tuple,
TypeVar,
Union,
Set,
Iterable,
Sequence,
)
from blib2to3.pgen2.grammar import Grammar
__author__ = "Guido van Rossum <guido@python.org>" __author__ = "Guido van Rossum <guido@python.org>"
import sys import sys
from io import StringIO from io import StringIO
HUGE = 0x7FFFFFFF # maximum repeat count, default max HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
_type_reprs = {} _type_reprs: Dict[int, Union[Text, int]] = {}
def type_repr(type_num): def type_repr(type_num: int) -> Union[Text, int]:
global _type_reprs global _type_reprs
if not _type_reprs: if not _type_reprs:
from .pygram import python_symbols from .pygram import python_symbols
# printing tokens is possible but not as useful # printing tokens is possible but not as useful
# from .pgen2 import token // token.__dict__.items(): # from .pgen2 import token // token.__dict__.items():
for name, val in python_symbols.__dict__.items(): for name in dir(python_symbols):
val = getattr(python_symbols, name)
if type(val) == int: if type(val) == int:
_type_reprs[val] = name _type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num) return _type_reprs.setdefault(type_num, type_num)
_P = TypeVar("_P")
NL = Union["Node", "Leaf"]
Context = Tuple[Text, Tuple[int, int]]
RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
class Base(object): class Base(object):
""" """
@ -45,18 +72,18 @@ class Base(object):
""" """
# Default values for instance variables # Default values for instance variables
type = None # int: token number (< 256) or symbol number (>= 256) type: int # int: token number (< 256) or symbol number (>= 256)
parent = None # Parent node pointer, or None parent: Optional["Node"] = None # Parent node pointer, or None
children = () # Tuple of subnodes children: List[NL] # List of subnodes
was_changed = False was_changed: bool = False
was_checked = False was_checked: bool = False
def __new__(cls, *args, **kwds): def __new__(cls, *args, **kwds):
"""Constructor that prevents Base from being instantiated.""" """Constructor that prevents Base from being instantiated."""
assert cls is not Base, "Cannot instantiate Base" assert cls is not Base, "Cannot instantiate Base"
return object.__new__(cls) return object.__new__(cls)
def __eq__(self, other): def __eq__(self, other: Any) -> bool:
""" """
Compare two nodes for equality. Compare two nodes for equality.
@ -66,9 +93,13 @@ def __eq__(self, other):
return NotImplemented return NotImplemented
return self._eq(other) return self._eq(other)
__hash__ = None # For Py3 compatibility. __hash__ = None # type: Any # For Py3 compatibility.
def _eq(self, other): @property
def prefix(self) -> Text:
raise NotImplementedError
def _eq(self: _P, other: _P) -> bool:
""" """
Compare two nodes for equality. Compare two nodes for equality.
@ -79,7 +110,7 @@ def _eq(self, other):
""" """
raise NotImplementedError raise NotImplementedError
def clone(self): def clone(self: _P) -> _P:
""" """
Return a cloned (deep) copy of self. Return a cloned (deep) copy of self.
@ -87,7 +118,7 @@ def clone(self):
""" """
raise NotImplementedError raise NotImplementedError
def post_order(self): def post_order(self) -> Iterator[NL]:
""" """
Return a post-order iterator for the tree. Return a post-order iterator for the tree.
@ -95,7 +126,7 @@ def post_order(self):
""" """
raise NotImplementedError raise NotImplementedError
def pre_order(self): def pre_order(self) -> Iterator[NL]:
""" """
Return a pre-order iterator for the tree. Return a pre-order iterator for the tree.
@ -103,7 +134,7 @@ def pre_order(self):
""" """
raise NotImplementedError raise NotImplementedError
def replace(self, new): def replace(self, new: Union[NL, List[NL]]) -> None:
"""Replace this node with a new one in the parent.""" """Replace this node with a new one in the parent."""
assert self.parent is not None, str(self) assert self.parent is not None, str(self)
assert new is not None assert new is not None
@ -127,23 +158,23 @@ def replace(self, new):
x.parent = self.parent x.parent = self.parent
self.parent = None self.parent = None
def get_lineno(self): def get_lineno(self) -> Optional[int]:
"""Return the line number which generated the invocant node.""" """Return the line number which generated the invocant node."""
node = self node = self
while not isinstance(node, Leaf): while not isinstance(node, Leaf):
if not node.children: if not node.children:
return return None
node = node.children[0] node = node.children[0]
return node.lineno return node.lineno
def changed(self): def changed(self) -> None:
if self.was_changed: if self.was_changed:
return return
if self.parent: if self.parent:
self.parent.changed() self.parent.changed()
self.was_changed = True self.was_changed = True
def remove(self): def remove(self) -> Optional[int]:
""" """
Remove the node from the tree. Returns the position of the node in its Remove the node from the tree. Returns the position of the node in its
parent's children before it was removed. parent's children before it was removed.
@ -156,9 +187,10 @@ def remove(self):
self.parent.invalidate_sibling_maps() self.parent.invalidate_sibling_maps()
self.parent = None self.parent = None
return i return i
return None
@property @property
def next_sibling(self): def next_sibling(self) -> Optional[NL]:
""" """
The node immediately following the invocant in their parent's children The node immediately following the invocant in their parent's children
list. If the invocant does not have a next sibling, it is None list. If the invocant does not have a next sibling, it is None
@ -168,10 +200,11 @@ def next_sibling(self):
if self.parent.next_sibling_map is None: if self.parent.next_sibling_map is None:
self.parent.update_sibling_maps() self.parent.update_sibling_maps()
assert self.parent.next_sibling_map is not None
return self.parent.next_sibling_map[id(self)] return self.parent.next_sibling_map[id(self)]
@property @property
def prev_sibling(self): def prev_sibling(self) -> Optional[NL]:
""" """
The node immediately preceding the invocant in their parent's children The node immediately preceding the invocant in their parent's children
list. If the invocant does not have a previous sibling, it is None. list. If the invocant does not have a previous sibling, it is None.
@ -181,18 +214,19 @@ def prev_sibling(self):
if self.parent.prev_sibling_map is None: if self.parent.prev_sibling_map is None:
self.parent.update_sibling_maps() self.parent.update_sibling_maps()
assert self.parent.prev_sibling_map is not None
return self.parent.prev_sibling_map[id(self)] return self.parent.prev_sibling_map[id(self)]
def leaves(self): def leaves(self) -> Iterator["Leaf"]:
for child in self.children: for child in self.children:
yield from child.leaves() yield from child.leaves()
def depth(self): def depth(self) -> int:
if self.parent is None: if self.parent is None:
return 0 return 0
return 1 + self.parent.depth() return 1 + self.parent.depth()
def get_suffix(self): def get_suffix(self) -> Text:
""" """
Return the string immediately following the invocant node. This is Return the string immediately following the invocant node. This is
effectively equivalent to node.next_sibling.prefix effectively equivalent to node.next_sibling.prefix
@ -200,19 +234,25 @@ def get_suffix(self):
next_sib = self.next_sibling next_sib = self.next_sibling
if next_sib is None: if next_sib is None:
return "" return ""
return next_sib.prefix prefix = next_sib.prefix
return prefix
if sys.version_info < (3, 0):
def __str__(self):
return str(self).encode("ascii")
class Node(Base): class Node(Base):
"""Concrete implementation for interior nodes.""" """Concrete implementation for interior nodes."""
def __init__(self, type, children, context=None, prefix=None, fixers_applied=None): fixers_applied: Optional[List[Any]]
used_names: Optional[Set[Text]]
def __init__(
self,
type: int,
children: List[NL],
context: Optional[Any] = None,
prefix: Optional[Text] = None,
fixers_applied: Optional[List[Any]] = None,
) -> None:
""" """
Initializer. Initializer.
@ -235,15 +275,16 @@ def __init__(self, type, children, context=None, prefix=None, fixers_applied=Non
else: else:
self.fixers_applied = None self.fixers_applied = None
def __repr__(self): def __repr__(self) -> Text:
"""Return a canonical string representation.""" """Return a canonical string representation."""
assert self.type is not None
return "%s(%s, %r)" % ( return "%s(%s, %r)" % (
self.__class__.__name__, self.__class__.__name__,
type_repr(self.type), type_repr(self.type),
self.children, self.children,
) )
def __unicode__(self): def __str__(self) -> Text:
""" """
Return a pretty string representation. Return a pretty string representation.
@ -251,14 +292,12 @@ def __unicode__(self):
""" """
return "".join(map(str, self.children)) return "".join(map(str, self.children))
if sys.version_info > (3, 0): def _eq(self, other) -> bool:
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality.""" """Compare two nodes for equality."""
return (self.type, self.children) == (other.type, other.children) return (self.type, self.children) == (other.type, other.children)
def clone(self): def clone(self) -> "Node":
assert self.type is not None
"""Return a cloned (deep) copy of self.""" """Return a cloned (deep) copy of self."""
return Node( return Node(
self.type, self.type,
@ -266,20 +305,20 @@ def clone(self):
fixers_applied=self.fixers_applied, fixers_applied=self.fixers_applied,
) )
def post_order(self): def post_order(self) -> Iterator[NL]:
"""Return a post-order iterator for the tree.""" """Return a post-order iterator for the tree."""
for child in self.children: for child in self.children:
yield from child.post_order() yield from child.post_order()
yield self yield self
def pre_order(self): def pre_order(self) -> Iterator[NL]:
"""Return a pre-order iterator for the tree.""" """Return a pre-order iterator for the tree."""
yield self yield self
for child in self.children: for child in self.children:
yield from child.pre_order() yield from child.pre_order()
@property @property
def prefix(self): def prefix(self) -> Text:
""" """
The whitespace and comments preceding this node in the input. The whitespace and comments preceding this node in the input.
""" """
@ -288,11 +327,11 @@ def prefix(self):
return self.children[0].prefix return self.children[0].prefix
@prefix.setter @prefix.setter
def prefix(self, prefix): def prefix(self, prefix) -> None:
if self.children: if self.children:
self.children[0].prefix = prefix self.children[0].prefix = prefix
def set_child(self, i, child): def set_child(self, i: int, child: NL) -> None:
""" """
Equivalent to 'node.children[i] = child'. This method also sets the Equivalent to 'node.children[i] = child'. This method also sets the
child's parent attribute appropriately. child's parent attribute appropriately.
@ -303,7 +342,7 @@ def set_child(self, i, child):
self.changed() self.changed()
self.invalidate_sibling_maps() self.invalidate_sibling_maps()
def insert_child(self, i, child): def insert_child(self, i: int, child: NL) -> None:
""" """
Equivalent to 'node.children.insert(i, child)'. This method also sets Equivalent to 'node.children.insert(i, child)'. This method also sets
the child's parent attribute appropriately. the child's parent attribute appropriately.
@ -313,7 +352,7 @@ def insert_child(self, i, child):
self.changed() self.changed()
self.invalidate_sibling_maps() self.invalidate_sibling_maps()
def append_child(self, child): def append_child(self, child: NL) -> None:
""" """
Equivalent to 'node.children.append(child)'. This method also sets the Equivalent to 'node.children.append(child)'. This method also sets the
child's parent attribute appropriately. child's parent attribute appropriately.
@ -323,14 +362,16 @@ def append_child(self, child):
self.changed() self.changed()
self.invalidate_sibling_maps() self.invalidate_sibling_maps()
def invalidate_sibling_maps(self): def invalidate_sibling_maps(self) -> None:
self.prev_sibling_map = None self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
self.next_sibling_map = None self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
def update_sibling_maps(self): def update_sibling_maps(self) -> None:
self.prev_sibling_map = _prev = {} _prev: Dict[int, Optional[NL]] = {}
self.next_sibling_map = _next = {} _next: Dict[int, Optional[NL]] = {}
previous = None self.prev_sibling_map = _prev
self.next_sibling_map = _next
previous: Optional[NL] = None
for current in self.children: for current in self.children:
_prev[id(current)] = previous _prev[id(current)] = previous
_next[id(previous)] = current _next[id(previous)] = current
@ -343,17 +384,30 @@ class Leaf(Base):
"""Concrete implementation for leaf nodes.""" """Concrete implementation for leaf nodes."""
# Default values for instance variables # Default values for instance variables
value: Text
fixers_applied: List[Any]
bracket_depth: int
opening_bracket: "Leaf"
used_names: Optional[Set[Text]]
_prefix = "" # Whitespace and comments preceding this token in the input _prefix = "" # Whitespace and comments preceding this token in the input
lineno = 0 # Line where this token starts in the input lineno: int = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input column: int = 0 # Column where this token starts in the input
def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]): def __init__(
self,
type: int,
value: Text,
context: Optional[Context] = None,
prefix: Optional[Text] = None,
fixers_applied: List[Any] = [],
) -> None:
""" """
Initializer. Initializer.
Takes a type constant (a token number < 256), a string value, and an Takes a type constant (a token number < 256), a string value, and an
optional context keyword argument. optional context keyword argument.
""" """
assert 0 <= type < 256, type assert 0 <= type < 256, type
if context is not None: if context is not None:
self._prefix, (self.lineno, self.column) = context self._prefix, (self.lineno, self.column) = context
@ -361,19 +415,21 @@ def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
self.value = value self.value = value
if prefix is not None: if prefix is not None:
self._prefix = prefix self._prefix = prefix
self.fixers_applied = fixers_applied[:] self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
self.children = []
def __repr__(self): def __repr__(self) -> str:
"""Return a canonical string representation.""" """Return a canonical string representation."""
from .pgen2.token import tok_name from .pgen2.token import tok_name
assert self.type is not None
return "%s(%s, %r)" % ( return "%s(%s, %r)" % (
self.__class__.__name__, self.__class__.__name__,
tok_name.get(self.type, self.type), tok_name.get(self.type, self.type),
self.value, self.value,
) )
def __unicode__(self): def __str__(self) -> Text:
""" """
Return a pretty string representation. Return a pretty string representation.
@ -381,14 +437,12 @@ def __unicode__(self):
""" """
return self.prefix + str(self.value) return self.prefix + str(self.value)
if sys.version_info > (3, 0): def _eq(self, other) -> bool:
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality.""" """Compare two nodes for equality."""
return (self.type, self.value) == (other.type, other.value) return (self.type, self.value) == (other.type, other.value)
def clone(self): def clone(self) -> "Leaf":
assert self.type is not None
"""Return a cloned (deep) copy of self.""" """Return a cloned (deep) copy of self."""
return Leaf( return Leaf(
self.type, self.type,
@ -397,31 +451,31 @@ def clone(self):
fixers_applied=self.fixers_applied, fixers_applied=self.fixers_applied,
) )
def leaves(self): def leaves(self) -> Iterator["Leaf"]:
yield self yield self
def post_order(self): def post_order(self) -> Iterator["Leaf"]:
"""Return a post-order iterator for the tree.""" """Return a post-order iterator for the tree."""
yield self yield self
def pre_order(self): def pre_order(self) -> Iterator["Leaf"]:
"""Return a pre-order iterator for the tree.""" """Return a pre-order iterator for the tree."""
yield self yield self
@property @property
def prefix(self): def prefix(self) -> Text:
""" """
The whitespace and comments preceding this token in the input. The whitespace and comments preceding this token in the input.
""" """
return self._prefix return self._prefix
@prefix.setter @prefix.setter
def prefix(self, prefix): def prefix(self, prefix) -> None:
self.changed() self.changed()
self._prefix = prefix self._prefix = prefix
def convert(gr, raw_node): def convert(gr: Grammar, raw_node: RawNode) -> NL:
""" """
Convert raw node information to a Node or Leaf instance. Convert raw node information to a Node or Leaf instance.
@ -433,11 +487,15 @@ def convert(gr, raw_node):
if children or type in gr.number2symbol: if children or type in gr.number2symbol:
# If there's exactly one child, return that child instead of # If there's exactly one child, return that child instead of
# creating a new node. # creating a new node.
assert children is not None
if len(children) == 1: if len(children) == 1:
return children[0] return children[0]
return Node(type, children, context=context) return Node(type, children, context=context)
else: else:
return Leaf(type, value, context=context) return Leaf(type, value or "", context=context)
_Results = Dict[Text, NL]
class BasePattern(object): class BasePattern(object):
@ -457,22 +515,27 @@ class BasePattern(object):
""" """
# Defaults for instance variables # Defaults for instance variables
type: Optional[int]
type = None # Node type (token if < 256, symbol if >= 256) type = None # Node type (token if < 256, symbol if >= 256)
content = None # Optional content matching pattern content: Any = None # Optional content matching pattern
name = None # Optional name used to store match in results dict name: Optional[Text] = None # Optional name used to store match in results dict
def __new__(cls, *args, **kwds): def __new__(cls, *args, **kwds):
"""Constructor that prevents BasePattern from being instantiated.""" """Constructor that prevents BasePattern from being instantiated."""
assert cls is not BasePattern, "Cannot instantiate BasePattern" assert cls is not BasePattern, "Cannot instantiate BasePattern"
return object.__new__(cls) return object.__new__(cls)
def __repr__(self): def __repr__(self) -> Text:
assert self.type is not None
args = [type_repr(self.type), self.content, self.name] args = [type_repr(self.type), self.content, self.name]
while args and args[-1] is None: while args and args[-1] is None:
del args[-1] del args[-1]
return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
def optimize(self): def _submatch(self, node, results=None) -> bool:
raise NotImplementedError
def optimize(self) -> "BasePattern":
""" """
A subclass can define this as a hook for optimizations. A subclass can define this as a hook for optimizations.
@ -480,7 +543,7 @@ def optimize(self):
""" """
return self return self
def match(self, node, results=None): def match(self, node: NL, results: Optional[_Results] = None) -> bool:
""" """
Does this pattern exactly match a node? Does this pattern exactly match a node?
@ -494,18 +557,19 @@ def match(self, node, results=None):
if self.type is not None and node.type != self.type: if self.type is not None and node.type != self.type:
return False return False
if self.content is not None: if self.content is not None:
r = None r: Optional[_Results] = None
if results is not None: if results is not None:
r = {} r = {}
if not self._submatch(node, r): if not self._submatch(node, r):
return False return False
if r: if r:
assert results is not None
results.update(r) results.update(r)
if results is not None and self.name: if results is not None and self.name:
results[self.name] = node results[self.name] = node
return True return True
def match_seq(self, nodes, results=None): def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
""" """
Does this pattern exactly match a sequence of nodes? Does this pattern exactly match a sequence of nodes?
@ -515,19 +579,24 @@ def match_seq(self, nodes, results=None):
return False return False
return self.match(nodes[0], results) return self.match(nodes[0], results)
def generate_matches(self, nodes): def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
""" """
Generator yielding all matches for this pattern. Generator yielding all matches for this pattern.
Default implementation for non-wildcard patterns. Default implementation for non-wildcard patterns.
""" """
r = {} r: _Results = {}
if nodes and self.match(nodes[0], r): if nodes and self.match(nodes[0], r):
yield 1, r yield 1, r
class LeafPattern(BasePattern): class LeafPattern(BasePattern):
def __init__(self, type=None, content=None, name=None): def __init__(
self,
type: Optional[int] = None,
content: Optional[Text] = None,
name: Optional[Text] = None,
) -> None:
""" """
Initializer. Takes optional type, content, and name. Initializer. Takes optional type, content, and name.
@ -547,7 +616,7 @@ def __init__(self, type=None, content=None, name=None):
self.content = content self.content = content
self.name = name self.name = name
def match(self, node, results=None): def match(self, node: NL, results=None):
"""Override match() to insist on a leaf node.""" """Override match() to insist on a leaf node."""
if not isinstance(node, Leaf): if not isinstance(node, Leaf):
return False return False
@ -571,9 +640,14 @@ def _submatch(self, node, results=None):
class NodePattern(BasePattern): class NodePattern(BasePattern):
wildcards = False wildcards: bool = False
def __init__(self, type=None, content=None, name=None): def __init__(
self,
type: Optional[int] = None,
content: Optional[Iterable[Text]] = None,
name: Optional[Text] = None,
) -> None:
""" """
Initializer. Takes optional type, content, and name. Initializer. Takes optional type, content, and name.
@ -593,16 +667,16 @@ def __init__(self, type=None, content=None, name=None):
assert type >= 256, type assert type >= 256, type
if content is not None: if content is not None:
assert not isinstance(content, str), repr(content) assert not isinstance(content, str), repr(content)
content = list(content) newcontent = list(content)
for i, item in enumerate(content): for i, item in enumerate(newcontent):
assert isinstance(item, BasePattern), (i, item) assert isinstance(item, BasePattern), (i, item)
if isinstance(item, WildcardPattern): if isinstance(item, WildcardPattern):
self.wildcards = True self.wildcards = True
self.type = type self.type = type
self.content = content self.content = newcontent
self.name = name self.name = name
def _submatch(self, node, results=None): def _submatch(self, node, results=None) -> bool:
""" """
Match the pattern's content to the node's children. Match the pattern's content to the node's children.
@ -644,7 +718,16 @@ class WildcardPattern(BasePattern):
except it always uses non-greedy matching. except it always uses non-greedy matching.
""" """
def __init__(self, content=None, min=0, max=HUGE, name=None): min: int
max: int
def __init__(
self,
content: Optional[Text] = None,
min: int = 0,
max: int = HUGE,
name: Optional[Text] = None,
) -> None:
""" """
Initializer. Initializer.
@ -669,17 +752,20 @@ def __init__(self, content=None, min=0, max=HUGE, name=None):
""" """
assert 0 <= min <= max <= HUGE, (min, max) assert 0 <= min <= max <= HUGE, (min, max)
if content is not None: if content is not None:
content = tuple(map(tuple, content)) # Protect against alterations f = lambda s: tuple(s)
wrapped_content = tuple(map(f, content)) # Protect against alterations
# Check sanity of alternatives # Check sanity of alternatives
assert len(content), repr(content) # Can't have zero alternatives assert len(wrapped_content), repr(
for alt in content: wrapped_content
) # Can't have zero alternatives
for alt in wrapped_content:
assert len(alt), repr(alt) # Can have empty alternatives assert len(alt), repr(alt) # Can have empty alternatives
self.content = content self.content = wrapped_content
self.min = min self.min = min
self.max = max self.max = max
self.name = name self.name = name
def optimize(self): def optimize(self) -> Any:
"""Optimize certain stacked wildcard patterns.""" """Optimize certain stacked wildcard patterns."""
subpattern = None subpattern = None
if ( if (
@ -707,11 +793,11 @@ def optimize(self):
) )
return self return self
def match(self, node, results=None): def match(self, node, results=None) -> bool:
"""Does this pattern exactly match a node?""" """Does this pattern exactly match a node?"""
return self.match_seq([node], results) return self.match_seq([node], results)
def match_seq(self, nodes, results=None): def match_seq(self, nodes, results=None) -> bool:
"""Does this pattern exactly match a sequence of nodes?""" """Does this pattern exactly match a sequence of nodes?"""
for c, r in self.generate_matches(nodes): for c, r in self.generate_matches(nodes):
if c == len(nodes): if c == len(nodes):
@ -722,7 +808,7 @@ def match_seq(self, nodes, results=None):
return True return True
return False return False
def generate_matches(self, nodes): def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
""" """
Generator yielding matches for a sequence of nodes. Generator yielding matches for a sequence of nodes.
@ -767,7 +853,7 @@ def generate_matches(self, nodes):
if hasattr(sys, "getrefcount"): if hasattr(sys, "getrefcount"):
sys.stderr = save_stderr sys.stderr = save_stderr
def _iterative_matches(self, nodes): def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
"""Helper to iteratively yield the matches.""" """Helper to iteratively yield the matches."""
nodelen = len(nodes) nodelen = len(nodes)
if 0 >= self.min: if 0 >= self.min:
@ -796,10 +882,10 @@ def _iterative_matches(self, nodes):
new_results.append((c0 + c1, r)) new_results.append((c0 + c1, r))
results = new_results results = new_results
def _bare_name_matches(self, nodes): def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
"""Special optimized matcher for bare_name.""" """Special optimized matcher for bare_name."""
count = 0 count = 0
r = {} r = {} # type: _Results
done = False done = False
max = len(nodes) max = len(nodes)
while not done and count < max: while not done and count < max:
@ -809,10 +895,11 @@ def _bare_name_matches(self, nodes):
count += 1 count += 1
done = False done = False
break break
assert self.name is not None
r[self.name] = nodes[:count] r[self.name] = nodes[:count]
return count, r return count, r
def _recursive_matches(self, nodes, count): def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
"""Helper to recursively yield the matches.""" """Helper to recursively yield the matches."""
assert self.content is not None assert self.content is not None
if count >= self.min: if count >= self.min:
@ -828,7 +915,7 @@ def _recursive_matches(self, nodes, count):
class NegatedPattern(BasePattern): class NegatedPattern(BasePattern):
def __init__(self, content=None): def __init__(self, content: Optional[Any] = None) -> None:
""" """
Initializer. Initializer.
@ -841,15 +928,15 @@ def __init__(self, content=None):
assert isinstance(content, BasePattern), repr(content) assert isinstance(content, BasePattern), repr(content)
self.content = content self.content = content
def match(self, node): def match(self, node, results=None) -> bool:
# We never match a node in its entirety # We never match a node in its entirety
return False return False
def match_seq(self, nodes): def match_seq(self, nodes, results=None) -> bool:
# We only match an empty sequence of nodes in its entirety # We only match an empty sequence of nodes in its entirety
return len(nodes) == 0 return len(nodes) == 0
def generate_matches(self, nodes): def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
if self.content is None: if self.content is None:
# Return a match if there is an empty sequence # Return a match if there is an empty sequence
if len(nodes) == 0: if len(nodes) == 0:
@ -861,7 +948,9 @@ def generate_matches(self, nodes):
yield 0, {} yield 0, {}
def generate_matches(patterns, nodes): def generate_matches(
patterns: List[BasePattern], nodes: List[NL]
) -> Iterator[Tuple[int, _Results]]:
""" """
Generator yielding matches for a sequence of patterns and nodes. Generator yielding matches for a sequence of patterns and nodes.
@ -887,3 +976,6 @@ def generate_matches(patterns, nodes):
r.update(r0) r.update(r0)
r.update(r1) r.update(r1)
yield c0 + c1, r yield c0 + c1, r
_Convert = Callable[[Grammar, RawNode], Any]
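
For illustration only (not part of this commit): with the annotations above, building and matching trees is type-checked end to end. A minimal sketch, assuming black is installed; the variable names are made up for the example:

from blib2to3 import pygram
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, LeafPattern, Node, NodePattern

pygram.initialize(cache_dir=None)
syms = pygram.python_symbols

# Build a tiny expr_stmt tree by hand: "x = 1"
lhs = Leaf(token.NAME, "x")
eq = Leaf(token.EQUAL, "=", prefix=" ")
rhs = Leaf(token.NUMBER, "1", prefix=" ")
stmt = Node(syms.expr_stmt, [lhs, eq, rhs])
print(str(stmt))  # -> "x = 1"

# Match it with a pattern and capture the left-hand side
pattern = NodePattern(
    syms.expr_stmt,
    [LeafPattern(token.NAME, name="lhs"), LeafPattern(token.EQUAL), LeafPattern(token.NUMBER)],
)
results = {}
if pattern.match(stmt, results):
    print("matched lhs:", results["lhs"].value)  # -> matched lhs: x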

View File

@ -1,89 +0,0 @@
# Stubs for lib2to3.pytree (Python 3.6)
import sys
from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Tuple, TypeVar, Union
from blib2to3.pgen2.grammar import Grammar
_P = TypeVar('_P')
_NL = Union[Node, Leaf]
_Context = Tuple[Text, int, int]
_Results = Dict[Text, _NL]
_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]]
_Convert = Callable[[Grammar, _RawNode], Any]
HUGE: int
def type_repr(type_num: int) -> Text: ...
class Base:
type: int
parent: Optional[Node]
prefix: Text
children: List[_NL]
was_changed: bool
was_checked: bool
def __eq__(self, other: Any) -> bool: ...
def _eq(self: _P, other: _P) -> bool: ...
def clone(self: _P) -> _P: ...
def post_order(self) -> Iterator[_NL]: ...
def pre_order(self) -> Iterator[_NL]: ...
def replace(self, new: Union[_NL, List[_NL]]) -> None: ...
def get_lineno(self) -> int: ...
def changed(self) -> None: ...
def remove(self) -> Optional[int]: ...
@property
def next_sibling(self) -> Optional[_NL]: ...
@property
def prev_sibling(self) -> Optional[_NL]: ...
def leaves(self) -> Iterator[Leaf]: ...
def depth(self) -> int: ...
def get_suffix(self) -> Text: ...
if sys.version_info < (3,):
def get_prefix(self) -> Text: ...
def set_prefix(self, prefix: Text) -> None: ...
class Node(Base):
fixers_applied: List[Any]
def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ...
def set_child(self, i: int, child: _NL) -> None: ...
def insert_child(self, i: int, child: _NL) -> None: ...
def append_child(self, child: _NL) -> None: ...
class Leaf(Base):
lineno: int
column: int
value: Text
fixers_applied: List[Any]
def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ...
# bolted on attributes by Black
bracket_depth: int
opening_bracket: Leaf
def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ...
class BasePattern:
type: int
content: Optional[Text]
name: Optional[Text]
def optimize(self) -> BasePattern: ... # sic, subclasses are free to optimize themselves into different patterns
def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ...
def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ...
def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
class LeafPattern(BasePattern):
def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ...
class NodePattern(BasePattern):
wildcards: bool
def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ...
class WildcardPattern(BasePattern):
min: int
max: int
def __init__(self, content: Optional[Text] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ...
class NegatedPattern(BasePattern):
def __init__(self, content: Optional[Text] = ...) -> None: ...
def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...

View File

@ -21,6 +21,7 @@ strict_optional=True
warn_no_return=True warn_no_return=True
warn_redundant_casts=True warn_redundant_casts=True
warn_unused_ignores=True warn_unused_ignores=True
disallow_any_generics=True
# The following are off by default. Flip them on if you feel # The following are off by default. Flip them on if you feel
# adventurous. # adventurous.
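
For illustration only (not part of this commit): disallow_any_generics makes mypy reject bare generic types, so annotations have to spell out their parameters. A minimal sketch with a made-up function:

from typing import List

def first_bad(items: List) -> object:  # flagged: bare List has implicit Any parameters
    return items[0]

def first_ok(items: List[str]) -> str:  # accepted: the type parameter is explicit
    return items[0]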

View File

@ -1,6 +1,7 @@
# Copyright (C) 2018 Łukasz Langa # Copyright (C) 2018 Łukasz Langa
from setuptools import setup from setuptools import setup
import sys import sys
import os
assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+" assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+"
from pathlib import Path # noqa E402 from pathlib import Path # noqa E402
@ -15,6 +16,33 @@ def get_long_description() -> str:
return ld_file.read() return ld_file.read()
USE_MYPYC = False
# To compile with mypyc, a mypyc checkout must be present on the PYTHONPATH
if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc":
sys.argv.pop(1)
USE_MYPYC = True
if os.getenv("BLACK_USE_MYPYC", None) == "1":
USE_MYPYC = True
if USE_MYPYC:
mypyc_targets = [
"black.py",
"blib2to3/pytree.py",
"blib2to3/pygram.py",
"blib2to3/pgen2/parse.py",
"blib2to3/pgen2/grammar.py",
"blib2to3/pgen2/token.py",
"blib2to3/pgen2/driver.py",
"blib2to3/pgen2/pgen.py",
]
from mypyc.build import mypycify
opt_level = os.getenv("MYPYC_OPT_LEVEL", "3")
ext_modules = mypycify(mypyc_targets, opt_level=opt_level)
else:
ext_modules = []
setup( setup(
name="black", name="black",
use_scm_version={ use_scm_version={
@ -30,6 +58,7 @@ def get_long_description() -> str:
url="https://github.com/psf/black", url="https://github.com/psf/black",
license="MIT", license="MIT",
py_modules=["black", "blackd", "_black_version"], py_modules=["black", "blackd", "_black_version"],
ext_modules=ext_modules,
packages=["blib2to3", "blib2to3.pgen2"], packages=["blib2to3", "blib2to3.pgen2"],
package_data={"blib2to3": ["*.txt"]}, package_data={"blib2to3": ["*.txt"]},
python_requires=">=3.6", python_requires=">=3.6",
@ -43,6 +72,8 @@ def get_long_description() -> str:
"regex", "regex",
"pathspec>=0.6, <1", "pathspec>=0.6, <1",
"dataclasses>=0.6; python_version < '3.7'", "dataclasses>=0.6; python_version < '3.7'",
"typing_extensions>=3.7.4",
"mypy_extensions>=0.4.3",
], ],
extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]}, extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]},
test_suite="tests.test_black", test_suite="tests.test_black",
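
For illustration only (not part of this commit): with the hook above, a compiled build can be requested either by exporting BLACK_USE_MYPYC=1 in the environment before running pip or setup.py, or by passing --use-mypyc as the first setup.py argument (it is popped before setuptools parses the command line). MYPYC_OPT_LEVEL selects the C optimization level and defaults to 3, and a mypyc checkout must be importable on PYTHONPATH for the mypycify() call to succeed; otherwise the build falls back to the pure-Python modules with an empty ext_modules list.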

View File

@ -1540,6 +1540,7 @@ def test_symlink_out_of_root_directory(self) -> None:
# outside of the `root` directory. # outside of the `root` directory.
path.iterdir.return_value = [child] path.iterdir.return_value = [child]
child.resolve.return_value = Path("/a/b/c") child.resolve.return_value = Path("/a/b/c")
child.as_posix.return_value = "/a/b/c"
child.is_symlink.return_value = True child.is_symlink.return_value = True
try: try:
list( list(