Blacken .py files in blib2to3 (#1011)

* Blacken .py files in blib2to3

This is in preparation for adding type annotations to blib2to3 in
order to compile it with mypyc (#1009, which I can rebase on top of
this).

To enforce that it stays blackened, I just cargo-culted the existing
test code used for validating formatting. It feels pretty clunky now,
though, so I can abstract the common logic out into a helper if that
seems better. (But error messages might be less clear then?)

* Tidy up the tests
Authored by Michael J. Sullivan on 2019-10-20 06:55:31 -07:00; committed by Łukasz Langa
parent 3bfb66971f
commit 0ff718e1e2
12 changed files with 457 additions and 319 deletions
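Before the file-by-file diff, here is the shape of the self-check the tidied-up tests use. It is a sketch of the checkSourceFile helper added in the tests/test_black.py hunk at the end of this commit, and it assumes the test module's existing helpers (THIS_DIR, read_data, fs, ff, dump_to_stderr and the assert_* functions) behave as they do in the current test suite, so treat it as illustrative rather than a drop-in snippet:

    @patch("black.dump_to_file", dump_to_stderr)
    def checkSourceFile(self, name: str) -> None:
        """Assert that a repository file is already Black-formatted and stable."""
        path = THIS_DIR.parent / name
        # With no "# output" section, read_data returns the file's contents
        # as both the source and the expected result.
        source, expected = read_data(str(path), data=False)
        actual = fs(source)                            # fs() formats a string with the default mode
        self.assertFormatEqual(expected, actual)       # formatting must be a no-op
        black.assert_equivalent(source, actual)        # and must not change the AST
        black.assert_stable(source, actual, black.FileMode())
        self.assertFalse(ff(path))                     # in-place formatting reports no changes

    def test_tokenize(self) -> None:
        self.checkSourceFile("blib2to3/pgen2/tokenize.py")

Each blackened module then gets a one-line test such as test_tokenize above, which keeps failure messages pointed at the specific file that drifted out of formatting.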

File: blib2to3/pgen2/conv.py

@@ -70,8 +70,7 @@ def parse_graminit_h(self, filename):
             lineno += 1
             mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
             if not mo and line.strip():
-                print("%s(%s): can't parse %s" % (filename, lineno,
-                                                  line.strip()))
+                print("%s(%s): can't parse %s" % (filename, lineno, line.strip()))
             else:
                 symbol, number = mo.groups()
                 number = int(number)
@@ -129,8 +128,7 @@ def parse_graminit_c(self, filename):
         states = []
         while line.startswith("static arc "):
             while line.startswith("static arc "):
-                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
-                              line)
+                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
                 assert mo, (lineno, line)
                 n, m, k = list(map(int, mo.groups()))
                 arcs = []
@@ -170,8 +168,7 @@ def parse_graminit_c(self, filename):
         ndfas = int(mo.group(1))
         for i in range(ndfas):
             lineno, line = lineno + 1, next(f)
-            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
-                          line)
+            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
             assert mo, (lineno, line)
             symbol = mo.group(2)
             number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))

File: blib2to3/pgen2/driver.py

@@ -28,13 +28,7 @@
 class Driver(object):
-    def __init__(
-        self,
-        grammar,
-        convert=None,
-        logger=None,
-    ):
+    def __init__(self, grammar, convert=None, logger=None):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
@@ -73,8 +67,9 @@ def parse_tokens(self, tokens, debug=False):
             if type == token.OP:
                 type = grammar.opmap[value]
             if debug:
-                self.logger.debug("%s %r (prefix=%r)",
-                                  token.tok_name[type], value, prefix)
+                self.logger.debug(
+                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
+                )
             if type == token.INDENT:
                 indent_columns.append(len(value))
                 _prefix = prefix + value
@@ -96,8 +91,7 @@ def parse_tokens(self, tokens, debug=False):
                 column = 0
         else:
             # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input",
-                                   type, value, (prefix, start))
+            raise parse.ParseError("incomplete input", type, value, (prefix, start))
         return p.rootnode

     def parse_stream_raw(self, stream, debug=False):
@@ -117,8 +111,7 @@ def parse_file(self, filename, encoding=None, debug=False):
     def parse_string(self, text, debug=False):
         """Parse a string and return the syntax tree."""
         tokens = tokenize.generate_tokens(
-            io.StringIO(text).readline,
-            grammar=self.grammar
+            io.StringIO(text).readline, grammar=self.grammar
         )
         return self.parse_tokens(tokens, debug)

@@ -130,24 +123,24 @@ def _partially_consume_prefix(self, prefix, column):
         for char in prefix:
             current_line += char
             if wait_for_nl:
-                if char == '\n':
+                if char == "\n":
                     if current_line.strip() and current_column < column:
-                        res = ''.join(lines)
+                        res = "".join(lines)
                         return res, prefix[len(res) :]
                     lines.append(current_line)
                     current_line = ""
                     current_column = 0
                     wait_for_nl = False
-            elif char in ' \t':
+            elif char in " \t":
                 current_column += 1
-            elif char == '\n':
+            elif char == "\n":
                 # unexpected empty line
                 current_column = 0
             else:
                 # indent is finished
                 wait_for_nl = True
-        return ''.join(lines), current_line
+        return "".join(lines), current_line


 def _generate_pickle_name(gt, cache_dir=None):
@@ -161,8 +154,7 @@ def _generate_pickle_name(gt, cache_dir=None):
     return name


-def load_grammar(gt="Grammar.txt", gp=None,
-                 save=True, force=False, logger=None):
+def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger(__name__)
@@ -219,11 +211,11 @@ def main(*args):
     """
     if not args:
         args = sys.argv[1:]
-    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
-                        format='%(message)s')
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
     for gt in args:
         load_grammar(gt, save=True, force=True)
     return True


 if __name__ == "__main__":
     sys.exit(int(not main()))

File: blib2to3/pgen2/grammar.py

@@ -90,7 +90,9 @@ def __init__(self):
     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
-        with tempfile.NamedTemporaryFile(dir=os.path.dirname(filename), delete=False) as f:
+        with tempfile.NamedTemporaryFile(
+            dir=os.path.dirname(filename), delete=False
+        ) as f:
             pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
         os.replace(f.name, filename)
@@ -109,8 +111,14 @@ def copy(self):
         Copy the grammar.
         """
         new = self.__class__()
-        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
-                          "tokens", "symbol2label"):
+        for dict_attr in (
+            "symbol2number",
+            "number2symbol",
+            "dfas",
+            "keywords",
+            "tokens",
+            "symbol2label",
+        ):
             setattr(new, dict_attr, getattr(self, dict_attr).copy())
         new.labels = self.labels[:]
         new.states = self.states[:]
@@ -121,6 +129,7 @@ def copy(self):
     def report(self):
         """Dump the grammar tables to standard output, for debugging."""
         from pprint import pprint
+
         print("s2n")
         pprint(self.symbol2number)
         print("n2s")

File: blib2to3/pgen2/literals.py

@@ -5,7 +5,8 @@
 import regex as re

-simple_escapes = {"a": "\a",
+simple_escapes = {
+    "a": "\a",
     "b": "\b",
     "f": "\f",
     "n": "\n",
@@ -14,7 +15,9 @@
     "v": "\v",
     "'": "'",
     '"': '"',
-    "\\": "\\"}
+    "\\": "\\",
+}


 def escape(m):
     all, tail = m.group(0, 1)
@@ -37,6 +40,7 @@ def escape(m):
         raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
     return chr(i)

+
 def evalString(s):
     assert s.startswith("'") or s.startswith('"'), repr(s[:1])
     q = s[0]
@@ -47,6 +51,7 @@ def evalString(s):
     s = s[len(q) : -len(q)]
     return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)

+
 def test():
     for i in range(256):
         c = chr(i)

File: blib2to3/pgen2/parse.py

@@ -13,17 +13,20 @@
 # Local imports
 from . import token

+
 class ParseError(Exception):
     """Exception to signal the parser is stuck."""

     def __init__(self, msg, type, value, context):
-        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
-                           (msg, type, value, context))
+        Exception.__init__(
+            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
+        )
         self.msg = msg
         self.type = type
         self.value = value
         self.context = context

+
 class Parser(object):
     """Parser engine.
@@ -152,8 +155,7 @@ def addtoken(self, type, value, context):
                     self.pop()
                     if not self.stack:
                         # Done parsing, but another token is input
-                        raise ParseError("too much input",
-                                         type, value, context)
+                        raise ParseError("too much input", type, value, context)
                 else:
                     # No success finding a transition
                     raise ParseError("bad input", type, value, context)

File: blib2to3/pgen2/pgen.py

@@ -4,11 +4,12 @@
 # Pgen imports
 from . import grammar, token, tokenize


 class PgenGrammar(grammar.Grammar):
     pass

+
 class ParserGenerator(object):
     def __init__(self, filename, stream=None):
         close_stream = None
         if stream is None:
@@ -136,9 +137,11 @@ def calcfirst(self, name):
         for label, itsfirst in overlapcheck.items():
             for symbol in itsfirst:
                 if symbol in inverse:
-                    raise ValueError("rule %s is ambiguous; %s is in the"
-                                     " first sets of %s as well as %s" %
-                                     (name, symbol, label, inverse[symbol]))
+                    raise ValueError(
+                        "rule %s is ambiguous; %s is in the"
+                        " first sets of %s as well as %s"
+                        % (name, symbol, label, inverse[symbol])
+                    )
                 inverse[symbol] = label
         self.first[name] = totalset
@@ -173,10 +176,12 @@ def make_dfa(self, start, finish):
         # values.
         assert isinstance(start, NFAState)
         assert isinstance(finish, NFAState)
+
         def closure(state):
             base = {}
             addclosure(state, base)
             return base
+
         def addclosure(state, base):
             assert isinstance(state, NFAState)
             if state in base:
@@ -185,6 +190,7 @@ def addclosure(state, base):
             for label, next in state.arcs:
                 if label is None:
                     addclosure(next, base)
+
         states = [DFAState(closure(start), finish)]
         for state in states:  # NB states grows while we're iterating
             arcs = {}
@@ -266,8 +272,7 @@ def parse_rhs(self):
     def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
-        while (self.value in ("(", "[") or
-               self.type in (token.NAME, token.STRING)):
+        while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -307,13 +312,15 @@ def parse_atom(self):
             self.gettoken()
             return a, z
         else:
-            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
-                             self.type, self.value)
+            self.raise_error(
+                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
+            )

     def expect(self, type, value=None):
         if self.type != type or (value is not None and self.value != value):
-            self.raise_error("expected %s/%s, got %s/%s",
-                             type, value, self.type, self.value)
+            self.raise_error(
+                "expected %s/%s, got %s/%s", type, value, self.type, self.value
+            )
         value = self.value
         self.gettoken()
         return value
@@ -331,11 +338,10 @@ def raise_error(self, msg, *args):
             msg = msg % args
         except:
             msg = " ".join([msg] + list(map(str, args)))
-        raise SyntaxError(msg, (self.filename, self.end[0],
-                                self.end[1], self.line))
+        raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))


 class NFAState(object):
     def __init__(self):
         self.arcs = []  # list of (label, NFAState) pairs
@@ -344,8 +350,8 @@ def addarc(self, next, label=None):
         assert isinstance(next, NFAState)
         self.arcs.append((label, next))


 class DFAState(object):
     def __init__(self, nfaset, final):
         assert isinstance(nfaset, dict)
         assert isinstance(next(iter(nfaset)), NFAState)
@@ -381,6 +387,7 @@ def __eq__(self, other):
     __hash__ = None  # For Py3 compatibility.

+
 def generate_grammar(filename="Grammar.txt"):
     p = ParserGenerator(filename)
     return p.make_grammar()

File: blib2to3/pgen2/token.py

@@ -77,8 +77,10 @@
 def ISTERMINAL(x):
     return x < NT_OFFSET

+
 def ISNONTERMINAL(x):
     return x >= NT_OFFSET

+
 def ISEOF(x):
     return x == ENDMARKER

File: blib2to3/pgen2/tokenize.py

@@ -25,17 +25,20 @@
 function to which the 5 fields described above are passed as 5 arguments,
 each time a new token is found."""

-__author__ = 'Ka-Ping Yee <ping@lfw.org>'
-__credits__ = \
-    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
+__author__ = "Ka-Ping Yee <ping@lfw.org>"
+__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"

 import regex as re
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *

 from . import token
-__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
-           "generate_tokens", "untokenize"]
+
+__all__ = [x for x in dir(token) if x[0] != "_"] + [
+    "tokenize",
+    "generate_tokens",
+    "untokenize",
+]
 del token
@@ -45,29 +48,40 @@
     # valid Python 3 code.
     bytes = str

-def group(*choices): return '(' + '|'.join(choices) + ')'
-def any(*choices): return group(*choices) + '*'
-def maybe(*choices): return group(*choices) + '?'
+
+def group(*choices):
+    return "(" + "|".join(choices) + ")"
+
+
+def any(*choices):
+    return group(*choices) + "*"
+
+
+def maybe(*choices):
+    return group(*choices) + "?"
+
+
 def _combinations(*l):
-    return set(
-        x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
-    )
+    return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())

-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'\w+' # this is invalid but it's fine because Name comes after Number in all groups
-Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
-Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
-Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
-Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
+
+Whitespace = r"[ \f\t]*"
+Comment = r"#[^\r\n]*"
+Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
+Name = r"\w+"  # this is invalid but it's fine because Name comes after Number in all groups
+Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
+Hexnumber = r"0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?"
+Octnumber = r"0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?"
+Decnumber = group(r"[1-9]\d*(?:_\d+)*[lL]?", "0[lL]?")
 Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
-Exponent = r'[eE][-+]?\d+(?:_\d+)*'
-Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
-Expfloat = r'\d+(?:_\d+)*' + Exponent
+Exponent = r"[eE][-+]?\d+(?:_\d+)*"
+Pointfloat = group(r"\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?", r"\.\d+(?:_\d+)*") + maybe(
+    Exponent
+)
+Expfloat = r"\d+(?:_\d+)*" + Exponent
 Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r"\d+(?:_\d+)*[jJ]", Floatnumber + r"[jJ]")
 Number = group(Imagnumber, Floatnumber, Intnumber)
@@ -81,30 +95,39 @@ def _combinations(*l):
 _litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
 Triple = group(_litprefix + "'''", _litprefix + '"""')
 # Single-line ' or " string.
-String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(
+    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
+)

 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
 # recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
-                 r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>:]=?",
-                 r"~")
+Operator = group(
+    r"\*\*=?",
+    r">>=?",
+    r"<<=?",
+    r"<>",
+    r"!=",
+    r"//=?",
+    r"->",
+    r"[+\-*/%&@|^=<>:]=?",
+    r"~",
+)

-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'[:;.,`@]')
+Bracket = "[][(){}]"
+Special = group(r"\r?\n", r"[:;.,`@]")
 Funny = group(Operator, Bracket, Special)

 PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken

 # First (or only) line of ' or " string.
-ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                group("'", r'\\\r?\n'),
-                _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                group('"', r'\\\r?\n'))
-PseudoExtras = group(r'\\\r?\n', Comment, Triple)
+ContStr = group(
+    _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
+    _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
+)
+PseudoExtras = group(r"\\\r?\n", Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

 tokenprog = re.compile(Token, re.UNICODE)
@@ -113,39 +136,50 @@ def _combinations(*l):
 double3prog = re.compile(Double3)

 _strprefixes = (
-    _combinations('r', 'R', 'f', 'F') |
-    _combinations('r', 'R', 'b', 'B') |
-    {'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
+    _combinations("r", "R", "f", "F")
+    | _combinations("r", "R", "b", "B")
+    | {"u", "U", "ur", "uR", "Ur", "UR"}
 )

-endprogs = {"'": re.compile(Single), '"': re.compile(Double),
-            "'''": single3prog, '"""': double3prog,
-            **{f"{prefix}'''": single3prog for prefix in _strprefixes},
-            **{f'{prefix}"""': double3prog for prefix in _strprefixes},
-            **{prefix: None for prefix in _strprefixes}}
+endprogs = {
+    "'": re.compile(Single),
+    '"': re.compile(Double),
+    "'''": single3prog,
+    '"""': double3prog,
+    **{f"{prefix}'''": single3prog for prefix in _strprefixes},
+    **{f'{prefix}"""': double3prog for prefix in _strprefixes},
+    **{prefix: None for prefix in _strprefixes},
+}

 triple_quoted = (
-    {"'''", '"""'} |
-    {f"{prefix}'''" for prefix in _strprefixes} |
-    {f'{prefix}"""' for prefix in _strprefixes}
+    {"'''", '"""'}
+    | {f"{prefix}'''" for prefix in _strprefixes}
+    | {f'{prefix}"""' for prefix in _strprefixes}
 )
 single_quoted = (
-    {"'", '"'} |
-    {f"{prefix}'" for prefix in _strprefixes} |
-    {f'{prefix}"' for prefix in _strprefixes}
+    {"'", '"'}
+    | {f"{prefix}'" for prefix in _strprefixes}
+    | {f'{prefix}"' for prefix in _strprefixes}
 )

 tabsize = 8

-class TokenError(Exception): pass
-class StopTokenizing(Exception): pass
+
+class TokenError(Exception):
+    pass
+
+
+class StopTokenizing(Exception):
+    pass

-def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
+
+def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line):  # for testing
     (srow, scol) = xxx_todo_changeme
     (erow, ecol) = xxx_todo_changeme1
-    print("%d,%d-%d,%d:\t%s\t%s" % \
-          (srow, scol, erow, ecol, tok_name[type], repr(token)))
+    print(
+        "%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
+    )

 def tokenize(readline, tokeneater=printtoken):
     """
@@ -165,13 +199,14 @@ def tokenize(readline, tokeneater=printtoken):
     except StopTokenizing:
         pass

 # backwards compatible interface
 def tokenize_loop(readline, tokeneater):
     for token_info in generate_tokens(readline):
         tokeneater(*token_info)

+
 class Untokenizer:
     def __init__(self):
         self.tokens = []
         self.prev_row = 1
@@ -204,14 +239,14 @@ def compat(self, token, iterable):
         toks_append = self.tokens.append
         toknum, tokval = token
         if toknum in (NAME, NUMBER):
-            tokval += ' '
+            tokval += " "
         if toknum in (NEWLINE, NL):
             startline = True
         for tok in iterable:
             toknum, tokval = tok[:2]
             if toknum in (NAME, NUMBER, ASYNC, AWAIT):
-                tokval += ' '
+                tokval += " "
             if toknum == INDENT:
                 indents.append(tokval)
@@ -226,8 +261,10 @@ def compat(self, token, iterable):
             startline = False
             toks_append(tokval)

-cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
-blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
+
+cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
+blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
+

 def _get_normal_name(orig_enc):
     """Imitates get_normal_name in tokenizer.c."""
@@ -235,11 +272,13 @@ def _get_normal_name(orig_enc):
     enc = orig_enc[:12].lower().replace("_", "-")
     if enc == "utf-8" or enc.startswith("utf-8-"):
         return "utf-8"
-    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
-       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
+    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
+        ("latin-1-", "iso-8859-1-", "iso-latin-1-")
+    ):
         return "iso-8859-1"
     return orig_enc

+
 def detect_encoding(readline):
     """
     The detect_encoding() function is used to detect the encoding that should
@@ -260,7 +299,8 @@ def detect_encoding(readline):
     """
     bom_found = False
     encoding = None
-    default = 'utf-8'
+    default = "utf-8"
+
     def read_or_stop():
         try:
             return readline()
@@ -269,7 +309,7 @@ def read_or_stop():

     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            line_string = line.decode("ascii")
         except UnicodeDecodeError:
             return None
         match = cookie_re.match(line_string)
@@ -283,17 +323,17 @@ def find_cookie(line):
             raise SyntaxError("unknown encoding: " + encoding)

         if bom_found:
-            if codec.name != 'utf-8':
+            if codec.name != "utf-8":
                 # This behaviour mimics the Python interpreter
-                raise SyntaxError('encoding problem: utf-8')
-            encoding += '-sig'
+                raise SyntaxError("encoding problem: utf-8")
+            encoding += "-sig"
         return encoding

     first = read_or_stop()
     if first.startswith(BOM_UTF8):
         bom_found = True
         first = first[3:]
-        default = 'utf-8-sig'
+        default = "utf-8-sig"
     if not first:
         return default, []
@@ -313,6 +353,7 @@ def find_cookie(line):
     return default, [first, second]

+
 def untokenize(iterable):
     """Transform tokens back into Python source code.
@@ -334,6 +375,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)

+
 def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
@@ -351,8 +393,8 @@ def generate_tokens(readline, grammar=None):
     logical line; continuation lines are included.
     """
     lnum = parenlev = continued = 0
-    numchars = '0123456789'
-    contstr, needcont = '', 0
+    numchars = "0123456789"
+    contstr, needcont = "", 0
     contline = None
     indents = [0]
@@ -369,7 +411,7 @@ def generate_tokens(readline, grammar=None):
         try:
             line = readline()
         except StopIteration:
-            line = ''
+            line = ""
         lnum = lnum + 1
         pos, max = 0, len(line)
@@ -379,14 +421,24 @@ def generate_tokens(readline, grammar=None):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = end = endmatch.end(0)
-                yield (STRING, contstr + line[:end],
-                       strstart, (lnum, end), contline + line)
-                contstr, needcont = '', 0
+                yield (
+                    STRING,
+                    contstr + line[:end],
+                    strstart,
+                    (lnum, end),
+                    contline + line,
+                )
+                contstr, needcont = "", 0
                 contline = None
-            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
-                yield (ERRORTOKEN, contstr + line,
-                       strstart, (lnum, len(line)), contline)
-                contstr = ''
+            elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
+                yield (
+                    ERRORTOKEN,
+                    contstr + line,
+                    strstart,
+                    (lnum, len(line)),
+                    contline,
+                )
+                contstr = ""
                 contline = None
                 continue
             else:
@@ -395,31 +447,41 @@ def generate_tokens(readline, grammar=None):
                 continue

         elif parenlev == 0 and not continued:  # new statement
-            if not line: break
+            if not line:
+                break
             column = 0
             while pos < max:  # measure leading whitespace
-                if line[pos] == ' ': column = column + 1
-                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
-                elif line[pos] == '\f': column = 0
-                else: break
+                if line[pos] == " ":
+                    column = column + 1
+                elif line[pos] == "\t":
+                    column = (column // tabsize + 1) * tabsize
+                elif line[pos] == "\f":
+                    column = 0
+                else:
+                    break
                 pos = pos + 1
-            if pos == max: break
+            if pos == max:
+                break

             if stashed:
                 yield stashed
                 stashed = None

-            if line[pos] in '\r\n': # skip blank lines
+            if line[pos] in "\r\n":  # skip blank lines
                 yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
                 continue

-            if line[pos] == '#': # skip comments
-                comment_token = line[pos:].rstrip('\r\n')
+            if line[pos] == "#":  # skip comments
+                comment_token = line[pos:].rstrip("\r\n")
                 nl_pos = pos + len(comment_token)
-                yield (COMMENT, comment_token,
-                       (lnum, pos), (lnum, pos + len(comment_token)), line)
-                yield (NL, line[nl_pos:],
-                       (lnum, nl_pos), (lnum, len(line)), line)
+                yield (
+                    COMMENT,
+                    comment_token,
+                    (lnum, pos),
+                    (lnum, pos + len(comment_token)),
+                    line,
+                )
+                yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line)
                 continue

             if column > indents[-1]:  # count indents
@@ -430,7 +492,8 @@ def generate_tokens(readline, grammar=None):
                 if column not in indents:
                     raise IndentationError(
                         "unindent does not match any outer indentation level",
-                        ("<tokenize>", lnum, pos, line))
+                        ("<tokenize>", lnum, pos, line),
+                    )
                 indents = indents[:-1]

                 if async_def and async_def_indent >= indents[-1]:
@@ -438,7 +501,7 @@ def generate_tokens(readline, grammar=None):
                     async_def_nl = False
                     async_def_indent = 0

-                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+                yield (DEDENT, "", (lnum, pos), (lnum, pos), line)

             if async_def and async_def_nl and async_def_indent >= indents[-1]:
                 async_def = False
@@ -457,10 +520,11 @@ def generate_tokens(readline, grammar=None):
                 spos, epos, pos = (lnum, start), (lnum, end), end
                 token, initial = line[start:end], line[start]

-                if initial in numchars or \
-                   (initial == '.' and token != '.'):  # ordinary number
+                if initial in numchars or (
+                    initial == "." and token != "."
+                ):  # ordinary number
                     yield (NUMBER, token, spos, epos, line)
-                elif initial in '\r\n':
+                elif initial in "\r\n":
                     newline = NEWLINE
                     if parenlev > 0:
                         newline = NL
@@ -471,7 +535,7 @@ def generate_tokens(readline, grammar=None):
                         stashed = None
                     yield (newline, token, spos, epos, line)

-                elif initial == '#':
+                elif initial == "#":
                     assert not token.endswith("\n")
                     if stashed:
                         yield stashed
@@ -492,13 +556,18 @@ def generate_tokens(readline, grammar=None):
                         contstr = line[start:]
                         contline = line
                         break
-                elif initial in single_quoted or \
-                    token[:2] in single_quoted or \
-                    token[:3] in single_quoted:
-                    if token[-1] == '\n':  # continued string
+                elif (
+                    initial in single_quoted
+                    or token[:2] in single_quoted
+                    or token[:3] in single_quoted
+                ):
+                    if token[-1] == "\n":  # continued string
                         strstart = (lnum, start)
-                        endprog = (endprogs[initial] or endprogs[token[1]] or
-                                   endprogs[token[2]])
+                        endprog = (
+                            endprogs[initial]
+                            or endprogs[token[1]]
+                            or endprogs[token[2]]
+                        )
                         contstr, needcont = line[start:], 1
                         contline = line
                         break
@@ -508,29 +577,36 @@ def generate_tokens(readline, grammar=None):
                         stashed = None
                     yield (STRING, token, spos, epos, line)
                 elif initial.isidentifier():  # ordinary name
-                    if token in ('async', 'await'):
+                    if token in ("async", "await"):
                         if async_keywords or async_def:
-                            yield (ASYNC if token == 'async' else AWAIT,
-                                   token, spos, epos, line)
+                            yield (
+                                ASYNC if token == "async" else AWAIT,
+                                token,
+                                spos,
+                                epos,
+                                line,
+                            )
                             continue

                     tok = (NAME, token, spos, epos, line)
-                    if token == 'async' and not stashed:
+                    if token == "async" and not stashed:
                         stashed = tok
                         continue

-                    if token in ('def', 'for'):
-                        if (stashed
-                                and stashed[0] == NAME
-                                and stashed[1] == 'async'):
+                    if token in ("def", "for"):
+                        if stashed and stashed[0] == NAME and stashed[1] == "async":
-                            if token == 'def':
+                            if token == "def":
                                 async_def = True
                                 async_def_indent = indents[-1]

-                            yield (ASYNC, stashed[1],
-                                   stashed[2], stashed[3],
-                                   stashed[4])
+                            yield (
+                                ASYNC,
+                                stashed[1],
+                                stashed[2],
+                                stashed[3],
+                                stashed[4],
+                            )
                             stashed = None

                     if stashed:
@@ -538,7 +614,7 @@ def generate_tokens(readline, grammar=None):
                         stashed = None
                     yield tok

-                elif initial == '\\': # continued stmt
+                elif initial == "\\":  # continued stmt
                     # This yield is new; needed for better idempotency:
                     if stashed:
                         yield stashed
@@ -546,15 +622,16 @@ def generate_tokens(readline, grammar=None):
                     yield (NL, token, spos, (lnum, pos), line)
                     continued = 1
                 else:
-                    if initial in '([{': parenlev = parenlev + 1
-                    elif initial in ')]}': parenlev = parenlev - 1
+                    if initial in "([{":
+                        parenlev = parenlev + 1
+                    elif initial in ")]}":
+                        parenlev = parenlev - 1
                     if stashed:
                         yield stashed
                         stashed = None
                     yield (OP, token, spos, epos, line)
             else:
-                yield (ERRORTOKEN, line[pos],
-                       (lnum, pos), (lnum, pos+1), line)
+                yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
                 pos = pos + 1

     if stashed:
@@ -562,10 +639,14 @@ def generate_tokens(readline, grammar=None):
         stashed = None

     for indent in indents[1:]:  # pop remaining indent levels
-        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
-    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+        yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
+    yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")

-if __name__ == '__main__': # testing
+
+if __name__ == "__main__":  # testing
     import sys
-    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
-    else: tokenize(sys.stdin.readline)
+
+    if len(sys.argv) > 1:
+        tokenize(open(sys.argv[1]).readline)
+    else:
+        tokenize(sys.stdin.readline)

File: blib2to3/pygram.py

@@ -12,12 +12,10 @@
 # The grammar file
 _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
-_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
-                                     "PatternGrammar.txt")
+_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt")


 class Symbols(object):
     def __init__(self, grammar):
         """Initializer.
@@ -38,8 +36,7 @@ def initialize(cache_dir=None):
     global pattern_symbols

     # Python 2
-    python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE,
-                                                  cache_dir)
+    python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)

     python_symbols = Symbols(python_grammar)
@@ -56,8 +53,11 @@ def initialize(cache_dir=None):
     python_grammar_no_print_statement_no_exec_statement_async_keywords = (
         python_grammar_no_print_statement_no_exec_statement.copy()
     )
-    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = (
+        True
+    )

-    pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
-                                                   cache_dir)
+    pattern_grammar = driver.load_packaged_grammar(
+        "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
+    )
     pattern_symbols = Symbols(pattern_grammar)

File: blib2to3/pytree.py

@@ -18,16 +18,21 @@
 HUGE = 0x7FFFFFFF  # maximum repeat count, default max

 _type_reprs = {}

+
 def type_repr(type_num):
     global _type_reprs
     if not _type_reprs:
         from .pygram import python_symbols
+
         # printing tokens is possible but not as useful
         # from .pgen2 import token // token.__dict__.items():
         for name, val in python_symbols.__dict__.items():
-            if type(val) == int: _type_reprs[val] = name
+            if type(val) == int:
+                _type_reprs[val] = name
     return _type_reprs.setdefault(type_num, type_num)


 class Base(object):

     """
@@ -198,17 +203,16 @@ def get_suffix(self):
         return next_sib.prefix

     if sys.version_info < (3, 0):

         def __str__(self):
             return str(self).encode("ascii")


 class Node(Base):

     """Concrete implementation for interior nodes."""

-    def __init__(self,type, children,
-                 context=None,
-                 prefix=None,
-                 fixers_applied=None):
+    def __init__(self, type, children, context=None, prefix=None, fixers_applied=None):
         """
         Initializer.
@@ -233,9 +237,11 @@ def __init__(self,type, children,
     def __repr__(self):
         """Return a canonical string representation."""
-        return "%s(%s, %r)" % (self.__class__.__name__,
-                               type_repr(self.type),
-                               self.children)
+        return "%s(%s, %r)" % (
+            self.__class__.__name__,
+            type_repr(self.type),
+            self.children,
+        )

     def __unicode__(self):
         """
@@ -254,8 +260,11 @@ def _eq(self, other):
     def clone(self):
         """Return a cloned (deep) copy of self."""
-        return Node(self.type, [ch.clone() for ch in self.children],
-                    fixers_applied=self.fixers_applied)
+        return Node(
+            self.type,
+            [ch.clone() for ch in self.children],
+            fixers_applied=self.fixers_applied,
+        )

     def post_order(self):
         """Return a post-order iterator for the tree."""
@@ -328,6 +337,7 @@ def update_sibling_maps(self):
             previous = current
         _next[id(current)] = None

+
 class Leaf(Base):

     """Concrete implementation for leaf nodes."""
@@ -337,10 +347,7 @@ class Leaf(Base):
     lineno = 0  # Line where this token starts in the input
     column = 0  # Column where this token starts in the input

-    def __init__(self, type, value,
-                 context=None,
-                 prefix=None,
-                 fixers_applied=[]):
+    def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
         """
         Initializer.
@@ -359,9 +366,12 @@ def __init__(self, type, value,
     def __repr__(self):
         """Return a canonical string representation."""
         from .pgen2.token import tok_name
-        return "%s(%s, %r)" % (self.__class__.__name__,
-                               tok_name.get(self.type, self.type),
-                               self.value)
+
+        return "%s(%s, %r)" % (
+            self.__class__.__name__,
+            tok_name.get(self.type, self.type),
+            self.value,
+        )

     def __unicode__(self):
         """
@@ -380,9 +390,12 @@ def _eq(self, other):
     def clone(self):
         """Return a cloned (deep) copy of self."""
-        return Leaf(self.type, self.value,
-                    (self.prefix, (self.lineno, self.column)),
-                    fixers_applied=self.fixers_applied)
+        return Leaf(
+            self.type,
+            self.value,
+            (self.prefix, (self.lineno, self.column)),
+            fixers_applied=self.fixers_applied,
+        )

     def leaves(self):
         yield self
@@ -407,6 +420,7 @@ def prefix(self, prefix):
             self.changed()
         self._prefix = prefix

+
 def convert(gr, raw_node):
     """
     Convert raw node information to a Node or Leaf instance.
@@ -513,7 +527,6 @@ def generate_matches(self, nodes):

 class LeafPattern(BasePattern):
-
     def __init__(self, type=None, content=None, name=None):
         """
         Initializer. Takes optional type, content, and name.
@@ -669,20 +682,29 @@ def __init__(self, content=None, min=0, max=HUGE, name=None):
     def optimize(self):
         """Optimize certain stacked wildcard patterns."""
         subpattern = None
-        if (self.content is not None and
-            len(self.content) == 1 and len(self.content[0]) == 1):
+        if (
+            self.content is not None
+            and len(self.content) == 1
+            and len(self.content[0]) == 1
+        ):
             subpattern = self.content[0][0]
         if self.min == 1 and self.max == 1:
             if self.content is None:
                 return NodePattern(name=self.name)
             if subpattern is not None and self.name == subpattern.name:
                 return subpattern.optimize()
-        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
-            subpattern.min <= 1 and self.name == subpattern.name):
-            return WildcardPattern(subpattern.content,
-                                   self.min * subpattern.min,
-                                   self.max * subpattern.max,
-                                   subpattern.name)
+        if (
+            self.min <= 1
+            and isinstance(subpattern, WildcardPattern)
+            and subpattern.min <= 1
+            and self.name == subpattern.name
+        ):
+            return WildcardPattern(
+                subpattern.content,
+                self.min * subpattern.min,
+                self.max * subpattern.max,
+                subpattern.name,
+            )
         return self

     def match(self, node, results=None):
@@ -806,7 +828,6 @@ def _recursive_matches(self, nodes, count):

 class NegatedPattern(BasePattern):
-
     def __init__(self, content=None):
         """
         Initializer.

File: tests/test_black.py

@@ -158,6 +158,16 @@ def invokeBlack(
         result = runner.invoke(black.main, args)
         self.assertEqual(result.exit_code, exit_code, msg=runner.stderr_bytes.decode())

+    @patch("black.dump_to_file", dump_to_stderr)
+    def checkSourceFile(self, name: str) -> None:
+        path = THIS_DIR.parent / name
+        source, expected = read_data(str(path), data=False)
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, black.FileMode())
+        self.assertFalse(ff(path))
+
     @patch("black.dump_to_file", dump_to_stderr)
     def test_empty(self) -> None:
         source = expected = ""
@@ -177,23 +187,44 @@ def test_empty_ff(self) -> None:
         os.unlink(tmp_file)
         self.assertFormatEqual(expected, actual)

-    @patch("black.dump_to_file", dump_to_stderr)
     def test_self(self) -> None:
-        source, expected = read_data("test_black", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_FILE))
+        self.checkSourceFile("tests/test_black.py")

-    @patch("black.dump_to_file", dump_to_stderr)
     def test_black(self) -> None:
-        source, expected = read_data("../black", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_DIR / ".." / "black.py"))
+        self.checkSourceFile("black.py")
+
+    def test_pygram(self) -> None:
+        self.checkSourceFile("blib2to3/pygram.py")
+
+    def test_pytree(self) -> None:
+        self.checkSourceFile("blib2to3/pytree.py")
+
+    def test_conv(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/conv.py")
+
+    def test_driver(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/driver.py")
+
+    def test_grammar(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/grammar.py")
+
+    def test_literals(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/literals.py")
+
+    def test_parse(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/parse.py")
+
+    def test_pgen(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/pgen.py")
+
+    def test_tokenize(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/tokenize.py")
+
+    def test_token(self) -> None:
+        self.checkSourceFile("blib2to3/pgen2/token.py")
+
+    def test_setup(self) -> None:
+        self.checkSourceFile("setup.py")

     def test_piping(self) -> None:
         source, expected = read_data("../black", data=False)
@@ -230,15 +261,6 @@ def test_piping_diff(self) -> None:
         actual = actual.rstrip() + "\n"  # the diff output has a trailing space
         self.assertEqual(expected, actual)

-    @patch("black.dump_to_file", dump_to_stderr)
-    def test_setup(self) -> None:
-        source, expected = read_data("../setup", data=False)
-        actual = fs(source)
-        self.assertFormatEqual(expected, actual)
-        black.assert_equivalent(source, actual)
-        black.assert_stable(source, actual, black.FileMode())
-        self.assertFalse(ff(THIS_DIR / ".." / "setup.py"))
-
     @patch("black.dump_to_file", dump_to_stderr)
     def test_function(self) -> None:
         source, expected = read_data("function")