Blacken .py files in blib2to3 (#1011)

* Blacken .py files in blib2to3

This is in preparation for adding type annotations to blib2to3 in
order to compile it with mypyc (#1009, which I can rebase on top of
this).
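
(For a rough sense of what #1009 layers on top of this: the annotations will
look something like the sketch below. This is an illustration only, not code
from this commit, and the exact signatures are up to that PR.)

    # Hypothetical sketch of the kind of annotations #1009 adds (not in this commit).
    from typing import Tuple

    class Driver(object):
        def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str]:
            # Returns the consumed part of the prefix and the remainder,
            # as in the driver.py hunk further down.
            ...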

To enforce that it stays blackened, I just cargo-culted the existing
test code used for validating formatting. It feels pretty clunky now,
though, so I can abstract the common logic out into a helper if that
seems better. (But error messages might be less clear then?)
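
(The tidy-up commit below does exactly that: the tests/test_black.py hunk at
the end of this diff adds a checkSourceFile helper, so each per-file check
collapses to a one-liner. Quoting from that hunk:)

    @patch("black.dump_to_file", dump_to_stderr)
    def checkSourceFile(self, name: str) -> None:
        # Format the file and assert the output matches the file on disk,
        # is AST-equivalent, is stable under a second pass, and that ff()
        # reports no changes needed.
        path = THIS_DIR.parent / name
        source, expected = read_data(str(path), data=False)
        actual = fs(source)
        self.assertFormatEqual(expected, actual)
        black.assert_equivalent(source, actual)
        black.assert_stable(source, actual, black.FileMode())
        self.assertFalse(ff(path))

    def test_driver(self) -> None:
        self.checkSourceFile("blib2to3/pgen2/driver.py")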

* Tidy up the tests
Michael J. Sullivan 2019-10-20 06:55:31 -07:00 committed by Łukasz Langa
parent 3bfb66971f
commit 0ff718e1e2
12 changed files with 457 additions and 319 deletions

View File

@ -70,8 +70,7 @@ def parse_graminit_h(self, filename):
lineno += 1
mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
if not mo and line.strip():
print("%s(%s): can't parse %s" % (filename, lineno,
line.strip()))
print("%s(%s): can't parse %s" % (filename, lineno, line.strip()))
else:
symbol, number = mo.groups()
number = int(number)
@ -129,8 +128,7 @@ def parse_graminit_c(self, filename):
states = []
while line.startswith("static arc "):
while line.startswith("static arc "):
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
line)
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
n, m, k = list(map(int, mo.groups()))
arcs = []
@ -170,8 +168,7 @@ def parse_graminit_c(self, filename):
ndfas = int(mo.group(1))
for i in range(ndfas):
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
line)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
assert mo, (lineno, line)
symbol = mo.group(2)
number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))

View File

@ -28,13 +28,7 @@
class Driver(object):
def __init__(
self,
grammar,
convert=None,
logger=None,
):
def __init__(self, grammar, convert=None, logger=None):
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
@ -73,8 +67,9 @@ def parse_tokens(self, tokens, debug=False):
if type == token.OP:
type = grammar.opmap[value]
if debug:
self.logger.debug("%s %r (prefix=%r)",
token.tok_name[type], value, prefix)
self.logger.debug(
"%s %r (prefix=%r)", token.tok_name[type], value, prefix
)
if type == token.INDENT:
indent_columns.append(len(value))
_prefix = prefix + value
@ -96,8 +91,7 @@ def parse_tokens(self, tokens, debug=False):
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
raise parse.ParseError("incomplete input",
type, value, (prefix, start))
raise parse.ParseError("incomplete input", type, value, (prefix, start))
return p.rootnode
def parse_stream_raw(self, stream, debug=False):
@ -117,8 +111,7 @@ def parse_file(self, filename, encoding=None, debug=False):
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(
io.StringIO(text).readline,
grammar=self.grammar
io.StringIO(text).readline, grammar=self.grammar
)
return self.parse_tokens(tokens, debug)
@ -130,24 +123,24 @@ def _partially_consume_prefix(self, prefix, column):
for char in prefix:
current_line += char
if wait_for_nl:
if char == '\n':
if char == "\n":
if current_line.strip() and current_column < column:
res = ''.join(lines)
res = "".join(lines)
return res, prefix[len(res) :]
lines.append(current_line)
current_line = ""
current_column = 0
wait_for_nl = False
elif char in ' \t':
elif char in " \t":
current_column += 1
elif char == '\n':
elif char == "\n":
# unexpected empty line
current_column = 0
else:
# indent is finished
wait_for_nl = True
return ''.join(lines), current_line
return "".join(lines), current_line
def _generate_pickle_name(gt, cache_dir=None):
@ -161,8 +154,7 @@ def _generate_pickle_name(gt, cache_dir=None):
return name
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger(__name__)
@ -219,11 +211,11 @@ def main(*args):
"""
if not args:
args = sys.argv[1:]
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
format='%(message)s')
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
for gt in args:
load_grammar(gt, save=True, force=True)
return True
if __name__ == "__main__":
sys.exit(int(not main()))

View File

@ -90,7 +90,9 @@ def __init__(self):
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
with tempfile.NamedTemporaryFile(dir=os.path.dirname(filename), delete=False) as f:
with tempfile.NamedTemporaryFile(
dir=os.path.dirname(filename), delete=False
) as f:
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
os.replace(f.name, filename)
@ -109,8 +111,14 @@ def copy(self):
Copy the grammar.
"""
new = self.__class__()
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
"tokens", "symbol2label"):
for dict_attr in (
"symbol2number",
"number2symbol",
"dfas",
"keywords",
"tokens",
"symbol2label",
):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
@ -121,6 +129,7 @@ def copy(self):
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")

View File

@ -5,7 +5,8 @@
import regex as re
simple_escapes = {"a": "\a",
simple_escapes = {
"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
@ -14,7 +15,9 @@
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\"}
"\\": "\\",
}
def escape(m):
all, tail = m.group(0, 1)
@ -37,6 +40,7 @@ def escape(m):
raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
return chr(i)
def evalString(s):
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0]
@ -47,6 +51,7 @@ def evalString(s):
s = s[len(q) : -len(q)]
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
def test():
for i in range(256):
c = chr(i)

View File

@ -13,17 +13,20 @@
# Local imports
from . import token
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
Exception.__init__(
self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
)
self.msg = msg
self.type = type
self.value = value
self.context = context
class Parser(object):
"""Parser engine.
@ -152,8 +155,7 @@ def addtoken(self, type, value, context):
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
raise ParseError("too much input", type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)

View File

@ -4,11 +4,12 @@
# Pgen imports
from . import grammar, token, tokenize
class PgenGrammar(grammar.Grammar):
pass
class ParserGenerator(object):
class ParserGenerator(object):
def __init__(self, filename, stream=None):
close_stream = None
if stream is None:
@ -136,9 +137,11 @@ def calcfirst(self, name):
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
raise ValueError(
"rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s"
% (name, symbol, label, inverse[symbol])
)
inverse[symbol] = label
self.first[name] = totalset
@ -173,10 +176,12 @@ def make_dfa(self, start, finish):
# values.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
@ -185,6 +190,7 @@ def addclosure(state, base):
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
@ -266,8 +272,7 @@ def parse_rhs(self):
def parse_alt(self):
# ALT: ITEM+
a, b = self.parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
c, d = self.parse_item()
b.addarc(c)
b = d
@ -307,13 +312,15 @@ def parse_atom(self):
self.gettoken()
return a, z
else:
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
self.raise_error(
"expected (...) or NAME or STRING, got %s/%s", self.type, self.value
)
def expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self.raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
self.raise_error(
"expected %s/%s, got %s/%s", type, value, self.type, self.value
)
value = self.value
self.gettoken()
return value
@ -331,11 +338,10 @@ def raise_error(self, msg, *args):
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
raise SyntaxError(msg, (self.filename, self.end[0],
self.end[1], self.line))
raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
@ -344,8 +350,8 @@ def addarc(self, next, label=None):
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
@ -381,6 +387,7 @@ def __eq__(self, other):
__hash__ = None # For Py3 compatibility.
def generate_grammar(filename="Grammar.txt"):
p = ParserGenerator(filename)
return p.make_grammar()

View File

@ -77,8 +77,10 @@
def ISTERMINAL(x):
return x < NT_OFFSET
def ISNONTERMINAL(x):
return x >= NT_OFFSET
def ISEOF(x):
return x == ENDMARKER

View File

@ -25,17 +25,20 @@
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
import regex as re
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *
from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
"generate_tokens", "untokenize"]
__all__ = [x for x in dir(token) if x[0] != "_"] + [
"tokenize",
"generate_tokens",
"untokenize",
]
del token
try:
@ -45,29 +48,40 @@
# valid Python 3 code.
bytes = str
def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
def group(*choices):
return "(" + "|".join(choices) + ")"
def any(*choices):
return group(*choices) + "*"
def maybe(*choices):
return group(*choices) + "?"
def _combinations(*l):
return set(
x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
)
return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+' # this is invalid but it's fine because Name comes after Number in all groups
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
Whitespace = r"[ \f\t]*"
Comment = r"#[^\r\n]*"
Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
Name = r"\w+" # this is invalid but it's fine because Name comes after Number in all groups
Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
Hexnumber = r"0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?"
Octnumber = r"0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?"
Decnumber = group(r"[1-9]\d*(?:_\d+)*[lL]?", "0[lL]?")
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+(?:_\d+)*'
Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
Expfloat = r'\d+(?:_\d+)*' + Exponent
Exponent = r"[eE][-+]?\d+(?:_\d+)*"
Pointfloat = group(r"\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?", r"\.\d+(?:_\d+)*") + maybe(
Exponent
)
Expfloat = r"\d+(?:_\d+)*" + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
Imagnumber = group(r"\d+(?:_\d+)*[jJ]", Floatnumber + r"[jJ]")
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
@ -81,30 +95,39 @@ def _combinations(*l):
_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
String = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
)
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
r"//=?", r"->",
Operator = group(
r"\*\*=?",
r">>=?",
r"<<=?",
r"<>",
r"!=",
r"//=?",
r"->",
r"[+\-*/%&@|^=<>:]=?",
r"~")
r"~",
)
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Bracket = "[][(){}]"
Special = group(r"\r?\n", r"[:;.,`@]")
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
ContStr = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
)
PseudoExtras = group(r"\\\r?\n", Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
tokenprog = re.compile(Token, re.UNICODE)
@ -113,39 +136,50 @@ def _combinations(*l):
double3prog = re.compile(Double3)
_strprefixes = (
_combinations('r', 'R', 'f', 'F') |
_combinations('r', 'R', 'b', 'B') |
{'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
_combinations("r", "R", "f", "F")
| _combinations("r", "R", "b", "B")
| {"u", "U", "ur", "uR", "Ur", "UR"}
)
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
"'''": single3prog, '"""': double3prog,
endprogs = {
"'": re.compile(Single),
'"': re.compile(Double),
"'''": single3prog,
'"""': double3prog,
**{f"{prefix}'''": single3prog for prefix in _strprefixes},
**{f'{prefix}"""': double3prog for prefix in _strprefixes},
**{prefix: None for prefix in _strprefixes}}
**{prefix: None for prefix in _strprefixes},
}
triple_quoted = (
{"'''", '"""'} |
{f"{prefix}'''" for prefix in _strprefixes} |
{f'{prefix}"""' for prefix in _strprefixes}
{"'''", '"""'}
| {f"{prefix}'''" for prefix in _strprefixes}
| {f'{prefix}"""' for prefix in _strprefixes}
)
single_quoted = (
{"'", '"'} |
{f"{prefix}'" for prefix in _strprefixes} |
{f'{prefix}"' for prefix in _strprefixes}
{"'", '"'}
| {f"{prefix}'" for prefix in _strprefixes}
| {f'{prefix}"' for prefix in _strprefixes}
)
tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
class TokenError(Exception):
pass
class StopTokenizing(Exception):
pass
def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
(srow, scol) = xxx_todo_changeme
(erow, ecol) = xxx_todo_changeme1
print("%d,%d-%d,%d:\t%s\t%s" % \
(srow, scol, erow, ecol, tok_name[type], repr(token)))
print(
"%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
)
def tokenize(readline, tokeneater=printtoken):
"""
@ -165,13 +199,14 @@ def tokenize(readline, tokeneater=printtoken):
except StopTokenizing:
pass
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
for token_info in generate_tokens(readline):
tokeneater(*token_info)
class Untokenizer:
class Untokenizer:
def __init__(self):
self.tokens = []
self.prev_row = 1
@ -204,14 +239,14 @@ def compat(self, token, iterable):
toks_append = self.tokens.append
toknum, tokval = token
if toknum in (NAME, NUMBER):
tokval += ' '
tokval += " "
if toknum in (NEWLINE, NL):
startline = True
for tok in iterable:
toknum, tokval = tok[:2]
if toknum in (NAME, NUMBER, ASYNC, AWAIT):
tokval += ' '
tokval += " "
if toknum == INDENT:
indents.append(tokval)
@ -226,8 +261,10 @@ def compat(self, token, iterable):
startline = False
toks_append(tokval)
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
@ -235,11 +272,13 @@ def _get_normal_name(orig_enc):
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
("latin-1-", "iso-8859-1-", "iso-latin-1-")
):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
@ -260,7 +299,8 @@ def detect_encoding(readline):
"""
bom_found = False
encoding = None
default = 'utf-8'
default = "utf-8"
def read_or_stop():
try:
return readline()
@ -269,7 +309,7 @@ def read_or_stop():
def find_cookie(line):
try:
line_string = line.decode('ascii')
line_string = line.decode("ascii")
except UnicodeDecodeError:
return None
match = cookie_re.match(line_string)
@ -283,17 +323,17 @@ def find_cookie(line):
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
if codec.name != 'utf-8':
if codec.name != "utf-8":
# This behaviour mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
raise SyntaxError("encoding problem: utf-8")
encoding += "-sig"
return encoding
first = read_or_stop()
if first.startswith(BOM_UTF8):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
default = "utf-8-sig"
if not first:
return default, []
@ -313,6 +353,7 @@ def find_cookie(line):
return default, [first, second]
def untokenize(iterable):
"""Transform tokens back into Python source code.
@ -334,6 +375,7 @@ def untokenize(iterable):
ut = Untokenizer()
return ut.untokenize(iterable)
def generate_tokens(readline, grammar=None):
"""
The generate_tokens() generator requires one argument, readline, which
@ -351,8 +393,8 @@ def generate_tokens(readline, grammar=None):
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
numchars = '0123456789'
contstr, needcont = '', 0
numchars = "0123456789"
contstr, needcont = "", 0
contline = None
indents = [0]
@ -369,7 +411,7 @@ def generate_tokens(readline, grammar=None):
try:
line = readline()
except StopIteration:
line = ''
line = ""
lnum = lnum + 1
pos, max = 0, len(line)
@ -379,14 +421,24 @@ def generate_tokens(readline, grammar=None):
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
yield (
STRING,
contstr + line[:end],
strstart,
(lnum, end),
contline + line,
)
contstr, needcont = "", 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
yield (
ERRORTOKEN,
contstr + line,
strstart,
(lnum, len(line)),
contline,
)
contstr = ""
contline = None
continue
else:
@ -395,31 +447,41 @@ def generate_tokens(readline, grammar=None):
continue
elif parenlev == 0 and not continued: # new statement
if not line: break
if not line:
break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ': column = column + 1
elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
elif line[pos] == '\f': column = 0
else: break
if line[pos] == " ":
column = column + 1
elif line[pos] == "\t":
column = (column // tabsize + 1) * tabsize
elif line[pos] == "\f":
column = 0
else:
break
pos = pos + 1
if pos == max: break
if pos == max:
break
if stashed:
yield stashed
stashed = None
if line[pos] in '\r\n': # skip blank lines
if line[pos] in "\r\n": # skip blank lines
yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
continue
if line[pos] == '#': # skip comments
comment_token = line[pos:].rstrip('\r\n')
if line[pos] == "#": # skip comments
comment_token = line[pos:].rstrip("\r\n")
nl_pos = pos + len(comment_token)
yield (COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
yield (NL, line[nl_pos:],
(lnum, nl_pos), (lnum, len(line)), line)
yield (
COMMENT,
comment_token,
(lnum, pos),
(lnum, pos + len(comment_token)),
line,
)
yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents
@ -430,7 +492,8 @@ def generate_tokens(readline, grammar=None):
if column not in indents:
raise IndentationError(
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
("<tokenize>", lnum, pos, line),
)
indents = indents[:-1]
if async_def and async_def_indent >= indents[-1]:
@ -438,7 +501,7 @@ def generate_tokens(readline, grammar=None):
async_def_nl = False
async_def_indent = 0
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
yield (DEDENT, "", (lnum, pos), (lnum, pos), line)
if async_def and async_def_nl and async_def_indent >= indents[-1]:
async_def = False
@ -457,10 +520,11 @@ def generate_tokens(readline, grammar=None):
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
if initial in numchars or (
initial == "." and token != "."
): # ordinary number
yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
elif initial in "\r\n":
newline = NEWLINE
if parenlev > 0:
newline = NL
@ -471,7 +535,7 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield (newline, token, spos, epos, line)
elif initial == '#':
elif initial == "#":
assert not token.endswith("\n")
if stashed:
yield stashed
@ -492,13 +556,18 @@ def generate_tokens(readline, grammar=None):
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
elif (
initial in single_quoted
or token[:2] in single_quoted
or token[:3] in single_quoted
):
if token[-1] == "\n": # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or
endprogs[token[2]])
endprog = (
endprogs[initial]
or endprogs[token[1]]
or endprogs[token[2]]
)
contstr, needcont = line[start:], 1
contline = line
break
@ -508,29 +577,36 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield (STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
if token in ("async", "await"):
if async_keywords or async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
yield (
ASYNC if token == "async" else AWAIT,
token,
spos,
epos,
line,
)
continue
tok = (NAME, token, spos, epos, line)
if token == 'async' and not stashed:
if token == "async" and not stashed:
stashed = tok
continue
if token in ('def', 'for'):
if (stashed
and stashed[0] == NAME
and stashed[1] == 'async'):
if token in ("def", "for"):
if stashed and stashed[0] == NAME and stashed[1] == "async":
if token == 'def':
if token == "def":
async_def = True
async_def_indent = indents[-1]
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
stashed[4])
yield (
ASYNC,
stashed[1],
stashed[2],
stashed[3],
stashed[4],
)
stashed = None
if stashed:
@ -538,7 +614,7 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield tok
elif initial == '\\': # continued stmt
elif initial == "\\": # continued stmt
# This yield is new; needed for better idempotency:
if stashed:
yield stashed
@ -546,15 +622,16 @@ def generate_tokens(readline, grammar=None):
yield (NL, token, spos, (lnum, pos), line)
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
if initial in "([{":
parenlev = parenlev + 1
elif initial in ")]}":
parenlev = parenlev - 1
if stashed:
yield stashed
stashed = None
yield (OP, token, spos, epos, line)
else:
yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
pos = pos + 1
if stashed:
@ -562,10 +639,14 @@ def generate_tokens(readline, grammar=None):
stashed = None
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
if __name__ == '__main__': # testing
if __name__ == "__main__": # testing
import sys
if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
else: tokenize(sys.stdin.readline)
if len(sys.argv) > 1:
tokenize(open(sys.argv[1]).readline)
else:
tokenize(sys.stdin.readline)

View File

@ -12,12 +12,10 @@
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
"PatternGrammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt")
class Symbols(object):
def __init__(self, grammar):
"""Initializer.
@ -38,8 +36,7 @@ def initialize(cache_dir=None):
global pattern_symbols
# Python 2
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE,
cache_dir)
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)
python_symbols = Symbols(python_grammar)
@ -56,8 +53,11 @@ def initialize(cache_dir=None):
python_grammar_no_print_statement_no_exec_statement_async_keywords = (
python_grammar_no_print_statement_no_exec_statement.copy()
)
python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = (
True
)
pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
cache_dir)
pattern_grammar = driver.load_packaged_grammar(
"blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
)
pattern_symbols = Symbols(pattern_grammar)

View File

@ -18,16 +18,21 @@
HUGE = 0x7FFFFFFF # maximum repeat count, default max
_type_reprs = {}
def type_repr(type_num):
global _type_reprs
if not _type_reprs:
from .pygram import python_symbols
# printing tokens is possible but not as useful
# from .pgen2 import token // token.__dict__.items():
for name, val in python_symbols.__dict__.items():
if type(val) == int: _type_reprs[val] = name
if type(val) == int:
_type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num)
class Base(object):
"""
@ -198,17 +203,16 @@ def get_suffix(self):
return next_sib.prefix
if sys.version_info < (3, 0):
def __str__(self):
return str(self).encode("ascii")
class Node(Base):
"""Concrete implementation for interior nodes."""
def __init__(self,type, children,
context=None,
prefix=None,
fixers_applied=None):
def __init__(self, type, children, context=None, prefix=None, fixers_applied=None):
"""
Initializer.
@ -233,9 +237,11 @@ def __init__(self,type, children,
def __repr__(self):
"""Return a canonical string representation."""
return "%s(%s, %r)" % (self.__class__.__name__,
return "%s(%s, %r)" % (
self.__class__.__name__,
type_repr(self.type),
self.children)
self.children,
)
def __unicode__(self):
"""
@ -254,8 +260,11 @@ def _eq(self, other):
def clone(self):
"""Return a cloned (deep) copy of self."""
return Node(self.type, [ch.clone() for ch in self.children],
fixers_applied=self.fixers_applied)
return Node(
self.type,
[ch.clone() for ch in self.children],
fixers_applied=self.fixers_applied,
)
def post_order(self):
"""Return a post-order iterator for the tree."""
@ -328,6 +337,7 @@ def update_sibling_maps(self):
previous = current
_next[id(current)] = None
class Leaf(Base):
"""Concrete implementation for leaf nodes."""
@ -337,10 +347,7 @@ class Leaf(Base):
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input
def __init__(self, type, value,
context=None,
prefix=None,
fixers_applied=[]):
def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
"""
Initializer.
@ -359,9 +366,12 @@ def __init__(self, type, value,
def __repr__(self):
"""Return a canonical string representation."""
from .pgen2.token import tok_name
return "%s(%s, %r)" % (self.__class__.__name__,
return "%s(%s, %r)" % (
self.__class__.__name__,
tok_name.get(self.type, self.type),
self.value)
self.value,
)
def __unicode__(self):
"""
@ -380,9 +390,12 @@ def _eq(self, other):
def clone(self):
"""Return a cloned (deep) copy of self."""
return Leaf(self.type, self.value,
return Leaf(
self.type,
self.value,
(self.prefix, (self.lineno, self.column)),
fixers_applied=self.fixers_applied)
fixers_applied=self.fixers_applied,
)
def leaves(self):
yield self
@ -407,6 +420,7 @@ def prefix(self, prefix):
self.changed()
self._prefix = prefix
def convert(gr, raw_node):
"""
Convert raw node information to a Node or Leaf instance.
@ -513,7 +527,6 @@ def generate_matches(self, nodes):
class LeafPattern(BasePattern):
def __init__(self, type=None, content=None, name=None):
"""
Initializer. Takes optional type, content, and name.
@ -669,20 +682,29 @@ def __init__(self, content=None, min=0, max=HUGE, name=None):
def optimize(self):
"""Optimize certain stacked wildcard patterns."""
subpattern = None
if (self.content is not None and
len(self.content) == 1 and len(self.content[0]) == 1):
if (
self.content is not None
and len(self.content) == 1
and len(self.content[0]) == 1
):
subpattern = self.content[0][0]
if self.min == 1 and self.max == 1:
if self.content is None:
return NodePattern(name=self.name)
if subpattern is not None and self.name == subpattern.name:
return subpattern.optimize()
if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
subpattern.min <= 1 and self.name == subpattern.name):
return WildcardPattern(subpattern.content,
if (
self.min <= 1
and isinstance(subpattern, WildcardPattern)
and subpattern.min <= 1
and self.name == subpattern.name
):
return WildcardPattern(
subpattern.content,
self.min * subpattern.min,
self.max * subpattern.max,
subpattern.name)
subpattern.name,
)
return self
def match(self, node, results=None):
@ -806,7 +828,6 @@ def _recursive_matches(self, nodes, count):
class NegatedPattern(BasePattern):
def __init__(self, content=None):
"""
Initializer.

View File

@ -158,6 +158,16 @@ def invokeBlack(
result = runner.invoke(black.main, args)
self.assertEqual(result.exit_code, exit_code, msg=runner.stderr_bytes.decode())
@patch("black.dump_to_file", dump_to_stderr)
def checkSourceFile(self, name: str) -> None:
path = THIS_DIR.parent / name
source, expected = read_data(str(path), data=False)
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
self.assertFalse(ff(path))
@patch("black.dump_to_file", dump_to_stderr)
def test_empty(self) -> None:
source = expected = ""
@ -177,23 +187,44 @@ def test_empty_ff(self) -> None:
os.unlink(tmp_file)
self.assertFormatEqual(expected, actual)
@patch("black.dump_to_file", dump_to_stderr)
def test_self(self) -> None:
source, expected = read_data("test_black", data=False)
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
self.assertFalse(ff(THIS_FILE))
self.checkSourceFile("tests/test_black.py")
@patch("black.dump_to_file", dump_to_stderr)
def test_black(self) -> None:
source, expected = read_data("../black", data=False)
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
self.assertFalse(ff(THIS_DIR / ".." / "black.py"))
self.checkSourceFile("black.py")
def test_pygram(self) -> None:
self.checkSourceFile("blib2to3/pygram.py")
def test_pytree(self) -> None:
self.checkSourceFile("blib2to3/pytree.py")
def test_conv(self) -> None:
self.checkSourceFile("blib2to3/pgen2/conv.py")
def test_driver(self) -> None:
self.checkSourceFile("blib2to3/pgen2/driver.py")
def test_grammar(self) -> None:
self.checkSourceFile("blib2to3/pgen2/grammar.py")
def test_literals(self) -> None:
self.checkSourceFile("blib2to3/pgen2/literals.py")
def test_parse(self) -> None:
self.checkSourceFile("blib2to3/pgen2/parse.py")
def test_pgen(self) -> None:
self.checkSourceFile("blib2to3/pgen2/pgen.py")
def test_tokenize(self) -> None:
self.checkSourceFile("blib2to3/pgen2/tokenize.py")
def test_token(self) -> None:
self.checkSourceFile("blib2to3/pgen2/token.py")
def test_setup(self) -> None:
self.checkSourceFile("setup.py")
def test_piping(self) -> None:
source, expected = read_data("../black", data=False)
@ -230,15 +261,6 @@ def test_piping_diff(self) -> None:
actual = actual.rstrip() + "\n" # the diff output has a trailing space
self.assertEqual(expected, actual)
@patch("black.dump_to_file", dump_to_stderr)
def test_setup(self) -> None:
source, expected = read_data("../setup", data=False)
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
self.assertFalse(ff(THIS_DIR / ".." / "setup.py"))
@patch("black.dump_to_file", dump_to_stderr)
def test_function(self) -> None:
source, expected = read_data("function")