Blacken .py files in blib2to3 (#1011)

* Blacken .py files in blib2to3

This is in preparation for adding type annotations to blib2to3 in
order to compile it with mypyc (#1009, which I can rebase on top of
this).

To enforce that it stays blackened, I just cargo-culted the existing
test code used for validating formatting. It feels pretty clunky now,
though, so I can abstract the common logic out into a helper if that
seems better. (But error messages might be less clear then?)

* Tidy up the tests
This commit is contained in:
Michael J. Sullivan 2019-10-20 06:55:31 -07:00 committed by Łukasz Langa
parent 3bfb66971f
commit 0ff718e1e2
12 changed files with 457 additions and 319 deletions

View File

@ -1 +1 @@
#empty
# empty

View File

@ -70,8 +70,7 @@ def parse_graminit_h(self, filename):
lineno += 1
mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
if not mo and line.strip():
print("%s(%s): can't parse %s" % (filename, lineno,
line.strip()))
print("%s(%s): can't parse %s" % (filename, lineno, line.strip()))
else:
symbol, number = mo.groups()
number = int(number)
@ -118,39 +117,38 @@ def parse_graminit_c(self, filename):
lineno = 0
# Expect the two #include lines
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == '#include "pgenheaders.h"\n', (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == '#include "grammar.h"\n', (lineno, line)
# Parse the state definitions
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
allarcs = {}
states = []
while line.startswith("static arc "):
while line.startswith("static arc "):
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
line)
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
n, m, k = list(map(int, mo.groups()))
arcs = []
for _ in range(k):
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), (\d+)},$", line)
assert mo, (lineno, line)
i, j = list(map(int, mo.groups()))
arcs.append((i, j))
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
allarcs[(n, m)] = arcs
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
s, t = list(map(int, mo.groups()))
assert s == len(states), (lineno, line)
state = []
for _ in range(t):
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
assert mo, (lineno, line)
k, n, m = list(map(int, mo.groups()))
@ -158,9 +156,9 @@ def parse_graminit_c(self, filename):
assert k == len(arcs), (lineno, line)
state.append(arcs)
states.append(state)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
self.states = states
# Parse the dfas
@ -169,9 +167,8 @@ def parse_graminit_c(self, filename):
assert mo, (lineno, line)
ndfas = int(mo.group(1))
for i in range(ndfas):
lineno, line = lineno+1, next(f)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
line)
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
assert mo, (lineno, line)
symbol = mo.group(2)
number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
@ -180,7 +177,7 @@ def parse_graminit_c(self, filename):
assert x == 0, (lineno, line)
state = states[z]
assert y == len(state), (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
assert mo, (lineno, line)
first = {}
@ -188,21 +185,21 @@ def parse_graminit_c(self, filename):
for i, c in enumerate(rawbitset):
byte = ord(c)
for j in range(8):
if byte & (1<<j):
first[i*8 + j] = 1
if byte & (1 << j):
first[i * 8 + j] = 1
dfas[number] = (state, first)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
self.dfas = dfas
# Parse the labels
labels = []
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"static label labels\[(\d+)\] = {$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
for i in range(nlabels):
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
assert mo, (lineno, line)
x, y = mo.groups()
@ -212,35 +209,35 @@ def parse_graminit_c(self, filename):
else:
y = eval(y)
labels.append((x, y))
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
self.labels = labels
# Parse the grammar struct
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+(\d+),$", line)
assert mo, (lineno, line)
ndfas = int(mo.group(1))
assert ndfas == len(self.dfas)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "\tdfas,\n", (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), labels},$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
assert nlabels == len(self.labels), (lineno, line)
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+(\d+)$", line)
assert mo, (lineno, line)
start = int(mo.group(1))
assert start in self.number2symbol, (lineno, line)
self.start = start
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
try:
lineno, line = lineno+1, next(f)
lineno, line = lineno + 1, next(f)
except StopIteration:
pass
else:
@ -248,8 +245,8 @@ def parse_graminit_c(self, filename):
def finish_off(self):
"""Create additional useful structures. (Internal)."""
self.keywords = {} # map from keyword strings to arc labels
self.tokens = {} # map from numeric token values to arc labels
self.keywords = {} # map from keyword strings to arc labels
self.tokens = {} # map from numeric token values to arc labels
for ilabel, (type, value) in enumerate(self.labels):
if type == token.NAME and value is not None:
self.keywords[value] = ilabel

View File

@ -28,13 +28,7 @@
class Driver(object):
def __init__(
self,
grammar,
convert=None,
logger=None,
):
def __init__(self, grammar, convert=None, logger=None):
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
@ -73,8 +67,9 @@ def parse_tokens(self, tokens, debug=False):
if type == token.OP:
type = grammar.opmap[value]
if debug:
self.logger.debug("%s %r (prefix=%r)",
token.tok_name[type], value, prefix)
self.logger.debug(
"%s %r (prefix=%r)", token.tok_name[type], value, prefix
)
if type == token.INDENT:
indent_columns.append(len(value))
_prefix = prefix + value
@ -96,8 +91,7 @@ def parse_tokens(self, tokens, debug=False):
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
raise parse.ParseError("incomplete input",
type, value, (prefix, start))
raise parse.ParseError("incomplete input", type, value, (prefix, start))
return p.rootnode
def parse_stream_raw(self, stream, debug=False):
@ -117,8 +111,7 @@ def parse_file(self, filename, encoding=None, debug=False):
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(
io.StringIO(text).readline,
grammar=self.grammar
io.StringIO(text).readline, grammar=self.grammar
)
return self.parse_tokens(tokens, debug)
@ -130,24 +123,24 @@ def _partially_consume_prefix(self, prefix, column):
for char in prefix:
current_line += char
if wait_for_nl:
if char == '\n':
if char == "\n":
if current_line.strip() and current_column < column:
res = ''.join(lines)
return res, prefix[len(res):]
res = "".join(lines)
return res, prefix[len(res) :]
lines.append(current_line)
current_line = ""
current_column = 0
wait_for_nl = False
elif char in ' \t':
elif char in " \t":
current_column += 1
elif char == '\n':
elif char == "\n":
# unexpected empty line
current_column = 0
else:
# indent is finished
wait_for_nl = True
return ''.join(lines), current_line
return "".join(lines), current_line
def _generate_pickle_name(gt, cache_dir=None):
@ -161,8 +154,7 @@ def _generate_pickle_name(gt, cache_dir=None):
return name
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger(__name__)
@ -219,11 +211,11 @@ def main(*args):
"""
if not args:
args = sys.argv[1:]
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
format='%(message)s')
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
for gt in args:
load_grammar(gt, save=True, force=True)
return True
if __name__ == "__main__":
sys.exit(int(not main()))

View File

@ -90,7 +90,9 @@ def __init__(self):
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
with tempfile.NamedTemporaryFile(dir=os.path.dirname(filename), delete=False) as f:
with tempfile.NamedTemporaryFile(
dir=os.path.dirname(filename), delete=False
) as f:
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
os.replace(f.name, filename)
@ -109,8 +111,14 @@ def copy(self):
Copy the grammar.
"""
new = self.__class__()
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
"tokens", "symbol2label"):
for dict_attr in (
"symbol2number",
"number2symbol",
"dfas",
"keywords",
"tokens",
"symbol2label",
):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
@ -121,6 +129,7 @@ def copy(self):
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")

View File

@ -5,16 +5,19 @@
import regex as re
simple_escapes = {"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
"r": "\r",
"t": "\t",
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\"}
simple_escapes = {
"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
"r": "\r",
"t": "\t",
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\",
}
def escape(m):
all, tail = m.group(0, 1)
@ -37,16 +40,18 @@ def escape(m):
raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
return chr(i)
def evalString(s):
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0]
if s[:3] == q*3:
q = q*3
assert s.endswith(q), repr(s[-len(q):])
assert len(s) >= 2*len(q)
s = s[len(q):-len(q)]
if s[:3] == q * 3:
q = q * 3
assert s.endswith(q), repr(s[-len(q) :])
assert len(s) >= 2 * len(q)
s = s[len(q) : -len(q)]
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
def test():
for i in range(256):
c = chr(i)

View File

@ -13,17 +13,20 @@
# Local imports
from . import token
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
Exception.__init__(
self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
)
self.msg = msg
self.type = type
self.value = value
self.context = context
class Parser(object):
"""Parser engine.
@ -108,7 +111,7 @@ def setup(self, start=None):
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
def addtoken(self, type, value, context):
"""Add a token; return True iff this is the end of the program."""
@ -145,15 +148,14 @@ def addtoken(self, type, value, context):
if ilabel in itsfirst:
# Push a symbol
self.push(t, self.grammar.dfas[t], newstate, context)
break # To continue the outer while loop
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
raise ParseError("too much input", type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)

View File

@ -4,11 +4,12 @@
# Pgen imports
from . import grammar, token, tokenize
class PgenGrammar(grammar.Grammar):
pass
class ParserGenerator(object):
class ParserGenerator(object):
def __init__(self, filename, stream=None):
close_stream = None
if stream is None:
@ -17,11 +18,11 @@ def __init__(self, filename, stream=None):
self.filename = filename
self.stream = stream
self.generator = tokenize.generate_tokens(stream.readline)
self.gettoken() # Initialize lookahead
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
if close_stream is not None:
close_stream()
self.first = {} # map from symbol name to set of tokens
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
def make_grammar(self):
@ -96,7 +97,7 @@ def make_label(self, c, label):
return ilabel
else:
# An operator (any non-numeric token)
itoken = grammar.opmap[value] # Fails if unknown token
itoken = grammar.opmap[value] # Fails if unknown token
if itoken in c.tokens:
return c.tokens[itoken]
else:
@ -110,11 +111,11 @@ def addfirstsets(self):
for name in names:
if name not in self.first:
self.calcfirst(name)
#print name, self.first[name].keys()
# print name, self.first[name].keys()
def calcfirst(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
@ -136,9 +137,11 @@ def calcfirst(self, name):
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
raise ValueError(
"rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s"
% (name, symbol, label, inverse[symbol])
)
inverse[symbol] = label
self.first[name] = totalset
@ -154,14 +157,14 @@ def parse(self):
self.expect(token.OP, ":")
a, z = self.parse_rhs()
self.expect(token.NEWLINE)
#self.dump_nfa(name, a, z)
# self.dump_nfa(name, a, z)
dfa = self.make_dfa(a, z)
#self.dump_dfa(name, dfa)
# self.dump_dfa(name, dfa)
oldlen = len(dfa)
self.simplify_dfa(dfa)
newlen = len(dfa)
dfas[name] = dfa
#print name, oldlen, newlen
# print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
return dfas, startsymbol
@ -173,10 +176,12 @@ def make_dfa(self, start, finish):
# values.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
@ -185,8 +190,9 @@ def addclosure(state, base):
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
for state in states: # NB states grows while we're iterating
arcs = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
@ -200,7 +206,7 @@ def addclosure(state, base):
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
return states # List of DFAState instances; first one is start
def dump_nfa(self, name, start, finish):
print("Dump of NFA for", name)
@ -236,10 +242,10 @@ def simplify_dfa(self, dfa):
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i+1, len(dfa)):
for j in range(i + 1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
#print " unify", i, j
# print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
@ -266,8 +272,7 @@ def parse_rhs(self):
def parse_alt(self):
# ALT: ITEM+
a, b = self.parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
c, d = self.parse_item()
b.addarc(c)
b = d
@ -307,13 +312,15 @@ def parse_atom(self):
self.gettoken()
return a, z
else:
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
self.raise_error(
"expected (...) or NAME or STRING, got %s/%s", self.type, self.value
)
def expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self.raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
self.raise_error(
"expected %s/%s, got %s/%s", type, value, self.type, self.value
)
value = self.value
self.gettoken()
return value
@ -323,7 +330,7 @@ def gettoken(self):
while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = next(self.generator)
self.type, self.value, self.begin, self.end, self.line = tup
#print token.tok_name[self.type], repr(self.value)
# print token.tok_name[self.type], repr(self.value)
def raise_error(self, msg, *args):
if args:
@ -331,28 +338,27 @@ def raise_error(self, msg, *args):
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
raise SyntaxError(msg, (self.filename, self.end[0],
self.end[1], self.line))
raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None):
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
self.arcs = {} # map from label to DFAState
def addarc(self, next, label):
assert isinstance(label, str)
@ -379,7 +385,8 @@ def __eq__(self, other):
return False
return True
__hash__ = None # For Py3 compatibility.
__hash__ = None # For Py3 compatibility.
def generate_grammar(filename="Grammar.txt"):
p = ParserGenerator(filename)

View File

@ -3,7 +3,7 @@
# Taken from Python (r53757) and modified to include some tokens
# originally monkeypatched in by pgen2.tokenize
#--start constants--
# --start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2
@ -66,7 +66,7 @@
COLONEQUAL = 59
N_TOKENS = 60
NT_OFFSET = 256
#--end constants--
# --end constants--
tok_name = {}
for _name, _value in list(globals().items()):
@ -77,8 +77,10 @@
def ISTERMINAL(x):
return x < NT_OFFSET
def ISNONTERMINAL(x):
return x >= NT_OFFSET
def ISEOF(x):
return x == ENDMARKER

View File

@ -25,17 +25,20 @@
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro"
import regex as re
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *
from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
"generate_tokens", "untokenize"]
__all__ = [x for x in dir(token) if x[0] != "_"] + [
"tokenize",
"generate_tokens",
"untokenize",
]
del token
try:
@ -45,29 +48,40 @@
# valid Python 3 code.
bytes = str
def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
def group(*choices):
return "(" + "|".join(choices) + ")"
def any(*choices):
return group(*choices) + "*"
def maybe(*choices):
return group(*choices) + "?"
def _combinations(*l):
return set(
x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
)
return set(x + y for x in l for y in l + ("",) if x.casefold() != y.casefold())
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+' # this is invalid but it's fine because Name comes after Number in all groups
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
Whitespace = r"[ \f\t]*"
Comment = r"#[^\r\n]*"
Ignore = Whitespace + any(r"\\\r?\n" + Whitespace) + maybe(Comment)
Name = r"\w+" # this is invalid but it's fine because Name comes after Number in all groups
Binnumber = r"0[bB]_?[01]+(?:_[01]+)*"
Hexnumber = r"0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?"
Octnumber = r"0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?"
Decnumber = group(r"[1-9]\d*(?:_\d+)*[lL]?", "0[lL]?")
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+(?:_\d+)*'
Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
Expfloat = r'\d+(?:_\d+)*' + Exponent
Exponent = r"[eE][-+]?\d+(?:_\d+)*"
Pointfloat = group(r"\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?", r"\.\d+(?:_\d+)*") + maybe(
Exponent
)
Expfloat = r"\d+(?:_\d+)*" + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
Imagnumber = group(r"\d+(?:_\d+)*[jJ]", Floatnumber + r"[jJ]")
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
@ -81,30 +95,39 @@ def _combinations(*l):
_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
String = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
)
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
r"//=?", r"->",
r"[+\-*/%&@|^=<>:]=?",
r"~")
Operator = group(
r"\*\*=?",
r">>=?",
r"<<=?",
r"<>",
r"!=",
r"//=?",
r"->",
r"[+\-*/%&@|^=<>:]=?",
r"~",
)
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Bracket = "[][(){}]"
Special = group(r"\r?\n", r"[:;.,`@]")
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
ContStr = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
)
PseudoExtras = group(r"\\\r?\n", Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
tokenprog = re.compile(Token, re.UNICODE)
@ -113,39 +136,50 @@ def _combinations(*l):
double3prog = re.compile(Double3)
_strprefixes = (
_combinations('r', 'R', 'f', 'F') |
_combinations('r', 'R', 'b', 'B') |
{'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
_combinations("r", "R", "f", "F")
| _combinations("r", "R", "b", "B")
| {"u", "U", "ur", "uR", "Ur", "UR"}
)
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
"'''": single3prog, '"""': double3prog,
**{f"{prefix}'''": single3prog for prefix in _strprefixes},
**{f'{prefix}"""': double3prog for prefix in _strprefixes},
**{prefix: None for prefix in _strprefixes}}
endprogs = {
"'": re.compile(Single),
'"': re.compile(Double),
"'''": single3prog,
'"""': double3prog,
**{f"{prefix}'''": single3prog for prefix in _strprefixes},
**{f'{prefix}"""': double3prog for prefix in _strprefixes},
**{prefix: None for prefix in _strprefixes},
}
triple_quoted = (
{"'''", '"""'} |
{f"{prefix}'''" for prefix in _strprefixes} |
{f'{prefix}"""' for prefix in _strprefixes}
{"'''", '"""'}
| {f"{prefix}'''" for prefix in _strprefixes}
| {f'{prefix}"""' for prefix in _strprefixes}
)
single_quoted = (
{"'", '"'} |
{f"{prefix}'" for prefix in _strprefixes} |
{f'{prefix}"' for prefix in _strprefixes}
{"'", '"'}
| {f"{prefix}'" for prefix in _strprefixes}
| {f'{prefix}"' for prefix in _strprefixes}
)
tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
class TokenError(Exception):
pass
def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
class StopTokenizing(Exception):
pass
def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
(srow, scol) = xxx_todo_changeme
(erow, ecol) = xxx_todo_changeme1
print("%d,%d-%d,%d:\t%s\t%s" % \
(srow, scol, erow, ecol, tok_name[type], repr(token)))
print(
"%d,%d-%d,%d:\t%s\t%s" % (srow, scol, erow, ecol, tok_name[type], repr(token))
)
def tokenize(readline, tokeneater=printtoken):
"""
@ -165,13 +199,14 @@ def tokenize(readline, tokeneater=printtoken):
except StopTokenizing:
pass
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
for token_info in generate_tokens(readline):
tokeneater(*token_info)
class Untokenizer:
class Untokenizer:
def __init__(self):
self.tokens = []
self.prev_row = 1
@ -204,14 +239,14 @@ def compat(self, token, iterable):
toks_append = self.tokens.append
toknum, tokval = token
if toknum in (NAME, NUMBER):
tokval += ' '
tokval += " "
if toknum in (NEWLINE, NL):
startline = True
for tok in iterable:
toknum, tokval = tok[:2]
if toknum in (NAME, NUMBER, ASYNC, AWAIT):
tokval += ' '
tokval += " "
if toknum == INDENT:
indents.append(tokval)
@ -226,8 +261,10 @@ def compat(self, token, iterable):
startline = False
toks_append(tokval)
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
@ -235,11 +272,13 @@ def _get_normal_name(orig_enc):
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or enc.startswith(
("latin-1-", "iso-8859-1-", "iso-latin-1-")
):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
@ -260,7 +299,8 @@ def detect_encoding(readline):
"""
bom_found = False
encoding = None
default = 'utf-8'
default = "utf-8"
def read_or_stop():
try:
return readline()
@ -269,7 +309,7 @@ def read_or_stop():
def find_cookie(line):
try:
line_string = line.decode('ascii')
line_string = line.decode("ascii")
except UnicodeDecodeError:
return None
match = cookie_re.match(line_string)
@ -283,17 +323,17 @@ def find_cookie(line):
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
if codec.name != 'utf-8':
if codec.name != "utf-8":
# This behaviour mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
raise SyntaxError("encoding problem: utf-8")
encoding += "-sig"
return encoding
first = read_or_stop()
if first.startswith(BOM_UTF8):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
default = "utf-8-sig"
if not first:
return default, []
@ -313,6 +353,7 @@ def find_cookie(line):
return default, [first, second]
def untokenize(iterable):
"""Transform tokens back into Python source code.
@ -334,6 +375,7 @@ def untokenize(iterable):
ut = Untokenizer()
return ut.untokenize(iterable)
def generate_tokens(readline, grammar=None):
"""
The generate_tokens() generator requires one argument, readline, which
@ -351,8 +393,8 @@ def generate_tokens(readline, grammar=None):
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
numchars = '0123456789'
contstr, needcont = '', 0
numchars = "0123456789"
contstr, needcont = "", 0
contline = None
indents = [0]
@ -365,28 +407,38 @@ def generate_tokens(readline, grammar=None):
async_def_indent = 0
async_def_nl = False
while 1: # loop over lines in stream
while 1: # loop over lines in stream
try:
line = readline()
except StopIteration:
line = ''
line = ""
lnum = lnum + 1
pos, max = 0, len(line)
if contstr: # continued string
if contstr: # continued string
if not line:
raise TokenError("EOF in multi-line string", strstart)
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
yield (
STRING,
contstr + line[:end],
strstart,
(lnum, end),
contline + line,
)
contstr, needcont = "", 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
yield (
ERRORTOKEN,
contstr + line,
strstart,
(lnum, len(line)),
contline,
)
contstr = ""
contline = None
continue
else:
@ -395,42 +447,53 @@ def generate_tokens(readline, grammar=None):
continue
elif parenlev == 0 and not continued: # new statement
if not line: break
if not line:
break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ': column = column + 1
elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
elif line[pos] == '\f': column = 0
else: break
while pos < max: # measure leading whitespace
if line[pos] == " ":
column = column + 1
elif line[pos] == "\t":
column = (column // tabsize + 1) * tabsize
elif line[pos] == "\f":
column = 0
else:
break
pos = pos + 1
if pos == max: break
if pos == max:
break
if stashed:
yield stashed
stashed = None
if line[pos] in '\r\n': # skip blank lines
if line[pos] in "\r\n": # skip blank lines
yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line)
continue
if line[pos] == '#': # skip comments
comment_token = line[pos:].rstrip('\r\n')
if line[pos] == "#": # skip comments
comment_token = line[pos:].rstrip("\r\n")
nl_pos = pos + len(comment_token)
yield (COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
yield (NL, line[nl_pos:],
(lnum, nl_pos), (lnum, len(line)), line)
yield (
COMMENT,
comment_token,
(lnum, pos),
(lnum, pos + len(comment_token)),
line,
)
yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents
if column > indents[-1]: # count indents
indents.append(column)
yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]: # count dedents
while column < indents[-1]: # count dedents
if column not in indents:
raise IndentationError(
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
("<tokenize>", lnum, pos, line),
)
indents = indents[:-1]
if async_def and async_def_indent >= indents[-1]:
@ -438,29 +501,30 @@ def generate_tokens(readline, grammar=None):
async_def_nl = False
async_def_indent = 0
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
yield (DEDENT, "", (lnum, pos), (lnum, pos), line)
if async_def and async_def_nl and async_def_indent >= indents[-1]:
async_def = False
async_def_nl = False
async_def_indent = 0
else: # continued statement
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
continued = 0
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
if pseudomatch: # scan for tokens
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
if initial in numchars or (
initial == "." and token != "."
): # ordinary number
yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
elif initial in "\r\n":
newline = NEWLINE
if parenlev > 0:
newline = NL
@ -471,7 +535,7 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield (newline, token, spos, epos, line)
elif initial == '#':
elif initial == "#":
assert not token.endswith("\n")
if stashed:
yield stashed
@ -480,7 +544,7 @@ def generate_tokens(readline, grammar=None):
elif token in triple_quoted:
endprog = endprogs[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
if stashed:
@ -488,49 +552,61 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield (STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
strstart = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
elif (
initial in single_quoted
or token[:2] in single_quoted
or token[:3] in single_quoted
):
if token[-1] == "\n": # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or
endprogs[token[2]])
endprog = (
endprogs[initial]
or endprogs[token[1]]
or endprogs[token[2]]
)
contstr, needcont = line[start:], 1
contline = line
break
else: # ordinary string
else: # ordinary string
if stashed:
yield stashed
stashed = None
yield (STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
elif initial.isidentifier(): # ordinary name
if token in ("async", "await"):
if async_keywords or async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
yield (
ASYNC if token == "async" else AWAIT,
token,
spos,
epos,
line,
)
continue
tok = (NAME, token, spos, epos, line)
if token == 'async' and not stashed:
if token == "async" and not stashed:
stashed = tok
continue
if token in ('def', 'for'):
if (stashed
and stashed[0] == NAME
and stashed[1] == 'async'):
if token in ("def", "for"):
if stashed and stashed[0] == NAME and stashed[1] == "async":
if token == 'def':
if token == "def":
async_def = True
async_def_indent = indents[-1]
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
stashed[4])
yield (
ASYNC,
stashed[1],
stashed[2],
stashed[3],
stashed[4],
)
stashed = None
if stashed:
@ -538,7 +614,7 @@ def generate_tokens(readline, grammar=None):
stashed = None
yield tok
elif initial == '\\': # continued stmt
elif initial == "\\": # continued stmt
# This yield is new; needed for better idempotency:
if stashed:
yield stashed
@ -546,26 +622,31 @@ def generate_tokens(readline, grammar=None):
yield (NL, token, spos, (lnum, pos), line)
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
if initial in "([{":
parenlev = parenlev + 1
elif initial in ")]}":
parenlev = parenlev - 1
if stashed:
yield stashed
stashed = None
yield (OP, token, spos, epos, line)
else:
yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
pos = pos + 1
if stashed:
yield stashed
stashed = None
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
if __name__ == "__main__":  # testing
    # Manual smoke test: hand tokenize() a readline callable for the file
    # named on the command line, or for standard input when no file is given.
    import sys

    if len(sys.argv) > 1:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(sys.argv[1]) as source_file:
            tokenize(source_file.readline)
    else:
        tokenize(sys.stdin.readline)

View File

@ -12,12 +12,10 @@
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
"PatternGrammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt")
class Symbols(object):
def __init__(self, grammar):
"""Initializer.
@ -38,8 +36,7 @@ def initialize(cache_dir=None):
global pattern_symbols
# Python 2
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE,
cache_dir)
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)
python_symbols = Symbols(python_grammar)
@ -56,8 +53,11 @@ def initialize(cache_dir=None):
python_grammar_no_print_statement_no_exec_statement_async_keywords = (
python_grammar_no_print_statement_no_exec_statement.copy()
)
python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = (
True
)
pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
cache_dir)
pattern_grammar = driver.load_packaged_grammar(
"blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
)
pattern_symbols = Symbols(pattern_grammar)

View File

@ -18,16 +18,21 @@
HUGE = 0x7FFFFFFF # maximum repeat count, default max
# Cache mapping type numbers to display names; filled lazily by type_repr().
_type_reprs = {}


def type_repr(type_num):
    """Return the symbolic name for *type_num*, or *type_num* itself if unknown.

    Whenever the cache is empty it is populated from the integer-valued
    attributes of ``python_symbols``.  Numbers that have no name are stored in
    the cache mapped to themselves, so they are returned unchanged.
    """
    # The dict is only mutated, never rebound, so no ``global`` is needed.
    if not _type_reprs:
        # Imported at call time, as in the original code.
        from .pygram import python_symbols

        # printing tokens is possible but not as useful
        # from .pgen2 import token // token.__dict__.items():
        for name, val in python_symbols.__dict__.items():
            # Exact type match: only plain ints are treated as symbol numbers.
            if type(val) is int:
                _type_reprs[val] = name
    return _type_reprs.setdefault(type_num, type_num)
class Base(object):
"""
@ -40,7 +45,7 @@ class Base(object):
"""
# Default values for instance variables
type = None # int: token number (< 256) or symbol number (>= 256)
type = None # int: token number (< 256) or symbol number (>= 256)
parent = None # Parent node pointer, or None
children = () # Tuple of subnodes
was_changed = False
@ -61,7 +66,7 @@ def __eq__(self, other):
return NotImplemented
return self._eq(other)
__hash__ = None # For Py3 compatibility.
__hash__ = None # For Py3 compatibility.
def _eq(self, other):
"""
@ -198,17 +203,16 @@ def get_suffix(self):
return next_sib.prefix
if sys.version_info < (3, 0):
def __str__(self):
return str(self).encode("ascii")
class Node(Base):
"""Concrete implementation for interior nodes."""
def __init__(self,type, children,
context=None,
prefix=None,
fixers_applied=None):
def __init__(self, type, children, context=None, prefix=None, fixers_applied=None):
"""
Initializer.
@ -233,9 +237,11 @@ def __init__(self,type, children,
def __repr__(self):
    """Return ``ClassName(type, children)`` with the type shown via type_repr()."""
    class_name = self.__class__.__name__
    symbol = type_repr(self.type)
    return "%s(%s, %r)" % (class_name, symbol, self.children)
def __unicode__(self):
"""
@ -254,8 +260,11 @@ def _eq(self, other):
def clone(self):
    """Return a new Node of the same type whose children are clones.

    Every child is cloned recursively; ``fixers_applied`` is passed through
    to the new node's constructor.
    """
    cloned_children = [child.clone() for child in self.children]
    return Node(self.type, cloned_children, fixers_applied=self.fixers_applied)
def post_order(self):
"""Return a post-order iterator for the tree."""
@ -328,19 +337,17 @@ def update_sibling_maps(self):
previous = current
_next[id(current)] = None
class Leaf(Base):
"""Concrete implementation for leaf nodes."""
# Default values for instance variables
_prefix = "" # Whitespace and comments preceding this token in the input
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input
def __init__(self, type, value,
context=None,
prefix=None,
fixers_applied=[]):
def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
"""
Initializer.
@ -359,9 +366,12 @@ def __init__(self, type, value,
def __repr__(self):
    """Return ``ClassName(TOKEN, value)``.

    The token is shown by its name from ``tok_name`` when it has one and by
    its raw type number otherwise.
    """
    from .pgen2.token import tok_name

    token_label = tok_name.get(self.type, self.type)
    return "%s(%s, %r)" % (self.__class__.__name__, token_label, self.value)
def __unicode__(self):
"""
@ -380,9 +390,12 @@ def _eq(self, other):
def clone(self):
    """Return a new Leaf with the same type, value, prefix and position."""
    position = (self.lineno, self.column)
    # The constructor's third argument is (prefix, (lineno, column)).
    context = (self.prefix, position)
    return Leaf(self.type, self.value, context, fixers_applied=self.fixers_applied)
def leaves(self):
yield self
@ -407,6 +420,7 @@ def prefix(self, prefix):
self.changed()
self._prefix = prefix
def convert(gr, raw_node):
"""
Convert raw node information to a Node or Leaf instance.
@ -443,9 +457,9 @@ class BasePattern(object):
"""
# Defaults for instance variables
type = None # Node type (token if < 256, symbol if >= 256)
type = None # Node type (token if < 256, symbol if >= 256)
content = None # Optional content matching pattern
name = None # Optional name used to store match in results dict
name = None # Optional name used to store match in results dict
def __new__(cls, *args, **kwds):
"""Constructor that prevents BasePattern from being instantiated."""
@ -513,7 +527,6 @@ def generate_matches(self, nodes):
class LeafPattern(BasePattern):
def __init__(self, type=None, content=None, name=None):
"""
Initializer. Takes optional type, content, and name.
@ -660,7 +673,7 @@ def __init__(self, content=None, min=0, max=HUGE, name=None):
# Check sanity of alternatives
assert len(content), repr(content) # Can't have zero alternatives
for alt in content:
assert len(alt), repr(alt) # Can have empty alternatives
assert len(alt), repr(alt) # Can have empty alternatives
self.content = content
self.min = min
self.max = max
@ -669,20 +682,29 @@ def __init__(self, content=None, min=0, max=HUGE, name=None):
def optimize(self):
    """Collapse certain stacked wildcard patterns into a simpler pattern.

    Returns a replacement pattern when one of the recognised shapes applies
    and ``self`` otherwise.
    """
    content = self.content
    # The only candidate for merging is a single alternative that holds a
    # single sub-pattern.
    inner = None
    if content is not None and len(content) == 1 and len(content[0]) == 1:
        inner = content[0][0]

    if self.min == 1 and self.max == 1:
        # Exactly one repetition: the wildcard wrapper adds nothing.
        if content is None:
            return NodePattern(name=self.name)
        if inner is not None and self.name == inner.name:
            return inner.optimize()

    # Nested wildcards with the same name merge by multiplying their bounds.
    if self.min > 1 or not isinstance(inner, WildcardPattern):
        return self
    if inner.min > 1 or self.name != inner.name:
        return self
    return WildcardPattern(
        inner.content, self.min * inner.min, self.max * inner.max, inner.name
    )
def match(self, node, results=None):
@ -798,7 +820,7 @@ def _recursive_matches(self, nodes, count):
if count < self.max:
for alt in self.content:
for c0, r0 in generate_matches(alt, nodes):
for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
r = {}
r.update(r0)
r.update(r1)
@ -806,7 +828,6 @@ def _recursive_matches(self, nodes, count):
class NegatedPattern(BasePattern):
def __init__(self, content=None):
"""
Initializer.

View File

@ -158,6 +158,16 @@ def invokeBlack(
result = runner.invoke(black.main, args)
self.assertEqual(result.exit_code, exit_code, msg=runner.stderr_bytes.decode())
@patch("black.dump_to_file", dump_to_stderr)
def checkSourceFile(self, name: str) -> None:
    """Run the shared formatting checks on the repository file *name*.

    *name* is resolved relative to the parent of ``THIS_DIR``.  The file is
    loaded with ``read_data``, formatted with ``fs``, and the result must
    match the expected text, be equivalent to the source and be stable.
    Finally ``ff`` must return a false value for the file on disk
    (presumably meaning that it needed no changes -- confirm against ``ff``).
    """
    source_path = THIS_DIR.parent / name
    original, wanted = read_data(str(source_path), data=False)
    formatted = fs(original)
    self.assertFormatEqual(wanted, formatted)
    black.assert_equivalent(original, formatted)
    black.assert_stable(original, formatted, black.FileMode())
    self.assertFalse(ff(source_path))
@patch("black.dump_to_file", dump_to_stderr)
def test_empty(self) -> None:
source = expected = ""
@ -177,23 +187,44 @@ def test_empty_ff(self) -> None:
os.unlink(tmp_file)
self.assertFormatEqual(expected, actual)
def test_self(self) -> None:
    """Run the shared source-file checks on this test module itself."""
    # checkSourceFile already applies the dump_to_file patch and performs the
    # format/equivalence/stability checks, so they are not repeated inline.
    self.checkSourceFile("tests/test_black.py")
def test_black(self) -> None:
    """Run the shared source-file checks on black.py."""
    # checkSourceFile already applies the dump_to_file patch and performs the
    # format/equivalence/stability checks, so they are not repeated inline.
    self.checkSourceFile("black.py")
# Each test below runs the shared checkSourceFile() checks against one file,
# given as a path relative to the directory that checkSourceFile resolves
# names against.
# blib2to3/pygram.py
def test_pygram(self) -> None:
self.checkSourceFile("blib2to3/pygram.py")
# blib2to3/pytree.py
def test_pytree(self) -> None:
self.checkSourceFile("blib2to3/pytree.py")
# blib2to3/pgen2/conv.py
def test_conv(self) -> None:
self.checkSourceFile("blib2to3/pgen2/conv.py")
# blib2to3/pgen2/driver.py
def test_driver(self) -> None:
self.checkSourceFile("blib2to3/pgen2/driver.py")
# blib2to3/pgen2/grammar.py
def test_grammar(self) -> None:
self.checkSourceFile("blib2to3/pgen2/grammar.py")
# blib2to3/pgen2/literals.py
def test_literals(self) -> None:
self.checkSourceFile("blib2to3/pgen2/literals.py")
# blib2to3/pgen2/parse.py
def test_parse(self) -> None:
self.checkSourceFile("blib2to3/pgen2/parse.py")
# blib2to3/pgen2/pgen.py
def test_pgen(self) -> None:
self.checkSourceFile("blib2to3/pgen2/pgen.py")
# blib2to3/pgen2/tokenize.py
def test_tokenize(self) -> None:
self.checkSourceFile("blib2to3/pgen2/tokenize.py")
# blib2to3/pgen2/token.py
def test_token(self) -> None:
self.checkSourceFile("blib2to3/pgen2/token.py")
# setup.py
def test_setup(self) -> None:
self.checkSourceFile("setup.py")
def test_piping(self) -> None:
source, expected = read_data("../black", data=False)
@ -230,15 +261,6 @@ def test_piping_diff(self) -> None:
actual = actual.rstrip() + "\n" # the diff output has a trailing space
self.assertEqual(expected, actual)
def test_setup(self) -> None:
    """Run the shared source-file checks on setup.py."""
    # Delegate to checkSourceFile rather than repeating its checks inline; the
    # helper already applies the dump_to_file patch.
    self.checkSourceFile("setup.py")
@patch("black.dump_to_file", dump_to_stderr)
def test_function(self) -> None:
source, expected = read_data("function")