[blib2to3] Support non-ASCII identifiers

This support isn't *exactly* right per PEP 3131: the regex engine is a bit
too limited for that, and I didn't want to spend time on Other_ID_Start and
Other_ID_Continue unless they're actually needed.

Hopefully this doesn't slow tokenization down too much.
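The heart of the change is replacing the ASCII-only Name pattern with one built from \w. A minimal sketch of what that buys, and of the Other_ID_Start gap the message mentions (U+2118 ℘ is in Other_ID_Start, so CPython accepts it as an identifier even though it is not a \w character):

```python
import re

old_name = re.compile(r'[a-zA-Z_]\w*')            # previous pattern: ASCII start only
new_name = re.compile(r'[^\d\W]\w*', re.UNICODE)  # new pattern: any non-digit word char
# (re.UNICODE is the default for str patterns in Python 3; it is spelled
# out here as in the commit.)

for ident in ('Ø', 'łukasz'):
    assert old_name.match(ident) is None  # rejected before this commit
    assert new_name.match(ident)          # accepted now

# The documented imprecision: U+2118 (℘) is a valid identifier per
# PEP 3131 via Other_ID_Start, but it isn't a word character, so the
# regex still rejects it.
assert '℘'.isidentifier()
assert new_name.match('℘') is None
```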
Łukasz Langa 2018-04-04 21:38:25 -07:00
parent 2e0bb0fa9f
commit e36b8c71bb
3 changed files with 20 additions and 8 deletions

blib2to3/pgen2/tokenize.py

@@ -29,7 +29,7 @@
 __credits__ = \
     'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
-import string, re
+import string, re, unicodedata
 from codecs import BOM_UTF8, lookup
 from blib2to3.pgen2.token import *
@@ -52,7 +52,7 @@ def maybe(*choices): return group(*choices) + '?'
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'[a-zA-Z_]\w*'
+Name = r'[^\d\W]\w*'
 Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
 Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
@@ -103,8 +103,10 @@ def maybe(*choices): return group(*choices) + '?'
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
-tokenprog, pseudoprog, single3prog, double3prog = list(map(
-    re.compile, (Token, PseudoToken, Single3, Double3)))
+tokenprog = re.compile(Token, re.UNICODE)
+pseudoprog = re.compile(PseudoToken, re.UNICODE)
+single3prog = re.compile(Single3)
+double3prog = re.compile(Double3)
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
@@ -358,6 +360,8 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)

+InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
+
 def generate_tokens(readline):
     """
     The generate_tokens() generator requires one argument, readline, which
@@ -473,6 +477,8 @@ def generate_tokens(readline):
         while pos < max:
             pseudomatch = pseudoprog.match(line, pos)
+            if not pseudomatch:
+                print('no pseudomatch')
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
@@ -528,7 +534,8 @@ def generate_tokens(readline):
                         yield stashed
                         stashed = None
                     yield (STRING, token, spos, epos, line)
-                elif initial in namechars:                 # ordinary name
+                elif (initial in namechars or              # ordinary name
+                      unicodedata.category(initial) in InitialCategories):
                     if token in ('async', 'await'):
                         if async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
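
In generate_tokens(), the pseudo-regex match is followed by a dispatch on the token's first character; the new elif arm accepts a non-ASCII initial when its Unicode category is in InitialCategories. A standalone sketch of that check, with namechars and InitialCategories as in the diff (looks_like_name is a hypothetical helper, not a function in the module):

```python
import string
import unicodedata

namechars = string.ascii_letters + '_'  # the module's ASCII name alphabet
InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}

def looks_like_name(initial: str) -> bool:
    # ASCII fast path first; one unicodedata lookup for everything else.
    return initial in namechars or unicodedata.category(initial) in InitialCategories

assert looks_like_name('l')      # ASCII fast path
assert looks_like_name('Ø')      # category 'Lu'
assert not looks_like_name('+')  # category 'Sm'
```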

tests/expression.diff

@@ -103,7 +103,7 @@
  ]
  slice[0]
  slice[0:1]
-@@ -114,71 +123,90 @@
+@@ -114,73 +123,92 @@
  numpy[-(c + 1):, d]
  numpy[:, l[-2]]
  numpy[:, ::-1]
@@ -142,8 +142,10 @@
 +).order_by(
 +    models.Customer.id.asc()
 +).all()
 +
+ Ø = set()
+ authors.łukasz.say_thanks()
 +
  def gen():
      yield from outside_of_generator
 +
@@ -235,4 +237,3 @@
-+
  last_call()
  # standalone comment at ENDMARKER

tests/expression.py

@@ -135,6 +135,8 @@
 what_is_up_with_those_new_coord_names = (coord_names + set(vars_to_create)) + set(vars_to_remove)
 what_is_up_with_those_new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove)
 result = session.query(models.Customer.id).filter(models.Customer.account_id == account_id, models.Customer.email == email_address).order_by(models.Customer.id.asc(),).all()
+Ø = set()
+authors.łukasz.say_thanks()
 def gen():
     yield from outside_of_generator
@@ -340,6 +342,8 @@ async def f():
 ).order_by(
     models.Customer.id.asc()
 ).all()
+Ø = set()
+authors.łukasz.say_thanks()
 def gen():
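
The test additions above (Ø = set() and authors.łukasz.say_thanks() in both the input and expected-output sections) can be exercised directly against the tokenizer. A smoke test along these lines, assuming a Black checkout on PYTHONPATH so blib2to3 imports:

```python
from io import StringIO

from blib2to3.pgen2 import token, tokenize

src = "Ø = set()\nauthors.łukasz.say_thanks()\n"
# generate_tokens yields (type, string, start, end, line) 5-tuples.
tokens = tokenize.generate_tokens(StringIO(src).readline)
names = [tok_str for tok_type, tok_str, *_ in tokens if tok_type == token.NAME]
assert names == ['Ø', 'set', 'authors', 'łukasz', 'say_thanks']
```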