[blib2to3] Support non-ASCII identifiers
This support isn't *exactly* right per PEP 3131: the regex engine is a bit too limited for that, and I didn't want to spend time on Other_ID_Start and Other_ID_Continue unless they're actually needed. Hopefully this doesn't slow the tokenizer down too much.
This commit is contained in:
parent
2e0bb0fa9f
commit
e36b8c71bb
@ -29,7 +29,7 @@
|
|||||||
__credits__ = \
|
__credits__ = \
|
||||||
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
|
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
|
||||||
|
|
||||||
import string, re
|
import string, re, unicodedata
|
||||||
from codecs import BOM_UTF8, lookup
|
from codecs import BOM_UTF8, lookup
|
||||||
from blib2to3.pgen2.token import *
|
from blib2to3.pgen2.token import *
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ def maybe(*choices): return group(*choices) + '?'
|
|||||||
Whitespace = r'[ \f\t]*'
|
Whitespace = r'[ \f\t]*'
|
||||||
Comment = r'#[^\r\n]*'
|
Comment = r'#[^\r\n]*'
|
||||||
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||||
Name = r'[a-zA-Z_]\w*'
|
Name = r'[^\d\W]\w*'
|
||||||
|
|
||||||
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
|
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
|
||||||
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
|
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
|
||||||
@ -103,8 +103,10 @@ def maybe(*choices): return group(*choices) + '?'
|
|||||||
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
|
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
|
||||||
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
||||||
|
|
||||||
tokenprog, pseudoprog, single3prog, double3prog = list(map(
|
tokenprog = re.compile(Token, re.UNICODE)
|
||||||
re.compile, (Token, PseudoToken, Single3, Double3)))
|
pseudoprog = re.compile(PseudoToken, re.UNICODE)
|
||||||
|
single3prog = re.compile(Single3)
|
||||||
|
double3prog = re.compile(Double3)
|
||||||
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
|
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
|
||||||
"'''": single3prog, '"""': double3prog,
|
"'''": single3prog, '"""': double3prog,
|
||||||
"r'''": single3prog, 'r"""': double3prog,
|
"r'''": single3prog, 'r"""': double3prog,
|
||||||
@ -358,6 +360,8 @@ def untokenize(iterable):
|
|||||||
ut = Untokenizer()
|
ut = Untokenizer()
|
||||||
return ut.untokenize(iterable)
|
return ut.untokenize(iterable)
|
||||||
|
|
||||||
|
InitialCategories = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'}
|
||||||
|
|
||||||
def generate_tokens(readline):
|
def generate_tokens(readline):
|
||||||
"""
|
"""
|
||||||
The generate_tokens() generator requires one argument, readline, which
|
The generate_tokens() generator requires one argument, readline, which
|
||||||
@ -473,6 +477,8 @@ def generate_tokens(readline):
|
|||||||
|
|
||||||
while pos < max:
|
while pos < max:
|
||||||
pseudomatch = pseudoprog.match(line, pos)
|
pseudomatch = pseudoprog.match(line, pos)
|
||||||
|
if not pseudomatch:
|
||||||
|
print('no pseudomatch')
|
||||||
if pseudomatch: # scan for tokens
|
if pseudomatch: # scan for tokens
|
||||||
start, end = pseudomatch.span(1)
|
start, end = pseudomatch.span(1)
|
||||||
spos, epos, pos = (lnum, start), (lnum, end), end
|
spos, epos, pos = (lnum, start), (lnum, end), end
|
||||||
@ -528,7 +534,8 @@ def generate_tokens(readline):
|
|||||||
yield stashed
|
yield stashed
|
||||||
stashed = None
|
stashed = None
|
||||||
yield (STRING, token, spos, epos, line)
|
yield (STRING, token, spos, epos, line)
|
||||||
elif initial in namechars: # ordinary name
|
elif (initial in namechars or # ordinary name
|
||||||
|
unicodedata.category(initial) in InitialCategories):
|
||||||
if token in ('async', 'await'):
|
if token in ('async', 'await'):
|
||||||
if async_def:
|
if async_def:
|
||||||
yield (ASYNC if token == 'async' else AWAIT,
|
yield (ASYNC if token == 'async' else AWAIT,
|
||||||
|
@ -103,7 +103,7 @@
|
|||||||
]
|
]
|
||||||
slice[0]
|
slice[0]
|
||||||
slice[0:1]
|
slice[0:1]
|
||||||
@@ -114,71 +123,90 @@
|
@@ -114,73 +123,92 @@
|
||||||
numpy[-(c + 1):, d]
|
numpy[-(c + 1):, d]
|
||||||
numpy[:, l[-2]]
|
numpy[:, l[-2]]
|
||||||
numpy[:, ::-1]
|
numpy[:, ::-1]
|
||||||
@ -142,8 +142,10 @@
|
|||||||
+).order_by(
|
+).order_by(
|
||||||
+ models.Customer.id.asc()
|
+ models.Customer.id.asc()
|
||||||
+).all()
|
+).all()
|
||||||
+
|
Ø = set()
|
||||||
|
authors.łukasz.say_thanks()
|
||||||
|
|
||||||
|
+
|
||||||
def gen():
|
def gen():
|
||||||
yield from outside_of_generator
|
yield from outside_of_generator
|
||||||
+
|
+
|
||||||
@ -235,4 +237,3 @@
|
|||||||
+
|
+
|
||||||
last_call()
|
last_call()
|
||||||
# standalone comment at ENDMARKER
|
# standalone comment at ENDMARKER
|
||||||
|
|
||||||
|
@ -135,6 +135,8 @@
|
|||||||
what_is_up_with_those_new_coord_names = (coord_names + set(vars_to_create)) + set(vars_to_remove)
|
what_is_up_with_those_new_coord_names = (coord_names + set(vars_to_create)) + set(vars_to_remove)
|
||||||
what_is_up_with_those_new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove)
|
what_is_up_with_those_new_coord_names = (coord_names | set(vars_to_create)) - set(vars_to_remove)
|
||||||
result = session.query(models.Customer.id).filter(models.Customer.account_id == account_id, models.Customer.email == email_address).order_by(models.Customer.id.asc(),).all()
|
result = session.query(models.Customer.id).filter(models.Customer.account_id == account_id, models.Customer.email == email_address).order_by(models.Customer.id.asc(),).all()
|
||||||
|
Ø = set()
|
||||||
|
authors.łukasz.say_thanks()
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
yield from outside_of_generator
|
yield from outside_of_generator
|
||||||
@ -340,6 +342,8 @@ async def f():
|
|||||||
).order_by(
|
).order_by(
|
||||||
models.Customer.id.asc()
|
models.Customer.id.asc()
|
||||||
).all()
|
).all()
|
||||||
|
Ø = set()
|
||||||
|
authors.łukasz.say_thanks()
|
||||||
|
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
|
Loading…
Reference in New Issue
Block a user