#455 Fix bug with tricky unicode symbols (#1047)

* add a test for special Unicode symbols that the standard `re` module cannot process correctly
add the `regex` library, which supports the Unicode 12.1.0 standard
replace `re` usage in the project in favor of `regex`

* #455 fix dependency
This commit is contained in:
Andrey 2019-10-13 20:21:15 +03:00 committed by Zsolt Dollenstein
parent faaa2c8d59
commit 6aef6c9d45
11 changed files with 100 additions and 73 deletions

View File

@ -12,6 +12,7 @@ toml = ">=0.9.4"
black = {path = ".",extras = ["d"],editable = true} black = {path = ".",extras = ["d"],editable = true}
aiohttp-cors = "*" aiohttp-cors = "*"
typed-ast = ">=1.3.1" typed-ast = ">=1.3.1"
regex = "*"
[dev-packages] [dev-packages]
pre-commit = "*" pre-commit = "*"

142
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "1e7537ef8102c7c4b5746b53247645a75fca24db7e0d94721fdcc8a62eb8a090" "sha256": "5cceced346048c294218b3ecc9a550fd7667656d7115114cc5e7d3be18b40818"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -16,23 +16,22 @@
"default": { "default": {
"aiohttp": { "aiohttp": {
"hashes": [ "hashes": [
"sha256:1ab7ab0a710135133dcc2980dd48fdd92f6f6066b66ef0356f458f395aa375af", "sha256:022c400e30848b1994236e31fb38db1dc4b551efe049f737cbac690ab2cdf5c4",
"sha256:1cf5b433a0aa3cf45b0acd4adb14cb20d99166aaa967ab89f629635ac263ca64", "sha256:10f9316ef068536dec0b9f09531fa1cb6bfa8394f278022cb96e789c77811ad2",
"sha256:27b2bc8ca5555d5dadeee07cc2d6f8c06092c9d9c3f203c79c124d07474d3cf8", "sha256:2599b93fd5ba1120b3bd1366d67a7e26bd45b3d5d5548069e00b2fbef7f20ab0",
"sha256:315f55a8469284f3ee54534d76f525b5c104dc514999dca4a007524a458aaba2", "sha256:2a1c71e7fb8c50e60fb4c9bab8bd5cf7c07f91a6b27dc2556d7354cd2ebb3689",
"sha256:4f3c1572716ce2c8f22877a8185414ec213c057df35d27f7195f185691828608", "sha256:6a19d34cc01414d94dd5a4466f8f397293fcb8929df8eeb8989119cc5ef928bb",
"sha256:635bef0626e28446372511e1fd31585205db2f18dab37a43d8adb30b0483e1bf", "sha256:7aab39c2a61a5c6b15bb7e561218ef64770ca1fbf4cc1878c96e630e2b7cc3cc",
"sha256:6907359de725e7ccd04b458a0f3322c7d1ba78df3df02e2ceb5abb0e21c975e6", "sha256:8959e28bc1b87542b0ee4a8302128f633bee296252f261bf03e118c4dff725f0",
"sha256:772cfc0ff7c088d9e211377951a51c8a5173110cf56214f3e3d08a89be07badc", "sha256:89820f7c488f4e9b1f74371da33403181e11e006663ddf074317aacd690838a6",
"sha256:a91251585acf5203842551e37d2700c13c0bb411fa61b13485ab9e8d2dd400e9", "sha256:ab761cf0f0b0b90887e276b4a7918f11e323f2228bbb30814bbd538c122028bf",
"sha256:acbbf0c47aa713d7a4baf52f11a356b01b82cabb53da452328546acaa21c6605", "sha256:cc648ecaca79e37c6e26f370e802e7ae640a069913f661f66c0421084bef219a",
"sha256:af7809ce7de6709afc7770403a70dfdbc4e988c91451108c8e123fac46b870d9", "sha256:d6f26e80cd55ac88e1f0397fc8d547933225a5dc1add040e27788c2a028c64c6",
"sha256:de611d7b95c1067d9a415979c63503dbdc735b943d08779506886614b410644a", "sha256:e7d6ae4a36bfe6d7f93c6f42a0bfa1659f7d011006cb6e8207c85ef5acdb2986",
"sha256:e0fe698d1e6a852a27a88d2844a1a63839ee764d7cf214fd58cbea480407cc1d", "sha256:fc55b1fec0e4cc1134ffb09ea3970783ee2906dc5dfd7cd16917913f2cfed65b"
"sha256:fa155e309cc2277d6f9d099aecaf3ce78d86a31f5a62a994debc872e4c34ddf4"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.6.0" "version": "==3.6.1"
}, },
"aiohttp-cors": { "aiohttp-cors": {
"hashes": [ "hashes": [
@ -59,11 +58,11 @@
}, },
"attrs": { "attrs": {
"hashes": [ "hashes": [
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2",
"sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396"
], ],
"index": "pypi", "index": "pypi",
"version": "==19.1.0" "version": "==19.2.0"
}, },
"black": { "black": {
"editable": true, "editable": true,
@ -128,6 +127,23 @@
], ],
"version": "==4.5.2" "version": "==4.5.2"
}, },
"regex": {
"hashes": [
"sha256:1e9f9bc44ca195baf0040b1938e6801d2f3409661c15fe57f8164c678cfc663f",
"sha256:587b62d48ca359d2d4f02d486f1f0aa9a20fbaf23a9d4198c4bed72ab2f6c849",
"sha256:835ccdcdc612821edf132c20aef3eaaecfb884c9454fdc480d5887562594ac61",
"sha256:93f6c9da57e704e128d90736430c5c59dd733327882b371b0cae8833106c2a21",
"sha256:a46f27d267665016acb3ec8c6046ec5eae8cf80befe85ba47f43c6f5ec636dcd",
"sha256:c5c8999b3a341b21ac2c6ec704cfcccbc50f1fedd61b6a8ee915ca7fd4b0a557",
"sha256:d4d1829cf97632673aa49f378b0a2c3925acd795148c5ace8ef854217abbee89",
"sha256:d96479257e8e4d1d7800adb26bf9c5ca5bab1648a1eddcac84d107b73dc68327",
"sha256:f20f4912daf443220436759858f96fefbfc6c6ba9e67835fd6e4e9b73582791a",
"sha256:f2b37b5b2c2a9d56d9e88efef200ec09c36c7f323f9d58d0b985a90923df386d",
"sha256:fe765b809a1f7ce642c2edeee351e7ebd84391640031ba4b60af8d91a9045890"
],
"index": "pypi",
"version": "==2019.8.19"
},
"toml": { "toml": {
"hashes": [ "hashes": [
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
@ -191,11 +207,11 @@
}, },
"attrs": { "attrs": {
"hashes": [ "hashes": [
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2",
"sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396"
], ],
"index": "pypi", "index": "pypi",
"version": "==19.1.0" "version": "==19.2.0"
}, },
"babel": { "babel": {
"hashes": [ "hashes": [
@ -234,10 +250,10 @@
}, },
"commonmark": { "commonmark": {
"hashes": [ "hashes": [
"sha256:14c3df31e8c9c463377e287b2a1eefaa6019ab97b22dad36e2f32be59d61d68d", "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60",
"sha256:867fc5db078ede373ab811e16b6789e9d033b15ccd7296f370ca52d1ee792ce0" "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"
], ],
"version": "==0.9.0" "version": "==0.9.1"
}, },
"coverage": { "coverage": {
"hashes": [ "hashes": [
@ -316,12 +332,6 @@
"index": "pypi", "index": "pypi",
"version": "==17.8.0" "version": "==17.8.0"
}, },
"future": {
"hashes": [
"sha256:67045236dcfd6816dc439556d009594abf643e5eb48992e36beac09c2ca659b8"
],
"version": "==0.17.1"
},
"identify": { "identify": {
"hashes": [ "hashes": [
"sha256:4f1fe9a59df4e80fcb0213086fcf502bc1765a01ea4fe8be48da3b65afd2a017", "sha256:4f1fe9a59df4e80fcb0213086fcf502bc1765a01ea4fe8be48da3b65afd2a017",
@ -352,10 +362,10 @@
}, },
"jinja2": { "jinja2": {
"hashes": [ "hashes": [
"sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", "sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f",
"sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b" "sha256:9fe95f19286cfefaa917656583d020be14e7859c6b0252588391e47db34527de"
], ],
"version": "==2.10.1" "version": "==2.10.3"
}, },
"markupsafe": { "markupsafe": {
"hashes": [ "hashes": [
@ -406,27 +416,26 @@
}, },
"mypy": { "mypy": {
"hashes": [ "hashes": [
"sha256:0107bff4f46a289f0e4081d59b77cef1c48ea43da5a0dbf0005d54748b26df2a", "sha256:1d98fd818ad3128a5408148c9e4a5edce6ed6b58cc314283e631dd5d9216527b",
"sha256:07957f5471b3bb768c61f08690c96d8a09be0912185a27a68700f3ede99184e4", "sha256:22ee018e8fc212fe601aba65d3699689dd29a26410ef0d2cc1943de7bec7e3ac",
"sha256:10af62f87b6921eac50271e667cc234162a194e742d8e02fc4ddc121e129a5b0", "sha256:3a24f80776edc706ec8d05329e854d5b9e464cd332e25cde10c8da2da0a0db6c",
"sha256:11fd60d2f69f0cefbe53ce551acf5b1cec1a89e7ce2d47b4e95a84eefb2899ae", "sha256:42a78944e80770f21609f504ca6c8173f7768043205b5ac51c9144e057dcf879",
"sha256:15e43d3b1546813669bd1a6ec7e6a11d2888db938e0607f7b5eef6b976671339", "sha256:4b2b20106973548975f0c0b1112eceb4d77ed0cafe0a231a1318f3b3a22fc795",
"sha256:352c24ba054a89bb9a35dd064ee95ab9b12903b56c72a8d3863d882e2632dc76", "sha256:591a9625b4d285f3ba69f541c84c0ad9e7bffa7794da3fa0585ef13cf95cb021",
"sha256:437020a39417e85e22ea8edcb709612903a9924209e10b3ec6d8c9f05b79f498", "sha256:5b4b70da3d8bae73b908a90bb2c387b977e59d484d22c604a2131f6f4397c1a3",
"sha256:49925f9da7cee47eebf3420d7c0e00ec662ec6abb2780eb0a16260a7ba25f9c4", "sha256:84edda1ffeda0941b2ab38ecf49302326df79947fa33d98cdcfbf8ca9cf0bb23",
"sha256:6724fcd5777aa6cebfa7e644c526888c9d639bd22edd26b2a8038c674a7c34bd", "sha256:b2b83d29babd61b876ae375786960a5374bba0e4aba3c293328ca6ca5dc448dd",
"sha256:7a17613f7ea374ab64f39f03257f22b5755335b73251d0d253687a69029701ba", "sha256:cc4502f84c37223a1a5ab700649b5ab1b5e4d2bf2d426907161f20672a21930b",
"sha256:cdc1151ced496ca1496272da7fc356580e95f2682be1d32377c22ddebdf73c91" "sha256:e29e24dd6e7f39f200a5bb55dcaa645d38a397dd5a6674f6042ef02df5795046"
], ],
"index": "pypi", "index": "pypi",
"version": "==0.720" "version": "==0.730"
}, },
"mypy-extensions": { "mypy-extensions": {
"hashes": [ "hashes": [
"sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", "sha256:a161e3b917053de87dbe469987e173e49fb454eca10ef28b48b384538cc11458"
"sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
], ],
"version": "==0.4.1" "version": "==0.4.2"
}, },
"nodeenv": { "nodeenv": {
"hashes": [ "hashes": [
@ -436,10 +445,10 @@
}, },
"packaging": { "packaging": {
"hashes": [ "hashes": [
"sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9", "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47",
"sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe" "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108"
], ],
"version": "==19.1" "version": "==19.2"
}, },
"pkginfo": { "pkginfo": {
"hashes": [ "hashes": [
@ -486,10 +495,10 @@
}, },
"pytz": { "pytz": {
"hashes": [ "hashes": [
"sha256:26c0b32e437e54a18161324a2fca3c4b9846b74a8dccddd843113109e1116b32", "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
"sha256:c894d57500a4cd2d5c71114aaab77dbab5eabd9022308ce5ac9bb93a60a6f0c7" "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
], ],
"version": "==2019.2" "version": "==2019.3"
}, },
"pyyaml": { "pyyaml": {
"hashes": [ "hashes": [
@ -556,9 +565,10 @@
}, },
"snowballstemmer": { "snowballstemmer": {
"hashes": [ "hashes": [
"sha256:713e53b79cbcf97bc5245a06080a33d54a77e7cce2f789c835a143bcdb5c033e" "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0",
"sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52"
], ],
"version": "==1.9.1" "version": "==2.0.0"
}, },
"sphinx": { "sphinx": {
"hashes": [ "hashes": [
@ -620,18 +630,18 @@
}, },
"tqdm": { "tqdm": {
"hashes": [ "hashes": [
"sha256:4c34f077399736e5dbf403183b5f0f1bda46e06433a3f93812386a9d56b28004", "sha256:abc25d0ce2397d070ef07d8c7e706aede7920da163c64997585d42d3537ece3d",
"sha256:74d40d49cab95a93735323e450161f1e580dac42b25bf39770f6e3501d36ebfb" "sha256:dd3fcca8488bb1d416aa7469d2f277902f26260c45aa86b667b074cd44b3b115"
], ],
"version": "==4.36.0" "version": "==4.36.1"
}, },
"twine": { "twine": {
"hashes": [ "hashes": [
"sha256:630fadd6e342e725930be6c696537e3f9ccc54331742b16245dab292a17d0460", "sha256:5319dd3e02ac73fcddcd94f035b9631589ab5d23e1f4699d57365199d85261e1",
"sha256:a3d22aab467b4682a22de4a422632e79d07eebd07ff2a7079effb13f8a693787" "sha256:9fe7091715c7576df166df8ef6654e61bada39571783f2fd415bdcba867c6993"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.15.0" "version": "==2.0.0"
}, },
"typed-ast": { "typed-ast": {
"hashes": [ "hashes": [
@ -664,10 +674,10 @@
}, },
"urllib3": { "urllib3": {
"hashes": [ "hashes": [
"sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
"sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
], ],
"version": "==1.25.3" "version": "==1.25.6"
}, },
"virtualenv": { "virtualenv": {
"hashes": [ "hashes": [

View File

@ -12,7 +12,7 @@
import os import os
from pathlib import Path from pathlib import Path
import pickle import pickle
import re import regex as re
import signal import signal
import sys import sys
import tempfile import tempfile
@ -3810,7 +3810,8 @@ def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
""" """
if "\n" in regex: if "\n" in regex:
regex = "(?x)" + regex regex = "(?x)" + regex
return re.compile(regex) compiled: Pattern[str] = re.compile(regex)
return compiled
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]: def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:

View File

@ -27,7 +27,7 @@
""" """
# Python imports # Python imports
import re import regex as re
# Local imports # Local imports
from pgen2 import grammar, token from pgen2 import grammar, token

View File

@ -3,7 +3,7 @@
"""Safely evaluate Python string literals without using eval().""" """Safely evaluate Python string literals without using eval()."""
import re import regex as re
simple_escapes = {"a": "\a", simple_escapes = {"a": "\a",
"b": "\b", "b": "\b",

View File

@ -29,7 +29,7 @@
__credits__ = \ __credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
import re import regex as re
from codecs import BOM_UTF8, lookup from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import * from blib2to3.pgen2.token import *

View File

@ -13,7 +13,7 @@
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
# #
from pathlib import Path from pathlib import Path
import re import regex as re
import shutil import shutil
import string import string

View File

@ -75,6 +75,7 @@ click = "^6.5"
toml = "^0.9.4" toml = "^0.9.4"
appdirs = "^1.4" appdirs = "^1.4"
aiohttp = { version = "^3.4", optional = true } aiohttp = { version = "^3.4", optional = true }
regex = "^2019.8"
[tool.poetry.extras] [tool.poetry.extras]
d = ["aiohttp"] d = ["aiohttp"]

View File

@ -39,6 +39,7 @@ def get_long_description() -> str:
"appdirs", "appdirs",
"toml>=0.9.4", "toml>=0.9.4",
"typed-ast>=1.3.1", "typed-ast>=1.3.1",
"regex",
], ],
extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]}, extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]},
test_suite="tests.test_black", test_suite="tests.test_black",

View File

@ -0,0 +1,6 @@
ä = 1
µ = 2
= 3
x󠄀 = 4
= 1
Q̇_per_meter = 4

View File

@ -7,7 +7,7 @@
from io import BytesIO, TextIOWrapper from io import BytesIO, TextIOWrapper
import os import os
from pathlib import Path from pathlib import Path
import re import regex as re
import sys import sys
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from typing import Any, BinaryIO, Generator, List, Tuple, Iterator, TypeVar from typing import Any, BinaryIO, Generator, List, Tuple, Iterator, TypeVar
@ -1245,6 +1245,13 @@ def test_read_cache_line_lengths(self) -> None:
two = black.read_cache(short_mode) two = black.read_cache(short_mode)
self.assertNotIn(path, two) self.assertNotIn(path, two)
def test_tricky_unicode_symbols(self) -> None:
source, expected = read_data("tricky_unicode_symbols")
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
def test_single_file_force_pyi(self) -> None: def test_single_file_force_pyi(self) -> None:
reg_mode = black.FileMode() reg_mode = black.FileMode()
pyi_mode = black.FileMode(is_pyi=True) pyi_mode = black.FileMode(is_pyi=True)