Remove numeric underscore normalization (#696)

This commit is contained in:
Zsolt Dollenstein 2019-02-07 03:16:49 +00:00 committed by Jelle Zijlstra
parent 36d3c516d3
commit 250ba7f04b
6 changed files with 26 additions and 89 deletions

View File

@ -87,9 +87,6 @@ Options:
piping source on standard input). piping source on standard input).
-S, --skip-string-normalization -S, --skip-string-normalization
Don't normalize string quotes or prefixes. Don't normalize string quotes or prefixes.
-N, --skip-numeric-underscore-normalization
Don't normalize underscores in numeric
literals.
--check Don't write the files back, just return the --check Don't write the files back, just return the
status. Return code 0 means nothing would status. Return code 0 means nothing would
change. Return code 1 means some files change. Return code 1 means some files
@ -395,14 +392,8 @@ an adoption helper, avoid using this for new projects.
*Black* standardizes most numeric literals to use lowercase letters for the *Black* standardizes most numeric literals to use lowercase letters for the
syntactic parts and uppercase letters for the digits themselves: `0xAB` syntactic parts and uppercase letters for the digits themselves: `0xAB`
instead of `0XAB` and `1e10` instead of `1E10`. Python 2 long literals are instead of `0XAB` and `1e10` instead of `1E10`. Python 2 long literals are
styled as `2L` instead of `2l` to avoid confusion between `l` and `1`. In styled as `2L` instead of `2l` to avoid confusion between `l` and `1`.
Python 3.6+, *Black* adds underscores to long numeric literals to aid
readability: `100000000` becomes `100_000_000`.
For regions where numerals are grouped differently (like [India](https://en.wikipedia.org/wiki/Indian_numbering_system)
and [China](https://en.wikipedia.org/wiki/Chinese_numerals#Whole_numbers)),
the `-N` or `--skip-numeric-underscore-normalization` command line option
makes *Black* preserve underscores in numeric literals.
### Line breaks & binary operators ### Line breaks & binary operators
@ -823,8 +814,6 @@ The headers controlling how code is formatted are:
- `X-Skip-String-Normalization`: corresponds to the `--skip-string-normalization` - `X-Skip-String-Normalization`: corresponds to the `--skip-string-normalization`
command line flag. If present and its value is not the empty string, no string command line flag. If present and its value is not the empty string, no string
normalization will be performed. normalization will be performed.
- `X-Skip-Numeric-Underscore-Normalization`: corresponds to the
`--skip-numeric-underscore-normalization` command line flag.
- `X-Fast-Or-Safe`: if set to `fast`, `blackd` will act as *Black* does when - `X-Fast-Or-Safe`: if set to `fast`, `blackd` will act as *Black* does when
passed the `--fast` command line flag. passed the `--fast` command line flag.
- `X-Python-Variant`: if set to `pyi`, `blackd` will act as *Black* does when - `X-Python-Variant`: if set to `pyi`, `blackd` will act as *Black* does when
@ -950,7 +939,9 @@ More details can be found in [CONTRIBUTING](CONTRIBUTING.md).
## Change Log ## Change Log
### 18.11b0 ### 19.2b0
* *Black* no longer normalizes numeric literals to include `_` separators.
* new option `--target-version` to control which Python versions * new option `--target-version` to control which Python versions
*Black*-formatted code should target *Black*-formatted code should target

View File

@ -168,7 +168,6 @@ class Feature(Enum):
class FileMode: class FileMode:
target_versions: Set[TargetVersion] = Factory(set) target_versions: Set[TargetVersion] = Factory(set)
line_length: int = DEFAULT_LINE_LENGTH line_length: int = DEFAULT_LINE_LENGTH
numeric_underscore_normalization: bool = True
string_normalization: bool = True string_normalization: bool = True
is_pyi: bool = False is_pyi: bool = False
@ -183,7 +182,6 @@ def get_cache_key(self) -> str:
parts = [ parts = [
version_str, version_str,
str(self.line_length), str(self.line_length),
str(int(self.numeric_underscore_normalization)),
str(int(self.string_normalization)), str(int(self.string_normalization)),
str(int(self.is_pyi)), str(int(self.is_pyi)),
] ]
@ -273,12 +271,6 @@ def read_pyproject_toml(
is_flag=True, is_flag=True,
help="Don't normalize string quotes or prefixes.", help="Don't normalize string quotes or prefixes.",
) )
@click.option(
"-N",
"--skip-numeric-underscore-normalization",
is_flag=True,
help="Don't normalize underscores in numeric literals.",
)
@click.option( @click.option(
"--check", "--check",
is_flag=True, is_flag=True,
@ -370,7 +362,6 @@ def main(
pyi: bool, pyi: bool,
py36: bool, py36: bool,
skip_string_normalization: bool, skip_string_normalization: bool,
skip_numeric_underscore_normalization: bool,
quiet: bool, quiet: bool,
verbose: bool, verbose: bool,
include: str, include: str,
@ -396,7 +387,6 @@ def main(
line_length=line_length, line_length=line_length,
is_pyi=pyi, is_pyi=pyi,
string_normalization=not skip_string_normalization, string_normalization=not skip_string_normalization,
numeric_underscore_normalization=not skip_numeric_underscore_normalization,
) )
if config and verbose: if config and verbose:
out(f"Using configuration from {config}.", bold=False, fg="blue") out(f"Using configuration from {config}.", bold=False, fg="blue")
@ -686,8 +676,6 @@ def format_str(src_contents: str, *, mode: FileMode) -> FileContent:
or supports_feature(versions, Feature.UNICODE_LITERALS), or supports_feature(versions, Feature.UNICODE_LITERALS),
is_pyi=mode.is_pyi, is_pyi=mode.is_pyi,
normalize_strings=mode.string_normalization, normalize_strings=mode.string_normalization,
allow_underscores=mode.numeric_underscore_normalization
and supports_feature(versions, Feature.NUMERIC_UNDERSCORES),
) )
elt = EmptyLineTracker(is_pyi=mode.is_pyi) elt = EmptyLineTracker(is_pyi=mode.is_pyi)
empty_line = Line() empty_line = Line()
@ -1492,7 +1480,6 @@ class LineGenerator(Visitor[Line]):
normalize_strings: bool = True normalize_strings: bool = True
current_line: Line = Factory(Line) current_line: Line = Factory(Line)
remove_u_prefix: bool = False remove_u_prefix: bool = False
allow_underscores: bool = False
def line(self, indent: int = 0) -> Iterator[Line]: def line(self, indent: int = 0) -> Iterator[Line]:
"""Generate a line. """Generate a line.
@ -1535,7 +1522,7 @@ def visit_default(self, node: LN) -> Iterator[Line]:
normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix) normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
normalize_string_quotes(node) normalize_string_quotes(node)
if node.type == token.NUMBER: if node.type == token.NUMBER:
normalize_numeric_literal(node, self.allow_underscores) normalize_numeric_literal(node)
if node.type not in WHITESPACE: if node.type not in WHITESPACE:
self.current_line.append(node) self.current_line.append(node)
yield from super().visit_default(node) yield from super().visit_default(node)
@ -2674,11 +2661,11 @@ def normalize_string_quotes(leaf: Leaf) -> None:
leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}" leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None: def normalize_numeric_literal(leaf: Leaf) -> None:
"""Normalizes numeric (float, int, and complex) literals. """Normalizes numeric (float, int, and complex) literals.
All letters used in the representation are normalized to lowercase (except All letters used in the representation are normalized to lowercase (except
in Python 2 long literals), and long number literals are split using underscores. in Python 2 long literals).
""" """
text = leaf.value.lower() text = leaf.value.lower()
if text.startswith(("0o", "0b")): if text.startswith(("0o", "0b")):
@ -2696,8 +2683,7 @@ def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
sign = "-" sign = "-"
elif after.startswith("+"): elif after.startswith("+"):
after = after[1:] after = after[1:]
before = format_float_or_int_string(before, allow_underscores) before = format_float_or_int_string(before)
after = format_int_string(after, allow_underscores)
text = f"{before}e{sign}{after}" text = f"{before}e{sign}{after}"
elif text.endswith(("j", "l")): elif text.endswith(("j", "l")):
number = text[:-1] number = text[:-1]
@ -2705,50 +2691,19 @@ def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
# Capitalize in "2L" because "l" looks too similar to "1". # Capitalize in "2L" because "l" looks too similar to "1".
if suffix == "l": if suffix == "l":
suffix = "L" suffix = "L"
text = f"{format_float_or_int_string(number, allow_underscores)}{suffix}" text = f"{format_float_or_int_string(number)}{suffix}"
else: else:
text = format_float_or_int_string(text, allow_underscores) text = format_float_or_int_string(text)
leaf.value = text leaf.value = text
def format_float_or_int_string(text: str, allow_underscores: bool) -> str: def format_float_or_int_string(text: str) -> str:
"""Formats a float string like "1.0".""" """Formats a float string like "1.0"."""
if "." not in text: if "." not in text:
return format_int_string(text, allow_underscores) return text
before, after = text.split(".") before, after = text.split(".")
before = format_int_string(before, allow_underscores) if before else "0" return f"{before or 0}.{after or 0}"
if after:
after = format_int_string(after, allow_underscores, count_from_end=False)
else:
after = "0"
return f"{before}.{after}"
def format_int_string(
text: str, allow_underscores: bool, count_from_end: bool = True
) -> str:
"""Normalizes underscores in a string to e.g. 1_000_000.
Input must be a string of digits and optional underscores.
If count_from_end is False, we add underscores after groups of three digits
counting from the beginning instead of the end of the strings. This is used
for the fractional part of float literals.
"""
if not allow_underscores:
return text
text = text.replace("_", "")
if len(text) <= 5:
# No underscores for numbers <= 5 digits long.
return text
if count_from_end:
# Avoid removing leading zeros, which are important if we're formatting
# part of a number like "0.001".
return format(int("1" + text), "3_")[1:].lstrip("_")
else:
return "_".join(text[i : i + 3] for i in range(0, len(text), 3))
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:

View File

@ -17,7 +17,6 @@
LINE_LENGTH_HEADER = "X-Line-Length" LINE_LENGTH_HEADER = "X-Line-Length"
PYTHON_VARIANT_HEADER = "X-Python-Variant" PYTHON_VARIANT_HEADER = "X-Python-Variant"
SKIP_STRING_NORMALIZATION_HEADER = "X-Skip-String-Normalization" SKIP_STRING_NORMALIZATION_HEADER = "X-Skip-String-Normalization"
SKIP_NUMERIC_UNDERSCORE_NORMALIZATION_HEADER = "X-Skip-Numeric-Underscore-Normalization"
FAST_OR_SAFE_HEADER = "X-Fast-Or-Safe" FAST_OR_SAFE_HEADER = "X-Fast-Or-Safe"
BLACK_HEADERS = [ BLACK_HEADERS = [
@ -25,7 +24,6 @@
LINE_LENGTH_HEADER, LINE_LENGTH_HEADER,
PYTHON_VARIANT_HEADER, PYTHON_VARIANT_HEADER,
SKIP_STRING_NORMALIZATION_HEADER, SKIP_STRING_NORMALIZATION_HEADER,
SKIP_NUMERIC_UNDERSCORE_NORMALIZATION_HEADER,
FAST_OR_SAFE_HEADER, FAST_OR_SAFE_HEADER,
] ]
@ -95,9 +93,6 @@ async def handle(request: web.Request, executor: Executor) -> web.Response:
skip_string_normalization = bool( skip_string_normalization = bool(
request.headers.get(SKIP_STRING_NORMALIZATION_HEADER, False) request.headers.get(SKIP_STRING_NORMALIZATION_HEADER, False)
) )
skip_numeric_underscore_normalization = bool(
request.headers.get(SKIP_NUMERIC_UNDERSCORE_NORMALIZATION_HEADER, False)
)
fast = False fast = False
if request.headers.get(FAST_OR_SAFE_HEADER, "safe") == "fast": if request.headers.get(FAST_OR_SAFE_HEADER, "safe") == "fast":
fast = True fast = True
@ -106,7 +101,6 @@ async def handle(request: web.Request, executor: Executor) -> web.Response:
is_pyi=pyi, is_pyi=pyi,
line_length=line_length, line_length=line_length,
string_normalization=not skip_string_normalization, string_normalization=not skip_string_normalization,
numeric_underscore_normalization=not skip_numeric_underscore_normalization,
) )
req_bytes = await request.content.read() req_bytes = await request.content.read()
charset = request.charset if request.charset is not None else "utf8" charset = request.charset if request.charset is not None else "utf8"

View File

@ -144,7 +144,7 @@ def function_signature_stress_test(
def spaces(a=1, b=(), c=[], d={}, e=True, f=-1, g=1 if False else 2, h="", i=r""): def spaces(a=1, b=(), c=[], d={}, e=True, f=-1, g=1 if False else 2, h="", i=r""):
offset = attr.ib(default=attr.Factory(lambda: _r.uniform(10000, 200_000))) offset = attr.ib(default=attr.Factory(lambda: _r.uniform(10000, 200000)))
assert task._cancel_stack[: len(old_stack)] == old_stack assert task._cancel_stack[: len(old_stack)] == old_stack

View File

@ -6,7 +6,7 @@
x = 1. x = 1.
x = 1E+1 x = 1E+1
x = 1E-1 x = 1E-1
x = 1.00000001 x = 1.000_000_01
x = 123456789.123456789 x = 123456789.123456789
x = 123456789.123456789E123456789 x = 123456789.123456789E123456789
x = 123456789E123456789 x = 123456789E123456789
@ -24,21 +24,21 @@
#!/usr/bin/env python3.6 #!/usr/bin/env python3.6
x = 123_456_789 x = 123456789
x = 123_456 x = 123456
x = 0.1 x = 0.1
x = 1.0 x = 1.0
x = 1e1 x = 1e1
x = 1e-1 x = 1e-1
x = 1.000_000_01 x = 1.000_000_01
x = 123_456_789.123_456_789 x = 123456789.123456789
x = 123_456_789.123_456_789e123_456_789 x = 123456789.123456789e123456789
x = 123_456_789e123_456_789 x = 123456789e123456789
x = 123_456_789j x = 123456789j
x = 123_456_789.123_456_789j x = 123456789.123456789j
x = 0xB1ACC x = 0xB1ACC
x = 0b1011 x = 0b1011
x = 0o777 x = 0o777
x = 0.000_000_006 x = 0.000000006
x = 10000 x = 10000
x = 133_333 x = 133333

View File

@ -437,9 +437,7 @@ def test_numeric_literals(self) -> None:
@patch("black.dump_to_file", dump_to_stderr) @patch("black.dump_to_file", dump_to_stderr)
def test_numeric_literals_ignoring_underscores(self) -> None: def test_numeric_literals_ignoring_underscores(self) -> None:
source, expected = read_data("numeric_literals_skip_underscores") source, expected = read_data("numeric_literals_skip_underscores")
mode = black.FileMode( mode = black.FileMode(target_versions=black.PY36_VERSIONS)
numeric_underscore_normalization=False, target_versions=black.PY36_VERSIONS
)
actual = fs(source, mode=mode) actual = fs(source, mode=mode)
self.assertFormatEqual(expected, actual) self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual) black.assert_equivalent(source, actual)
@ -828,8 +826,7 @@ def test_get_features_used(self) -> None:
) )
node = black.lib2to3_parse(expected) node = black.lib2to3_parse(expected)
self.assertEqual( self.assertEqual(
black.get_features_used(node), black.get_features_used(node), {Feature.TRAILING_COMMA, Feature.F_STRINGS}
{Feature.TRAILING_COMMA, Feature.F_STRINGS, Feature.NUMERIC_UNDERSCORES},
) )
source, expected = read_data("expression") source, expected = read_data("expression")
node = black.lib2to3_parse(source) node = black.lib2to3_parse(source)