67 lines
2.3 KiB
Python
67 lines
2.3 KiB
Python
"""Generates a width table for Unicode characters.

This script generates a width table for Unicode characters that are not
narrow (width 1). The table is written to src/black/_width_table.py (note
that although this file is generated, it is checked into Git) and is used
by the char_width() function in src/black/strings.py.

You should run this script when you upgrade wcwidth, which is expected to
happen when a new Unicode version is released. The generated table contains
the version of wcwidth and Unicode that it was generated for.

In order to run this script, you need to install the latest version of wcwidth.
You can do this by running:

    pip install -U wcwidth
"""
|
|
|
|
import sys
|
|
from collections.abc import Iterable
|
|
from os.path import basename, dirname, join
|
|
|
|
import wcwidth # type: ignore[import-not-found]
|
|
|
|
|
|
def make_width_table() -> Iterable[tuple[int, int, int]]:
    """Yield ``(first, last, width)`` triples for runs of wide characters.

    Every code point from 0 through ``sys.maxunicode`` is passed to
    ``wcwidth.wcwidth()``. Consecutive code points that report the same width
    greater than 1 are merged into one triple whose bounds are inclusive.
    Triples are produced in ascending code point order.
    """
    # Bounds and width of the run currently being accumulated. A negative
    # ``first`` means that no run has been opened yet.
    first = -1
    last = -1
    run_width = -2
    for cp in range(sys.maxunicode + 1):
        cells = wcwidth.wcwidth(chr(cp))
        # Narrow characters and zero-width characters are left out of the
        # table so that they are treated as single-width. Treating zero-width
        # characters as single-width is consistent with the heuristics built
        # on top of str.isascii() in the str_width() function in strings.py.
        if cells > 1:
            extends_run = first >= 0 and cells == run_width and cp == last + 1
            if not extends_run:
                if first >= 0:
                    # The open run ends here (gap or width change): emit it.
                    yield first, last, run_width
                first, run_width = cp, cells
            last = cp
    # Emit the run that was still open when the scan finished, if any.
    if first >= 0:
        yield first, last, run_width
|
|
|
|
|
|
def main() -> None:
    """Write the generated width table module to src/black/_width_table.py.

    The target path is resolved relative to the directory containing this
    script. The written file starts with a comment header naming this script,
    the installed wcwidth version and the last entry returned by
    ``wcwidth.list_versions()``, followed by a ``WIDTH_TABLE`` list with one
    triple from make_width_table() per line.
    """
    table_path = join(dirname(__file__), "..", "src", "black", "_width_table.py")
    # Build the complete content before touching the file: opening in "w" mode
    # truncates the existing table immediately, so a failure while querying
    # wcwidth would otherwise leave a truncated file behind.
    header = f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
from typing import Final

WIDTH_TABLE: Final[list[tuple[int, int, int]]] = [
"""
    rows = [f" {triple!r},\n" for triple in make_width_table()]
    # The output is Python source; pin the encoding instead of depending on
    # the locale's default.
    with open(table_path, "w", encoding="utf-8") as f:
        f.write(header)
        f.writelines(rows)
        f.write("]\n")
|
|
|
|
|
|
# Regenerate the table only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|