Initial commit

Łukasz Langa 2018-03-14 12:55:32 -07:00
commit e74117f172
48 changed files with 6595 additions and 0 deletions

8
.flake8 Normal file

@ -0,0 +1,8 @@
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.
[flake8]
ignore = E266, E501
max-line-length = 80
max-complexity = 12
select = B,C,E,F,W,T4,B9

1
.gitignore vendored Normal file

@ -0,0 +1 @@
.coverage

15
.travis.yml Normal file

@ -0,0 +1,15 @@
sudo: false
language: python
before_script:
- pip install -e .
# test script
script: python setup.py test
notifications:
  on_success: change
  on_failure: always
matrix:
  include:
    - python: 3.6
    - python: 3.6-dev
    - python: 3.7-dev
    - python: 3.8-dev

21
LICENSE Normal file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018 Łukasz Langa
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

2
MANIFEST.in Normal file

@ -0,0 +1,2 @@
include *.rst *.md LICENSE
recursive-include tests *.txt *.py

17
Pipfile Normal file

@ -0,0 +1,17 @@
[[source]]
url = "https://pypi.python.org/simple"
verify_ssl = true
name = "pypi"
[packages]
attrs = "*"
click = "*"
[dev-packages]
coverage = "*"
flake8 = "*"
flake8-bugbear = "*"
flake8-mypy = "*"
mypy = "*"
pypandoc = "*"
twine = "*"

243
Pipfile.lock generated Normal file

@ -0,0 +1,243 @@
{
"_meta": {
"hash": {
"sha256": "e2dc877c2f32df83197fc3dc0f49e0a66d0d099aab106b99d64fdbe5b14cc91b"
},
"host-environment-markers": {
"implementation_name": "cpython",
"implementation_version": "3.6.4",
"os_name": "posix",
"platform_machine": "x86_64",
"platform_python_implementation": "CPython",
"platform_release": "17.4.0",
"platform_system": "Darwin",
"platform_version": "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64",
"python_full_version": "3.6.4",
"python_version": "3.6",
"sys_platform": "darwin"
},
"pipfile-spec": 6,
"requires": {},
"sources": [
{
"name": "pypi",
"url": "https://pypi.python.org/simple",
"verify_ssl": true
}
]
},
"default": {
"attrs": {
"hashes": [
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
"sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
],
"version": "==17.4.0"
},
"click": {
"hashes": [
"sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d",
"sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b"
],
"version": "==6.7"
}
},
"develop": {
"attrs": {
"hashes": [
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
"sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
],
"version": "==17.4.0"
},
"certifi": {
"hashes": [
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
"sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
],
"version": "==2018.1.18"
},
"chardet": {
"hashes": [
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
],
"version": "==3.0.4"
},
"coverage": {
"hashes": [
"sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc",
"sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694",
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80",
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
"sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249",
"sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1",
"sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9",
"sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5",
"sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508",
"sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f",
"sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba",
"sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e",
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
"sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162",
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
"sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558",
"sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c",
"sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062",
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
"sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99",
"sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287",
"sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000",
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
"sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc",
"sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653",
"sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a",
"sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1",
"sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91",
"sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2",
"sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d",
"sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a",
"sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4",
"sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd",
"sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77",
"sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e"
],
"version": "==4.5.1"
},
"flake8": {
"hashes": [
"sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37",
"sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0"
],
"version": "==3.5.0"
},
"flake8-bugbear": {
"hashes": [
"sha256:541746f0f3b2f1a8d7278e1d2d218df298996b60b02677708560db7c7e620e3b",
"sha256:5f14a99d458e29cb92be9079c970030e0dd398b2decb179d76d39a5266ea1578"
],
"version": "==18.2.0"
},
"flake8-mypy": {
"hashes": [
"sha256:cff009f4250e8391bf48990093cff85802778c345c8449d6498b62efefeebcbc",
"sha256:47120db63aff631ee1f84bac6fe8e64731dc66da3efc1c51f85e15ade4a3ba18"
],
"version": "==17.8.0"
},
"idna": {
"hashes": [
"sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
"sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f"
],
"version": "==2.6"
},
"mccabe": {
"hashes": [
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
],
"version": "==0.6.1"
},
"mypy": {
"hashes": [
"sha256:884f18f3a40cfcf24cdd5860b84958cfb35e6563e439c5adc1503878df221dc3",
"sha256:83d798f66323f2de6191d66d9ae5ab234e4ee5b400010e19c58d75d308049f25"
],
"version": "==0.570"
},
"pkginfo": {
"hashes": [
"sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024",
"sha256:bb1a6aeabfc898f5df124e7e00303a5b3ec9a489535f346bfbddb081af93f89e"
],
"version": "==1.4.1"
},
"pycodestyle": {
"hashes": [
"sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9",
"sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766"
],
"version": "==2.3.1"
},
"pyflakes": {
"hashes": [
"sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f",
"sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805"
],
"version": "==1.6.0"
},
"pypandoc": {
"hashes": [
"sha256:e914e6d5f84a76764887e4d909b09d63308725f0cbb5293872c2c92f07c11a5b"
],
"version": "==1.4"
},
"requests": {
"hashes": [
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
"sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
],
"version": "==2.18.4"
},
"requests-toolbelt": {
"hashes": [
"sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237",
"sha256:f6a531936c6fa4c6cfce1b9c10d5c4f498d16528d2a54a22ca00011205a187b5"
],
"version": "==0.8.0"
},
"tqdm": {
"hashes": [
"sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9",
"sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df"
],
"version": "==4.19.6"
},
"twine": {
"hashes": [
"sha256:d3ce5c480c22ccfb761cd358526e862b32546d2fe4bc93d46b5cf04ea3cc46ca",
"sha256:caa45b7987fc96321258cd7668e3be2ff34064f5c66d2d975b641adca659c1ab"
],
"version": "==1.9.1"
},
"typed-ast": {
"hashes": [
"sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
"sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
"sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
"sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded",
"sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
"sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
"sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
"sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6",
"sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559",
"sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
"sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
"sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87",
"sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe",
"sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
"sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
"sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9",
"sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd",
"sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa"
],
"version": "==1.1.0"
},
"urllib3": {
"hashes": [
"sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
"sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
],
"version": "==1.22"
},
"wheel": {
"hashes": [
"sha256:e721e53864f084f956f40f96124a74da0631ac13fbbd1ba99e8e2b5e9cafdf64",
"sha256:9515fe0a94e823fd90b08d22de45d7bde57c90edce705b22f5e1ecf7e1b653c8"
],
"version": "==0.30.0"
}
}
}

260
README.md Normal file

@ -0,0 +1,260 @@
# black
[![Build Status](https://travis-ci.org/ambv/black.svg?branch=master)](https://travis-ci.org/ambv/black)
> Any color you like.
*Black* is the uncompromising Python code formatter. By using it, you
agree to cease control over minutiae of hand-formatting. In return,
*Black* gives you speed, determinism, and freedom from `pycodestyle`
nagging about formatting. You will save time and mental energy for
more important matters.
Blackened code looks the same regardless of the project you're reading.
Formatting becomes transparent after a while and you can focus on the
content instead.
*Black* makes code review faster by producing the smallest diffs
possible.
## NOTE: This is an early pre-release
*Black* can already successfully format itself and the standard library.
It also sports a decent test suite. However, it is still very new.
Things will probably be wonky for a while. This is made explicit by the
"Alpha" trove classifier, as well as by the "a" in the version number.
What this means for you is that **until the formatter becomes stable,
you should expect some formatting to change in the future**.
Also, as a temporary safety measure, *Black* will check that the
reformatted code still produces a valid AST that is equivalent to the
original. This slows it down. If you're feeling confident, use
``--fast``.
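Conceptually, that safety check boils down to comparing the AST of the input with the AST of the output; a rough sketch of the idea (not the actual implementation in `black.py`):
```py3
import ast

def asts_match(src: str, dst: str) -> bool:
    """Return True if reformatting preserved the abstract syntax tree."""
    return ast.dump(ast.parse(src)) == ast.dump(ast.parse(dst))
```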
## Usage
*Black* can be installed by running `pip install black`.
```
black [OPTIONS] [SRC]...
Options:
-l, --line-length INTEGER Where to wrap around. [default: 88]
--fast / --safe If --fast given, skip temporary sanity checks.
[default: --safe]
--version Show the version and exit.
--help Show this message and exit.
```
## The philosophy behind *Black*
*Black* reformats entire files in place. It is not configurable. It
doesn't take previous formatting into account. It doesn't reformat
blocks that start with `# fmt: off` and end with `# fmt: on`. It also
recognizes [YAPF](https://github.com/google/yapf)'s block comments to
the same effect, as a courtesy for straddling code.
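For example, a hand-aligned block can be shielded from reformatting like this:
```py3
# fmt: off
custom_layout = [
    1,    10,   100,
    2,    20,   200,
]
# fmt: on
```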
### How *Black* formats files
*Black* ignores previous formatting and applies uniform horizontal
and vertical whitespace to your code. The rules for horizontal
whitespace are pretty obvious and can be summarized as: do whatever
makes `pycodestyle` happy.
As for vertical whitespace, *Black* tries to render one full expression
or simple statement per line. If this fits the allotted line length,
great.
```py3
# in:
l = [1,
     2,
     3,
]
# out:
l = [1, 2, 3]
```
If not, *Black* will look at the contents of the first outer matching
brackets and put that in a separate indented line.
```py3
# in:
l = [[n for n in list_bosses()], [n for n in list_employees()]]
# out:
l = [
    [n for n in list_bosses()], [n for n in list_employees()]
]
```
If that still doesn't fit the bill, it will decompose the internal
expression further using the same rule, indenting matching brackets
every time. If the contents of the matching brackets pair are
comma-separated (like an argument list, or a dict literal, and so on)
then *Black* will first try to keep them on the same line with the
matching brackets. If that doesn't work, it will put all of them in
separate lines.
```py3
# in:
def very_important_function(template: str, *variables, file: os.PathLike, debug: bool = False):
    """Applies `variables` to the `template` and writes to `file`."""
    with open(file, 'w') as f:
        ...
# out:
def very_important_function(
    template: str,
    *variables,
    file: os.PathLike,
    debug: bool = False,
):
    """Applies `variables` to the `template` and writes to `file`."""
    with open(file, 'w') as f:
        ...
```
You might have noticed that closing brackets are always dedented and
that a trailing comma is always added. Such formatting produces smaller
diffs; when you add or remove an element, it's always just one line.
Also, having the closing bracket dedented provides a clear delimiter
between two distinct sections of the code that otherwise share the same
indentation level (like the arguments list and the docstring in the
example above).
Unnecessary trailing commas are removed if an expression fits in one
line. This makes it 1% more likely that your line won't exceed the
allotted line length limit.
*Black* avoids spurious vertical whitespace. This is in the spirit of
PEP 8 which says that in-function vertical whitespace should only be
used sparingly. One exception is control flow statements: *Black* will
always emit an extra empty line after ``return``, ``raise``, ``break``,
``continue``, and ``yield``. This is to make changes in control flow
more prominent to readers of your code.
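Applied to the control flow rule above, roughly:
```py3
# in:
def first_positive(values):
    for v in values:
        if v > 0:
            return v
    return None

# out:
def first_positive(values):
    for v in values:
        if v > 0:
            return v

    return None
```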
That's it. The rest of the whitespace formatting rules follow PEP 8 and
are designed to keep `pycodestyle` quiet.
### Line length
You probably noticed the peculiar default line length. *Black* defaults
to 88 characters per line, which happens to be 10% over 80. This number
was found to produce significantly shorter files than sticking with 80
(the most popular), or even 79 (used by the standard library). In
general, [90-ish seems like the wise choice](https://youtu.be/wf-BqAjZb8M?t=260).
If you're paid by the line of code you write, you can pass
`--line-length` with a lower number. *Black* will try to respect that.
However, sometimes it won't be able to without breaking other rules. In
those rare cases, auto-formatted code will exceed your allotted limit.
You can also increase it, but remember that people with sight disabilities
find it harder to work with line lengths exceeding 100 characters.
It also adversely affects side-by-side diff review on typical screen
resolutions. Long lines also make it harder to present code neatly
in documentation or talk slides.
If you're using Flake8, you can bump `max-line-length` to 88 and forget
about it. Alternatively, use [Bugbear](https://github.com/PyCQA/flake8-bugbear)'s
B950 warning instead of E501 and keep the max line length at 80 which
you are probably already using. You'd do it like this:
```ini
[flake8]
max-line-length = 80
...
select = C,E,F,W,B,B950
ignore = E501
```
You'll find *Black*'s own .flake8 config file is configured like this.
If you're curious about the reasoning behind B950, Bugbear's documentation
explains it. The tl;dr is "it's like highway speed limits, we won't
bother you if you overdo it by a few km/h".
### Editor integration
There is currently no integration with any text editors. Vim and
Atom/Nuclide integration is planned by the author, others will require
external contributions.
Patches welcome! ✨ 🍰 ✨
## Testimonials
**Dusty Phillips**, [writer](https://smile.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=dusty+phillips):
> Black is opinionated so you don't have to be.
**Hynek Schlawack**, [creator of `attrs`](http://www.attrs.org/), core
developer of Twisted and CPython:
> An auto-formatter that doesn't suck is all I want for Xmas!
**Carl Meyer**, [Django](https://www.djangoproject.com/) core developer:
> At least the name is good.
## Tests
Just run:
```
python setup.py test
```
## This tool requires Python 3.6.0+ to run
But you can reformat Python 2 code with it, too. *Black* is able to parse
all of the new syntax supported on Python 3.6 but also *effectively all*
the Python 2 syntax at the same time, as long as you're not using print
statements.
By making the code exclusively Python 3.6+, I'm able to focus on the
quality of the formatting and re-use all the nice features of the new
releases (check out [pathlib](https://docs.python.org/3/library/pathlib.html) or
f-strings) instead of wasting cycles on Unicode compatibility, and so on.
## License
MIT
## Contributing
In terms of inspiration, *Black* is about as configurable as *gofmt* and
*rustfmt* are. This is deliberate.
Bug reports and fixes are always welcome! However, before you suggest a
new feature or configuration knob, ask yourself why you want it. If it
enables better integration with some workflow, fixes an inconsistency,
speeds things up, and so on - go for it! On the other hand, if your
answer is "because I don't like a particular formatting" then you're not
ready to embrace *Black* yet. Such changes are unlikely to get accepted.
You can still try but prepare to be disappointed.
## Change Log
### 18.3a0
* first published version, Happy 🍰 Day 2018!
* alpha quality
* date-versioned (see: http://calver.org/)
## Authors
Glued together by [Łukasz Langa](mailto:lukasz@langa.pl).

1478
black.py Normal file

File diff suppressed because it is too large

173
blib2to3/Grammar.txt Normal file

@ -0,0 +1,173 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
('*' [tname] (',' tname ['=' test])* [',' ['**' tname [',']]] | '**' tname [','])
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]] | '**' vname [','])
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
'>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: ['await'] atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
listmaker: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] )
testlist_gexp: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' expr |
star_expr )
comp_iter: comp_for | comp_if
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]
# As noted above, testlist_safe extends the syntax allowed in list
# comprehensions and generators. We can't use it indiscriminately in all
# derivations using a comp_for-like pattern because the testlist_safe derivation
# contains comma which clashes with trailing comma in arglist.
#
# This was an issue because the parser would not follow the correct derivation
# when parsing syntactically valid Python code. Since testlist_safe was created
# specifically to handle list comprehensions and generator expressions enclosed
# with parentheses, it's safe to only use it in those. That avoids the issue; we
# can parse code like set(x for x in [],).
#
# The syntax supported by this set of rules is not a valid Python 3 syntax,
# hence the prefix "old".
#
# See https://bugs.python.org/issue27494
old_comp_iter: old_comp_for | old_comp_if
old_comp_for: ['async'] 'for' exprlist 'in' testlist_safe [old_comp_iter]
old_comp_if: 'if' old_test [old_comp_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

Binary file not shown.


@ -0,0 +1,28 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# A grammar to describe tree matching patterns.
# Not shown here:
# - 'TOKEN' stands for any token (leaf node)
# - 'any' stands for any node (leaf or interior)
# With 'any' we can still specify the sub-structure.
# The start symbol is 'Matcher'.
Matcher: Alternatives ENDMARKER
Alternatives: Alternative ('|' Alternative)*
Alternative: (Unit | NegatedUnit)+
Unit: [NAME '='] ( STRING [Repeater]
| NAME [Details] [Repeater]
| '(' Alternatives ')' [Repeater]
| '[' Alternatives ']'
)
NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')')
Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}'
Details: '<' Alternatives '>'

Binary file not shown.

7
blib2to3/README Normal file

@ -0,0 +1,7 @@
A subset of lib2to3 taken from Python 3.7.0b2.
Commit hash: 9c17e3a1987004b8bcfbe423953aad84493a7984
Reasons for forking:
- consistent handling of f-strings for users of Python < 3.6.2
- better ability to debug
- ability to Cythonize

1
blib2to3/__init__.py Normal file

@ -0,0 +1 @@
#empty

1
blib2to3/__init__.pyi Normal file

@ -0,0 +1 @@
# Stubs for lib2to3 (Python 3.6)


@ -0,0 +1,4 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""The pgen2 package."""


@ -0,0 +1,10 @@
# Stubs for lib2to3.pgen2 (Python 3.6)
import os
import sys
from typing import Text, Union
if sys.version_info >= (3, 6):
    _Path = Union[Text, os.PathLike]
else:
    _Path = Text

257
blib2to3/pgen2/conv.py Normal file

@ -0,0 +1,257 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Convert graminit.[ch] spit out by pgen to Python code.
Pgen is the Python parser generator. It is useful to quickly create a
parser from a grammar file in Python's grammar notation. But I don't
want my parsers to be written in C (yet), so I'm translating the
parsing tables to Python data structures and writing a Python parse
engine.
Note that the token numbers are constants determined by the standard
Python tokenizer. The standard token module defines these numbers and
their names (the names are not used much). The token numbers are
hardcoded into the Python tokenizer and into pgen. A Python
implementation of the Python tokenizer is also available, in the
standard tokenize module.
On the other hand, symbol numbers (representing the grammar's
non-terminals) are assigned by pgen based on the actual grammar
input.
Note: this module is pretty much obsolete; the pgen module generates
equivalent grammar tables directly from the Grammar.txt input file
without having to invoke the Python pgen C program.
"""
# Python imports
import re
# Local imports
from pgen2 import grammar, token
class Converter(grammar.Grammar):
"""Grammar subclass that reads classic pgen output files.
The run() method reads the tables as produced by the pgen parser
generator, typically contained in two C files, graminit.h and
graminit.c. The other methods are for internal use only.
See the base class for more documentation.
"""
def run(self, graminit_h, graminit_c):
"""Load the grammar tables from the text files written by pgen."""
self.parse_graminit_h(graminit_h)
self.parse_graminit_c(graminit_c)
self.finish_off()
def parse_graminit_h(self, filename):
"""Parse the .h file written by pgen. (Internal)
This file is a sequence of #define statements defining the
nonterminals of the grammar as numbers. We build two tables
mapping the numbers to names and back.
"""
try:
f = open(filename)
except OSError as err:
print("Can't open %s: %s" % (filename, err))
return False
self.symbol2number = {}
self.number2symbol = {}
lineno = 0
for line in f:
lineno += 1
mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
if not mo and line.strip():
print("%s(%s): can't parse %s" % (filename, lineno,
line.strip()))
else:
symbol, number = mo.groups()
number = int(number)
assert symbol not in self.symbol2number
assert number not in self.number2symbol
self.symbol2number[symbol] = number
self.number2symbol[number] = symbol
return True
def parse_graminit_c(self, filename):
"""Parse the .c file written by pgen. (Internal)
The file looks as follows. The first two lines are always this:
#include "pgenheaders.h"
#include "grammar.h"
After that come four blocks:
1) one or more state definitions
2) a table defining dfas
3) a table defining labels
4) a struct defining the grammar
A state definition has the following form:
- one or more arc arrays, each of the form:
static arc arcs_<n>_<m>[<k>] = {
{<i>, <j>},
...
};
- followed by a state array, of the form:
static state states_<s>[<t>] = {
{<k>, arcs_<n>_<m>},
...
};
"""
try:
f = open(filename)
except OSError as err:
print("Can't open %s: %s" % (filename, err))
return False
# The code below essentially uses f's iterator-ness!
lineno = 0
# Expect the two #include lines
lineno, line = lineno+1, next(f)
assert line == '#include "pgenheaders.h"\n', (lineno, line)
lineno, line = lineno+1, next(f)
assert line == '#include "grammar.h"\n', (lineno, line)
# Parse the state definitions
lineno, line = lineno+1, next(f)
allarcs = {}
states = []
while line.startswith("static arc "):
while line.startswith("static arc "):
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
line)
assert mo, (lineno, line)
n, m, k = list(map(int, mo.groups()))
arcs = []
for _ in range(k):
lineno, line = lineno+1, next(f)
mo = re.match(r"\s+{(\d+), (\d+)},$", line)
assert mo, (lineno, line)
i, j = list(map(int, mo.groups()))
arcs.append((i, j))
lineno, line = lineno+1, next(f)
assert line == "};\n", (lineno, line)
allarcs[(n, m)] = arcs
lineno, line = lineno+1, next(f)
mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
s, t = list(map(int, mo.groups()))
assert s == len(states), (lineno, line)
state = []
for _ in range(t):
lineno, line = lineno+1, next(f)
mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
assert mo, (lineno, line)
k, n, m = list(map(int, mo.groups()))
arcs = allarcs[n, m]
assert k == len(arcs), (lineno, line)
state.append(arcs)
states.append(state)
lineno, line = lineno+1, next(f)
assert line == "};\n", (lineno, line)
lineno, line = lineno+1, next(f)
self.states = states
# Parse the dfas
dfas = {}
mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
assert mo, (lineno, line)
ndfas = int(mo.group(1))
for i in range(ndfas):
lineno, line = lineno+1, next(f)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
line)
assert mo, (lineno, line)
symbol = mo.group(2)
number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
assert self.symbol2number[symbol] == number, (lineno, line)
assert self.number2symbol[number] == symbol, (lineno, line)
assert x == 0, (lineno, line)
state = states[z]
assert y == len(state), (lineno, line)
lineno, line = lineno+1, next(f)
mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
assert mo, (lineno, line)
first = {}
rawbitset = eval(mo.group(1))
for i, c in enumerate(rawbitset):
byte = ord(c)
for j in range(8):
if byte & (1<<j):
first[i*8 + j] = 1
dfas[number] = (state, first)
lineno, line = lineno+1, next(f)
assert line == "};\n", (lineno, line)
self.dfas = dfas
# Parse the labels
labels = []
lineno, line = lineno+1, next(f)
mo = re.match(r"static label labels\[(\d+)\] = {$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
for i in range(nlabels):
lineno, line = lineno+1, next(f)
mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
assert mo, (lineno, line)
x, y = mo.groups()
x = int(x)
if y == "0":
y = None
else:
y = eval(y)
labels.append((x, y))
lineno, line = lineno+1, next(f)
assert line == "};\n", (lineno, line)
self.labels = labels
# Parse the grammar struct
lineno, line = lineno+1, next(f)
assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
lineno, line = lineno+1, next(f)
mo = re.match(r"\s+(\d+),$", line)
assert mo, (lineno, line)
ndfas = int(mo.group(1))
assert ndfas == len(self.dfas)
lineno, line = lineno+1, next(f)
assert line == "\tdfas,\n", (lineno, line)
lineno, line = lineno+1, next(f)
mo = re.match(r"\s+{(\d+), labels},$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
assert nlabels == len(self.labels), (lineno, line)
lineno, line = lineno+1, next(f)
mo = re.match(r"\s+(\d+)$", line)
assert mo, (lineno, line)
start = int(mo.group(1))
assert start in self.number2symbol, (lineno, line)
self.start = start
lineno, line = lineno+1, next(f)
assert line == "};\n", (lineno, line)
try:
lineno, line = lineno+1, next(f)
except StopIteration:
pass
else:
assert 0, (lineno, line)
def finish_off(self):
"""Create additional useful structures. (Internal)."""
self.keywords = {} # map from keyword strings to arc labels
self.tokens = {} # map from numeric token values to arc labels
for ilabel, (type, value) in enumerate(self.labels):
if type == token.NAME and value is not None:
self.keywords[value] = ilabel
elif value is None:
self.tokens[type] = ilabel

178
blib2to3/pgen2/driver.py Normal file

@ -0,0 +1,178 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser driver.
This provides a high-level interface to parse a file into a syntax tree.
"""
__author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]
# Python imports
import codecs
import io
import os
import logging
import pkgutil
import sys
# Pgen imports
from . import grammar, parse, token, tokenize, pgen
class Driver(object):
def __init__(self, grammar, convert=None, logger=None):
self.grammar = grammar
if logger is None:
logger = logging.getLogger()
self.logger = logger
self.convert = convert
def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
p = parse.Parser(self.grammar, self.convert)
p.setup()
lineno = 1
column = 0
type = value = start = end = line_text = None
prefix = ""
for quintuple in tokens:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
s_lineno, s_column = start
if lineno < s_lineno:
prefix += "\n" * (s_lineno - lineno)
lineno = s_lineno
column = 0
if column < s_column:
prefix += line_text[column:s_column]
column = s_column
if type in (tokenize.COMMENT, tokenize.NL):
prefix += value
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
continue
if type == token.OP:
type = grammar.opmap[value]
if debug:
self.logger.debug("%s %r (prefix=%r)",
token.tok_name[type], value, prefix)
if p.addtoken(type, value, (prefix, start)):
if debug:
self.logger.debug("Stop.")
break
prefix = ""
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
raise parse.ParseError("incomplete input",
type, value, (prefix, start))
return p.rootnode
def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline)
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
def parse_file(self, filename, encoding=None, debug=False):
"""Parse a file and return the syntax tree."""
with io.open(filename, "r", encoding=encoding) as stream:
return self.parse_stream(stream, debug)
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(io.StringIO(text).readline)
return self.parse_tokens(tokens, debug)
def _generate_pickle_name(gt):
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
if save:
logger.info("Writing grammar tables to %s", gp)
try:
g.dump(gp)
except OSError as e:
logger.info("Writing failed: %s", e)
else:
g = grammar.Grammar()
g.load(gp)
return g
def _newer(a, b):
"""Inquire whether file a was written since file b."""
if not os.path.exists(a):
return False
if not os.path.exists(b):
return True
return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(package, grammar_source):
"""Normally, loads a pickled grammar by doing
pkgutil.get_data(package, pickled_grammar)
where *pickled_grammar* is computed from *grammar_source* by adding the
Python version and using a ``.pickle`` extension.
However, if *grammar_source* is an extant file, load_grammar(grammar_source)
is called instead. This facilitates using a packaged grammar file when needed
but preserves load_grammar's automatic regeneration behavior when possible.
"""
if os.path.isfile(grammar_source):
return load_grammar(grammar_source)
pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
data = pkgutil.get_data(package, pickled_name)
g = grammar.Grammar()
g.loads(data)
return g
def main(*args):
"""Main program, when run as a script: produce grammar pickle files.
Calls load_grammar for each argument, a path to a grammar text file.
"""
if not args:
args = sys.argv[1:]
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
format='%(message)s')
for gt in args:
load_grammar(gt, save=True, force=True)
return True
if __name__ == "__main__":
sys.exit(int(not main()))

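A minimal sketch of the high-level interface described in the module docstring, assuming `blib2to3/Grammar.txt` is reachable from the working directory and that `blib2to3.pytree` keeps lib2to3's `convert()` helper:
```py3
from blib2to3 import pytree
from blib2to3.pgen2 import driver

g = driver.load_grammar("blib2to3/Grammar.txt")   # generates and pickles the tables if needed
drv = driver.Driver(g, convert=pytree.convert)
tree = drv.parse_string("def f(x):\n    return x + 1\n")
print(tree)   # str() of the tree round-trips back to the source code
```
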
24
blib2to3/pgen2/driver.pyi Normal file

@ -0,0 +1,24 @@
# Stubs for lib2to3.pgen2.driver (Python 3.6)
import os
import sys
from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union
from logging import Logger
from blib2to3.pytree import _Convert, _NL
from blib2to3.pgen2 import _Path
from blib2to3.pgen2.grammar import Grammar
class Driver:
grammar: Grammar
logger: Logger
convert: _Convert
def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ...
def parse_string(self, text: Text, debug: bool = ...) -> _NL: ...
def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ...

211
blib2to3/pgen2/grammar.py Normal file

@ -0,0 +1,211 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
# Python imports
import collections
import pickle
# Local imports
from . import token
class Grammar(object):
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self):
self.symbol2number = {}
self.number2symbol = {}
self.states = []
self.dfas = {}
self.labels = [(0, "EMPTY")]
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.start = 256
def dump(self, filename):
"""Dump the grammar tables to a pickle file.
dump() recursively changes all dict to OrderedDict, so the pickled file
is not exactly the same as what was passed in to dump(). load() uses the
pickled file to create the tables, but only changes OrderedDict to dict
at the top level; it does not recursively change OrderedDict to dict.
So, the loaded tables are different from the original tables that were
passed to load() in that some of the OrderedDict (from the pickled file)
are not changed back to dict. For parsing, this has no effect on
performance because OrderedDict uses dict's __getitem__ with nothing in
between.
"""
with open(filename, "wb") as f:
d = _make_deterministic(self.__dict__)
pickle.dump(d, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
with open(filename, "rb") as f:
d = pickle.load(f)
self.__dict__.update(d)
def loads(self, pkl):
"""Load the grammar tables from a pickle bytes object."""
self.__dict__.update(pickle.loads(pkl))
def copy(self):
"""
Copy the grammar.
"""
new = self.__class__()
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
"tokens", "symbol2label"):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
return new
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")
pprint(self.number2symbol)
print("states")
pprint(self.states)
print("dfas")
pprint(self.dfas)
print("labels")
pprint(self.labels)
print("start", self.start)
def _make_deterministic(top):
if isinstance(top, dict):
return collections.OrderedDict(
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
if isinstance(top, list):
return [_make_deterministic(e) for e in top]
if isinstance(top, tuple):
return tuple(_make_deterministic(e) for e in top)
return top
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""
opmap = {}
for line in opmap_raw.splitlines():
if line:
op, name = line.split()
opmap[op] = getattr(token, name)

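A small sketch of the pickle round-trip described in `dump()`/`load()` above, and of the `opmap` table built at import time (the file name is illustrative):
```py3
from blib2to3.pgen2 import grammar, token

g = grammar.Grammar()        # normally populated by pgen (or conv) before dumping
g.dump("Grammar.pickle")     # writes an OrderedDict-ified copy of the tables
g2 = grammar.Grammar()
g2.load("Grammar.pickle")    # restores the tables from the pickle

# tokenize reports every operator as OP; opmap recovers the real token code
assert grammar.opmap["//="] == token.DOUBLESLASHEQUAL
```
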

@ -0,0 +1,29 @@
# Stubs for lib2to3.pgen2.grammar (Python 3.6)
from blib2to3.pgen2 import _Path
from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar
_P = TypeVar('_P')
_Label = Tuple[int, Optional[Text]]
_DFA = List[List[Tuple[int, int]]]
_DFAS = Tuple[_DFA, Dict[int, int]]
class Grammar:
symbol2number: Dict[Text, int]
number2symbol: Dict[int, Text]
states: List[_DFA]
dfas: Dict[int, _DFAS]
labels: List[_Label]
keywords: Dict[Text, int]
tokens: Dict[int, int]
symbol2label: Dict[Text, int]
start: int
def __init__(self) -> None: ...
def dump(self, filename: _Path) -> None: ...
def load(self, filename: _Path) -> None: ...
def copy(self: _P) -> _P: ...
def report(self) -> None: ...
opmap_raw: Text
opmap: Dict[Text, Text]


@ -0,0 +1,60 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Safely evaluate Python string literals without using eval()."""
import re
simple_escapes = {"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
"r": "\r",
"t": "\t",
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\"}
def escape(m):
all, tail = m.group(0, 1)
assert all.startswith("\\")
esc = simple_escapes.get(tail)
if esc is not None:
return esc
if tail.startswith("x"):
hexes = tail[1:]
if len(hexes) < 2:
raise ValueError("invalid hex string escape ('\\%s')" % tail)
try:
i = int(hexes, 16)
except ValueError:
raise ValueError("invalid hex string escape ('\\%s')" % tail) from None
else:
try:
i = int(tail, 8)
except ValueError:
raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
return chr(i)
def evalString(s):
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0]
if s[:3] == q*3:
q = q*3
assert s.endswith(q), repr(s[-len(q):])
assert len(s) >= 2*len(q)
s = s[len(q):-len(q)]
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
def test():
for i in range(256):
c = chr(i)
s = repr(c)
e = evalString(s)
if e != c:
print(i, c, s, e)
if __name__ == "__main__":
test()

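For instance, the escape handling above:
```py3
from blib2to3.pgen2.literals import evalString

assert evalString(r"'\x41\n'") == "A\n"    # hex and simple escapes
assert evalString('"\\101"') == "A"        # octal escape
```
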

@ -0,0 +1,9 @@
# Stubs for lib2to3.pgen2.literals (Python 3.6)
from typing import Dict, Match, Text
simple_escapes: Dict[Text, Text]
def escape(m: Match) -> Text: ...
def evalString(s: Text) -> Text: ...
def test() -> None: ...

201
blib2to3/pgen2/parse.py Normal file

@ -0,0 +1,201 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
# Local imports
from . import token
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
self.msg = msg
self.type = type
self.value = value
self.context = context
class Parser(object):
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.addtoken(...): # parse a token; may raise ParseError
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when addtoken() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, addtoken() raises
the ParseError exception. There is no error recovery; the parser
cannot be used after a syntax error was reported (but it can be
reinitialized by calling setup()).
"""
def __init__(self, grammar, convert=None):
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
A concrete syntax tree node is a (type, value, context, nodes)
tuple, where type is the node type (a token or symbol number),
value is None for symbols and a string for tokens, context is
None or an opaque value used for error reporting (typically a
(lineno, offset) pair), and nodes is a list of children for
symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
self.convert = convert or (lambda grammar, node: node)
def setup(self, start=None):
"""Prepare for parsing.
This *must* be called before starting to parse.
The optional argument is an alternative start symbol; it
defaults to the grammar's start symbol.
You can use a Parser instance to parse any number of programs;
each time you call setup() the parser is reset to an initial
state determined by the (implicit or explicit) start symbol.
"""
if start is None:
start = self.grammar.start
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None.
newnode = (start, None, None, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
def addtoken(self, type, value, context):
"""Add a token; return True iff this is the end of the program."""
# Map from token to label
ilabel = self.classify(type, value, context)
# Loop until the token is shifted; may raise exceptions
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = self.grammar.labels[i]
if ilabel == i:
# Look it up in the list of labels
assert t < 256
# Shift a token; we're done with it
self.shift(type, value, newstate, context)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
self.pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = self.grammar.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
self.push(t, self.grammar.dfas[t], newstate, context)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)
def classify(self, type, value, context):
"""Turn a token into a label. (Internal)"""
if type == token.NAME:
# Keep a listing of all used names
self.used_names.add(value)
# Check for reserved words
ilabel = self.grammar.keywords.get(value)
if ilabel is not None:
return ilabel
ilabel = self.grammar.tokens.get(type)
if ilabel is None:
raise ParseError("bad token", type, value, context)
return ilabel
def shift(self, type, value, newstate, context):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, value, context, None)
newnode = self.convert(self.grammar, newnode)
if newnode is not None:
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def push(self, type, newdfa, newstate, context):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, popnode = self.stack.pop()
newnode = self.convert(self.grammar, popnode)
if newnode is not None:
if self.stack:
dfa, state, node = self.stack[-1]
node[-1].append(newnode)
else:
self.rootnode = newnode
self.rootnode.used_names = self.used_names

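The "proper usage sequence" from the docstring, spelled out against the tokenizer; `Driver.parse_tokens` above does the same plus prefix bookkeeping (the grammar path is illustrative, and `blib2to3.pytree.convert` is assumed to mirror its lib2to3 counterpart):
```py3
import io
from blib2to3 import pytree
from blib2to3.pgen2 import driver, parse, tokenize
from blib2to3.pgen2.grammar import opmap

g = driver.load_grammar("blib2to3/Grammar.txt")
p = parse.Parser(g, convert=pytree.convert)
p.setup()
tokens = tokenize.generate_tokens(io.StringIO("x = 1\n").readline)
for type_, value, start, _end, _line in tokens:
    if type_ in (tokenize.COMMENT, tokenize.NL):
        continue                      # Driver folds these into the prefix instead
    if type_ == tokenize.OP:
        type_ = opmap[value]          # tokenize reports every operator as OP
    if p.addtoken(type_, value, ("", start)):
        break                         # ENDMARKER accepted; parsing is complete
tree = p.rootnode
```
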
29
blib2to3/pgen2/parse.pyi Normal file

@ -0,0 +1,29 @@
# Stubs for lib2to3.pgen2.parse (Python 3.6)
from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple
from blib2to3.pgen2.grammar import Grammar, _DFAS
from blib2to3.pytree import _NL, _Convert, _RawNode
_Context = Sequence[Any]
class ParseError(Exception):
msg: Text
type: int
value: Optional[Text]
context: _Context
def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ...
class Parser:
grammar: Grammar
convert: _Convert
stack: List[Tuple[_DFAS, int, _RawNode]]
rootnode: Optional[_NL]
used_names: Set[Text]
def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ...
def setup(self, start: Optional[int] = ...) -> None: ...
def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ...
def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ...
def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ...
def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ...
def pop(self) -> None: ...

386
blib2to3/pgen2/pgen.py Normal file
View File

@ -0,0 +1,386 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Pgen imports
from . import grammar, token, tokenize
class PgenGrammar(grammar.Grammar):
pass
class ParserGenerator(object):
def __init__(self, filename, stream=None):
close_stream = None
if stream is None:
stream = open(filename)
close_stream = stream.close
self.filename = filename
self.stream = stream
self.generator = tokenize.generate_tokens(stream.readline)
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
if close_stream is not None:
close_stream()
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
def make_grammar(self):
c = PgenGrammar()
names = list(self.dfas.keys())
names.sort()
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[name] = i
c.number2symbol[i] = name
for name in names:
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in sorted(state.arcs.items()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
c.start = c.symbol2number[self.startsymbol]
return c
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def make_label(self, c, label):
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in c.symbol2number:
# A symbol name (a non-terminal)
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
assert isinstance(itoken, int), label
assert itoken in token.tok_name, label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
# A keyword
if value in c.keywords:
return c.keywords[value]
else:
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = grammar.opmap[value] # Fails if unknown token
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
def addfirstsets(self):
names = list(self.dfas.keys())
names.sort()
for name in names:
if name not in self.first:
self.calcfirst(name)
#print name, self.first[name].keys()
def calcfirst(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
for label, next in state.arcs.items():
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self.calcfirst(label)
fset = self.first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
inverse[symbol] = label
self.first[name] = totalset
def parse(self):
dfas = {}
startsymbol = None
# MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
self.gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self.expect(token.NAME)
self.expect(token.OP, ":")
a, z = self.parse_rhs()
self.expect(token.NEWLINE)
#self.dump_nfa(name, a, z)
dfa = self.make_dfa(a, z)
#self.dump_dfa(name, dfa)
oldlen = len(dfa)
self.simplify_dfa(dfa)
newlen = len(dfa)
dfas[name] = dfa
#print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
return dfas, startsymbol
def make_dfa(self, start, finish):
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction. Sets are represented as dicts mapping each
# NFA state to 1.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
return
base[state] = 1
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in sorted(arcs.items()):
for st in states:
if st.nfaset == nfaset:
break
else:
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
def dump_nfa(self, name, start, finish):
print("Dump of NFA for", name)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def dump_dfa(self, name, dfa):
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in sorted(state.arcs.items()):
print(" %s -> %d" % (label, dfa.index(next)))
def simplify_dfa(self, dfa):
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfa is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i+1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
#print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
changes = True
break
def parse_rhs(self):
# RHS: ALT ('|' ALT)*
a, z = self.parse_alt()
if self.value != "|":
return a, z
else:
aa = NFAState()
zz = NFAState()
aa.addarc(a)
z.addarc(zz)
while self.value == "|":
self.gettoken()
a, z = self.parse_alt()
aa.addarc(a)
z.addarc(zz)
return aa, zz
def parse_alt(self):
# ALT: ITEM+
a, b = self.parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
c, d = self.parse_item()
b.addarc(c)
b = d
return a, b
def parse_item(self):
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, "]")
a.addarc(z)
return a, z
else:
a, z = self.parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self.gettoken()
z.addarc(a)
if value == "+":
return a, z
else:
return a, a
def parse_atom(self):
# ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, ")")
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
z = NFAState()
a.addarc(z, self.value)
self.gettoken()
return a, z
else:
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self.raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
value = self.value
self.gettoken()
return value
def gettoken(self):
tup = next(self.generator)
while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = next(self.generator)
self.type, self.value, self.begin, self.end, self.line = tup
#print token.tok_name[self.type], repr(self.value)
def raise_error(self, msg, *args):
if args:
try:
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
raise SyntaxError(msg, (self.filename, self.end[0],
self.end[1], self.line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None):
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
def addarc(self, next, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next, DFAState)
self.arcs[label] = next
def unifystate(self, old, new):
for label, next in self.arcs.items():
if next is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.items():
if next is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def generate_grammar(filename="Grammar.txt"):
p = ParserGenerator(filename)
return p.make_grammar()
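# A small illustration of the public entry point above.  The path below is an
# assumption: in this repository Grammar.txt lives one directory up from this
# module, next to pygram.py.
def _example_generate_grammar():
    import os

    grammar_txt = os.path.join(os.path.dirname(__file__), "..", "Grammar.txt")
    g = generate_grammar(grammar_txt)
    # The first rule defined in the grammar file becomes the start symbol.
    return g.number2symbol[g.start]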

49
blib2to3/pgen2/pgen.pyi Normal file
View File

@ -0,0 +1,49 @@
# Stubs for lib2to3.pgen2.pgen (Python 3.6)
from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple
from mypy_extensions import NoReturn
from blib2to3.pgen2 import _Path, grammar
from blib2to3.pgen2.tokenize import _TokenInfo
class PgenGrammar(grammar.Grammar): ...
class ParserGenerator:
filename: _Path
stream: IO[Text]
generator: Iterator[_TokenInfo]
first: Dict[Text, Dict[Text, int]]
def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ...
def make_grammar(self) -> PgenGrammar: ...
def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ...
def make_label(self, c: PgenGrammar, label: Text) -> int: ...
def addfirstsets(self) -> None: ...
def calcfirst(self, name: Text) -> None: ...
def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ...
def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ...
def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> List[DFAState]: ...
def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ...
def simplify_dfa(self, dfa: List[DFAState]) -> None: ...
def parse_rhs(self) -> Tuple[NFAState, NFAState]: ...
def parse_alt(self) -> Tuple[NFAState, NFAState]: ...
def parse_item(self) -> Tuple[NFAState, NFAState]: ...
def parse_atom(self) -> Tuple[NFAState, NFAState]: ...
def expect(self, type: int, value: Optional[Any] = ...) -> Text: ...
def gettoken(self) -> None: ...
def raise_error(self, msg: str, *args: Any) -> NoReturn: ...
class NFAState:
arcs: List[Tuple[Optional[Text], NFAState]]
def __init__(self) -> None: ...
def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ...
class DFAState:
nfaset: Dict[NFAState, Any]
isfinal: bool
arcs: Dict[Text, DFAState]
def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ...
def addarc(self, next: DFAState, label: Text) -> None: ...
def unifystate(self, old: DFAState, new: DFAState) -> None: ...
def __eq__(self, other: Any) -> bool: ...
def generate_grammar(filename: _Path = ...) -> PgenGrammar: ...

83
blib2to3/pgen2/token.py Executable file
View File

@ -0,0 +1,83 @@
#! /usr/bin/env python3
"""Token constants (from "token.h")."""
# Taken from Python (r53757) and modified to include some tokens
# originally monkeypatched in by pgen2.tokenize
#--start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
INDENT = 5
DEDENT = 6
LPAR = 7
RPAR = 8
LSQB = 9
RSQB = 10
COLON = 11
COMMA = 12
SEMI = 13
PLUS = 14
MINUS = 15
STAR = 16
SLASH = 17
VBAR = 18
AMPER = 19
LESS = 20
GREATER = 21
EQUAL = 22
DOT = 23
PERCENT = 24
BACKQUOTE = 25
LBRACE = 26
RBRACE = 27
EQEQUAL = 28
NOTEQUAL = 29
LESSEQUAL = 30
GREATEREQUAL = 31
TILDE = 32
CIRCUMFLEX = 33
LEFTSHIFT = 34
RIGHTSHIFT = 35
DOUBLESTAR = 36
PLUSEQUAL = 37
MINEQUAL = 38
STAREQUAL = 39
SLASHEQUAL = 40
PERCENTEQUAL = 41
AMPEREQUAL = 42
VBAREQUAL = 43
CIRCUMFLEXEQUAL = 44
LEFTSHIFTEQUAL = 45
RIGHTSHIFTEQUAL = 46
DOUBLESTAREQUAL = 47
DOUBLESLASH = 48
DOUBLESLASHEQUAL = 49
AT = 50
ATEQUAL = 51
OP = 52
COMMENT = 53
NL = 54
RARROW = 55
ERRORTOKEN = 56
N_TOKENS = 57
NT_OFFSET = 256
#--end constants--
tok_name = {}
for _name, _value in list(globals().items()):
if type(_value) is type(0):
tok_name[_value] = _name
def ISTERMINAL(x):
return x < NT_OFFSET
def ISNONTERMINAL(x):
return x >= NT_OFFSET
def ISEOF(x):
return x == ENDMARKER
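# Illustration of how these constants are typically consumed: tok_name maps
# the numeric values back to their names, and the helpers classify a type as
# terminal (token) or nonterminal (grammar symbol).
def _example_token_constants():
    assert tok_name[NAME] == "NAME"
    assert ISTERMINAL(NAME) and not ISNONTERMINAL(NAME)
    assert ISNONTERMINAL(NT_OFFSET + 1)
    assert ISEOF(ENDMARKER)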

73
blib2to3/pgen2/token.pyi Normal file
View File

@ -0,0 +1,73 @@
# Stubs for lib2to3.pgen2.token (Python 3.6)
import sys
from typing import Dict, Text
ENDMARKER: int
NAME: int
NUMBER: int
STRING: int
NEWLINE: int
INDENT: int
DEDENT: int
LPAR: int
RPAR: int
LSQB: int
RSQB: int
COLON: int
COMMA: int
SEMI: int
PLUS: int
MINUS: int
STAR: int
SLASH: int
VBAR: int
AMPER: int
LESS: int
GREATER: int
EQUAL: int
DOT: int
PERCENT: int
BACKQUOTE: int
LBRACE: int
RBRACE: int
EQEQUAL: int
NOTEQUAL: int
LESSEQUAL: int
GREATEREQUAL: int
TILDE: int
CIRCUMFLEX: int
LEFTSHIFT: int
RIGHTSHIFT: int
DOUBLESTAR: int
PLUSEQUAL: int
MINEQUAL: int
STAREQUAL: int
SLASHEQUAL: int
PERCENTEQUAL: int
AMPEREQUAL: int
VBAREQUAL: int
CIRCUMFLEXEQUAL: int
LEFTSHIFTEQUAL: int
RIGHTSHIFTEQUAL: int
DOUBLESTAREQUAL: int
DOUBLESLASH: int
DOUBLESLASHEQUAL: int
OP: int
COMMENT: int
NL: int
if sys.version_info >= (3,):
RARROW: int
if sys.version_info >= (3, 5):
AT: int
ATEQUAL: int
AWAIT: int
ASYNC: int
ERRORTOKEN: int
N_TOKENS: int
NT_OFFSET: int
tok_name: Dict[int, Text]
def ISTERMINAL(x: int) -> bool: ...
def ISNONTERMINAL(x: int) -> bool: ...
def ISEOF(x: int) -> bool: ...

518
blib2to3/pgen2/tokenize.py Normal file
View File

@ -0,0 +1,518 @@
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.
"""Tokenization help for Python programs.
generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:
the token type (see token.py)
the token (a string)
the starting (row, column) indices of the token (a 2-tuple of ints)
the ending (row, column) indices of the token (a 2-tuple of ints)
the original line (string)
It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators
Older entry points
tokenize_loop(readline, tokeneater)
tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
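# A minimal illustration of the generator described above (not exercised by
# this module itself): tokenizing a one-line assignment yields NAME, OP,
# NUMBER, NEWLINE and ENDMARKER tokens, in that order.
def _example_generate_tokens():
    import io

    pairs = [(tok_name[typ], text)
             for typ, text, start, end, line
             in generate_tokens(io.StringIO("x = 1\n").readline)]
    assert pairs == [("NAME", "x"), ("OP", "="), ("NUMBER", "1"),
                     ("NEWLINE", "\n"), ("ENDMARKER", "")]
    return pairs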
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
import string, re
from codecs import BOM_UTF8, lookup
from lib2to3.pgen2.token import *
from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
"generate_tokens", "untokenize"]
del token
try:
bytes
except NameError:
# Support bytes type in Python <= 2.5, so 2to3 turns itself into
# valid Python 3 code.
bytes = str
def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+(?:_\d+)*'
Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
Expfloat = r'\d+(?:_\d+)*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
r"//=?", r"->",
r"[+\-*/%&@|^=<>]=?",
r"~")
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
tokenprog, pseudoprog, single3prog, double3prog = list(map(
re.compile, (Token, PseudoToken, Single3, Double3)))
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
"'''": single3prog, '"""': double3prog,
"r'''": single3prog, 'r"""': double3prog,
"u'''": single3prog, 'u"""': double3prog,
"b'''": single3prog, 'b"""': double3prog,
"f'''": single3prog, 'f"""': double3prog,
"ur'''": single3prog, 'ur"""': double3prog,
"br'''": single3prog, 'br"""': double3prog,
"rb'''": single3prog, 'rb"""': double3prog,
"R'''": single3prog, 'R"""': double3prog,
"U'''": single3prog, 'U"""': double3prog,
"B'''": single3prog, 'B"""': double3prog,
"F'''": single3prog, 'F"""': double3prog,
"uR'''": single3prog, 'uR"""': double3prog,
"Ur'''": single3prog, 'Ur"""': double3prog,
"UR'''": single3prog, 'UR"""': double3prog,
"bR'''": single3prog, 'bR"""': double3prog,
"Br'''": single3prog, 'Br"""': double3prog,
"BR'''": single3prog, 'BR"""': double3prog,
"rB'''": single3prog, 'rB"""': double3prog,
"Rb'''": single3prog, 'Rb"""': double3prog,
"RB'''": single3prog, 'RB"""': double3prog,
'r': None, 'R': None,
'u': None, 'U': None,
'f': None, 'F': None,
'b': None, 'B': None}
triple_quoted = {}
for t in ("'''", '"""',
"r'''", 'r"""', "R'''", 'R"""',
"u'''", 'u"""', "U'''", 'U"""',
"b'''", 'b"""', "B'''", 'B"""',
"f'''", 'f"""', "F'''", 'F"""',
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
"uR'''", 'uR"""', "UR'''", 'UR"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""',
"rb'''", 'rb"""', "Rb'''", 'Rb"""',
"rB'''", 'rB"""', "RB'''", 'RB"""',):
triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
"r'", 'r"', "R'", 'R"',
"u'", 'u"', "U'", 'U"',
"b'", 'b"', "B'", 'B"',
"f'", 'f"', "F'", 'F"',
"ur'", 'ur"', "Ur'", 'Ur"',
"uR'", 'uR"', "UR'", 'UR"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"',
"rb'", 'rb"', "Rb'", 'Rb"',
"rB'", 'rB"', "RB'", 'RB"',):
single_quoted[t] = t
tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
(srow, scol) = xxx_todo_changeme
(erow, ecol) = xxx_todo_changeme1
print("%d,%d-%d,%d:\t%s\t%s" % \
(srow, scol, erow, ecol, tok_name[type], repr(token)))
def tokenize(readline, tokeneater=printtoken):
"""
The tokenize() function accepts two parameters: one representing the
input stream, and one providing an output mechanism for tokenize().
The first parameter, readline, must be a callable object which provides
the same interface as the readline() method of built-in file objects.
Each call to the function should return one line of input as a string.
The second parameter, tokeneater, must also be a callable object. It is
called once for each token, with five arguments, corresponding to the
tuples generated by generate_tokens().
"""
try:
tokenize_loop(readline, tokeneater)
except StopTokenizing:
pass
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
for token_info in generate_tokens(readline):
tokeneater(*token_info)
class Untokenizer:
def __init__(self):
self.tokens = []
self.prev_row = 1
self.prev_col = 0
def add_whitespace(self, start):
row, col = start
assert row <= self.prev_row
col_offset = col - self.prev_col
if col_offset:
self.tokens.append(" " * col_offset)
def untokenize(self, iterable):
for t in iterable:
if len(t) == 2:
self.compat(t, iterable)
break
tok_type, token, start, end, line = t
self.add_whitespace(start)
self.tokens.append(token)
self.prev_row, self.prev_col = end
if tok_type in (NEWLINE, NL):
self.prev_row += 1
self.prev_col = 0
return "".join(self.tokens)
def compat(self, token, iterable):
startline = False
indents = []
toks_append = self.tokens.append
toknum, tokval = token
if toknum in (NAME, NUMBER):
tokval += ' '
if toknum in (NEWLINE, NL):
startline = True
for tok in iterable:
toknum, tokval = tok[:2]
if toknum in (NAME, NUMBER):
tokval += ' '
if toknum == INDENT:
indents.append(tokval)
continue
elif toknum == DEDENT:
indents.pop()
continue
elif toknum in (NEWLINE, NL):
startline = True
elif startline and indents:
toks_append(indents[-1])
startline = False
toks_append(tokval)
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argument, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
(as a string) and a list of any lines (left as bytes) it has read
in.
It detects the encoding from the presence of a utf-8 bom or an encoding
cookie as specified in pep-0263. If both a bom and a cookie are present, but
disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
charset, raise a SyntaxError. Note that if a utf-8 bom is found,
'utf-8-sig' is returned.
If no encoding is specified, then the default of 'utf-8' will be returned.
"""
bom_found = False
encoding = None
default = 'utf-8'
def read_or_stop():
try:
return readline()
except StopIteration:
return bytes()
def find_cookie(line):
try:
line_string = line.decode('ascii')
except UnicodeDecodeError:
return None
match = cookie_re.match(line_string)
if not match:
return None
encoding = _get_normal_name(match.group(1))
try:
codec = lookup(encoding)
except LookupError:
# This behaviour mimics the Python interpreter
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
if codec.name != 'utf-8':
# This behaviour mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
return encoding
first = read_or_stop()
if first.startswith(BOM_UTF8):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
if not first:
return default, []
encoding = find_cookie(first)
if encoding:
return encoding, [first]
if not blank_re.match(first):
return default, [first]
second = read_or_stop()
if not second:
return default, [first]
encoding = find_cookie(second)
if encoding:
return encoding, [first, second]
return default, [first, second]
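# A short illustration of detect_encoding() (not called anywhere in this
# module): an explicit coding cookie is normalized per the rules above, and
# the line that carried it is returned for re-use by the caller.
def _example_detect_encoding():
    import io

    cookie_line = b"# -*- coding: latin-1 -*-\n"
    encoding, lines = detect_encoding(io.BytesIO(cookie_line).readline)
    assert encoding == "iso-8859-1"
    assert lines == [cookie_line]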
def untokenize(iterable):
"""Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence
with at least two elements, a token number and token value. If
only two tokens are passed, the resulting output is poor.
Round-trip invariant for full input:
Untokenized source will match input source exactly
Round-trip invariant for limited input:
# Output text will tokenize back to the input
t1 = [tok[:2] for tok in generate_tokens(f.readline)]
newcode = untokenize(t1)
readline = iter(newcode.splitlines(1)).__next__
t2 = [tok[:2] for tok in generate_tokens(readline)]
assert t1 == t2
"""
ut = Untokenizer()
return ut.untokenize(iterable)
def generate_tokens(readline):
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
readline() method of built-in file objects. Each call to the function
should return one line of input as a string. Alternately, readline
can be a callable function terminating with StopIteration:
readline = open(myfile).next # Example of alternate readline
The generator produces 5-tuples with these members: the token type; the
token string; a 2-tuple (srow, scol) of ints specifying the row and
column where the token begins in the source; a 2-tuple (erow, ecol) of
ints specifying the row and column where the token ends in the source;
and the line on which the token was found. The line passed is the
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
namechars, numchars = string.ascii_letters + '_', '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]
while 1: # loop over lines in stream
try:
line = readline()
except StopIteration:
line = ''
lnum = lnum + 1
pos, max = 0, len(line)
if contstr: # continued string
if not line:
raise TokenError("EOF in multi-line string", strstart)
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
continue
else:
contstr = contstr + line
contline = contline + line
continue
elif parenlev == 0 and not continued: # new statement
if not line: break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ': column = column + 1
elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
elif line[pos] == '\f': column = 0
else: break
pos = pos + 1
if pos == max: break
if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
comment_token = line[pos:].rstrip('\r\n')
nl_pos = pos + len(comment_token)
yield (COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
yield (NL, line[nl_pos:],
(lnum, nl_pos), (lnum, len(line)), line)
else:
yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents or dedents
indents.append(column)
yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
if column not in indents:
raise IndentationError(
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
continued = 0
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
newline = NEWLINE
if parenlev > 0:
newline = NL
yield (newline, token, spos, epos, line)
elif initial == '#':
assert not token.endswith("\n")
yield (COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = endprogs[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield (STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or
endprogs[token[2]])
contstr, needcont = line[start:], 1
contline = line
break
else: # ordinary string
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
yield (NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
# This yield is new; needed for better idempotency:
yield (NL, token, spos, (lnum, pos), line)
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
yield (OP, token, spos, epos, line)
else:
yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos = pos + 1
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
if __name__ == '__main__': # testing
import sys
if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
else: tokenize(sys.stdin.readline)

30
blib2to3/pgen2/tokenize.pyi Normal file
View File

@ -0,0 +1,30 @@
# Stubs for lib2to3.pgen2.tokenize (Python 3.6)
# NOTE: Only elements from __all__ are present.
from typing import Callable, Iterable, Iterator, List, Text, Tuple
from blib2to3.pgen2.token import * # noqa
_Coord = Tuple[int, int]
_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
class TokenError(Exception): ...
class StopTokenizing(Exception): ...
def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ...
class Untokenizer:
tokens: List[Text]
prev_row: int
prev_col: int
def __init__(self) -> None: ...
def add_whitespace(self, start: _Coord) -> None: ...
def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ...
def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ...
def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
def generate_tokens(
readline: Callable[[], Text]
) -> Iterator[_TokenInfo]: ...

40
blib2to3/pygram.py Normal file
View File

@ -0,0 +1,40 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Export the Python grammar and symbols."""
# Python imports
import os
# Local imports
from .pgen2 import token
from .pgen2 import driver
from . import pytree
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
"PatternGrammar.txt")
class Symbols(object):
def __init__(self, grammar):
"""Initializer.
Creates an attribute for each grammar symbol (nonterminal),
whose value is the symbol's type (an int >= 256).
"""
for name, symbol in grammar.symbol2number.items():
setattr(self, name, symbol)
python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE)
python_symbols = Symbols(python_grammar)
python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]
pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE)
pattern_symbols = Symbols(pattern_grammar)
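# A small illustration of the objects exported above: every nonterminal is an
# integer attribute (>= 256) on the Symbols instances, and only the copied
# grammar has the "print" keyword removed.
def _example_symbols():
    assert python_symbols.funcdef >= 256
    assert "print" in python_grammar.keywords
    assert "print" not in python_grammar_no_print_statement.keywords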

119
blib2to3/pygram.pyi Normal file
View File

@ -0,0 +1,119 @@
# Stubs for lib2to3.pygram (Python 3.6)
from typing import Any
from blib2to3.pgen2.grammar import Grammar
class Symbols:
def __init__(self, grammar: Grammar) -> None: ...
class python_symbols(Symbols):
and_expr: int
and_test: int
annassign: int
arglist: int
argument: int
arith_expr: int
assert_stmt: int
async_funcdef: int
async_stmt: int
atom: int
augassign: int
break_stmt: int
classdef: int
comp_for: int
comp_if: int
comp_iter: int
comp_op: int
comparison: int
compound_stmt: int
continue_stmt: int
decorated: int
decorator: int
decorators: int
del_stmt: int
dictsetmaker: int
dotted_as_name: int
dotted_as_names: int
dotted_name: int
encoding_decl: int
eval_input: int
except_clause: int
exec_stmt: int
expr: int
expr_stmt: int
exprlist: int
factor: int
file_input: int
flow_stmt: int
for_stmt: int
funcdef: int
global_stmt: int
if_stmt: int
import_as_name: int
import_as_names: int
import_from: int
import_name: int
import_stmt: int
lambdef: int
listmaker: int
not_test: int
old_comp_for: int
old_comp_if: int
old_comp_iter: int
old_lambdef: int
old_test: int
or_test: int
parameters: int
pass_stmt: int
power: int
print_stmt: int
raise_stmt: int
return_stmt: int
shift_expr: int
simple_stmt: int
single_input: int
sliceop: int
small_stmt: int
star_expr: int
stmt: int
subscript: int
subscriptlist: int
suite: int
term: int
test: int
testlist: int
testlist1: int
testlist_gexp: int
testlist_safe: int
testlist_star_expr: int
tfpdef: int
tfplist: int
tname: int
trailer: int
try_stmt: int
typedargslist: int
varargslist: int
vfpdef: int
vfplist: int
vname: int
while_stmt: int
with_item: int
with_stmt: int
with_var: int
xor_expr: int
yield_arg: int
yield_expr: int
yield_stmt: int
class pattern_symbols(Symbols):
Alternative: int
Alternatives: int
Details: int
Matcher: int
NegatedUnit: int
Repeater: int
Unit: int
python_grammar: Grammar
python_grammar_no_print_statement: Grammar
pattern_grammar: Grammar

854
blib2to3/pytree.py Normal file
View File

@ -0,0 +1,854 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""
Python parse tree definitions.
This is a very concrete parse tree; we need to keep every token and
even the comments and whitespace between tokens.
There's also a pattern matching implementation here.
"""
__author__ = "Guido van Rossum <guido@python.org>"
import sys
from io import StringIO
HUGE = 0x7FFFFFFF # maximum repeat count, default max
_type_reprs = {}
def type_repr(type_num):
global _type_reprs
if not _type_reprs:
from .pygram import python_symbols
# printing tokens is possible but not as useful
# from .pgen2 import token // token.__dict__.items():
for name, val in python_symbols.__dict__.items():
if type(val) == int: _type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num)
class Base(object):
"""
Abstract base class for Node and Leaf.
This provides some default functionality and boilerplate using the
template pattern.
A node may be a subnode of at most one parent.
"""
# Default values for instance variables
type = None # int: token number (< 256) or symbol number (>= 256)
parent = None # Parent node pointer, or None
children = () # Tuple of subnodes
was_changed = False
was_checked = False
def __new__(cls, *args, **kwds):
"""Constructor that prevents Base from being instantiated."""
assert cls is not Base, "Cannot instantiate Base"
return object.__new__(cls)
def __eq__(self, other):
"""
Compare two nodes for equality.
This calls the method _eq().
"""
if self.__class__ is not other.__class__:
return NotImplemented
return self._eq(other)
__hash__ = None # For Py3 compatibility.
def _eq(self, other):
"""
Compare two nodes for equality.
This is called by __eq__ and __ne__. It is only called if the two nodes
have the same type. This must be implemented by the concrete subclass.
Nodes should be considered equal if they have the same structure,
ignoring the prefix string and other context information.
"""
raise NotImplementedError
def clone(self):
"""
Return a cloned (deep) copy of self.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def post_order(self):
"""
Return a post-order iterator for the tree.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def pre_order(self):
"""
Return a pre-order iterator for the tree.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def replace(self, new):
"""Replace this node with a new one in the parent."""
assert self.parent is not None, str(self)
assert new is not None
if not isinstance(new, list):
new = [new]
l_children = []
found = False
for ch in self.parent.children:
if ch is self:
assert not found, (self.parent.children, self, new)
if new is not None:
l_children.extend(new)
found = True
else:
l_children.append(ch)
assert found, (self.children, self, new)
self.parent.changed()
self.parent.children = l_children
for x in new:
x.parent = self.parent
self.parent = None
def get_lineno(self):
"""Return the line number which generated the invocant node."""
node = self
while not isinstance(node, Leaf):
if not node.children:
return
node = node.children[0]
return node.lineno
def changed(self):
if self.parent:
self.parent.changed()
self.was_changed = True
def remove(self):
"""
Remove the node from the tree. Returns the position of the node in its
parent's children before it was removed.
"""
if self.parent:
for i, node in enumerate(self.parent.children):
if node is self:
self.parent.changed()
del self.parent.children[i]
self.parent = None
return i
@property
def next_sibling(self):
"""
The node immediately following the invocant in their parent's children
list. If the invocant does not have a next sibling, it is None.
"""
if self.parent is None:
return None
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
if child is self:
try:
return self.parent.children[i+1]
except IndexError:
return None
@property
def prev_sibling(self):
"""
The node immediately preceding the invocant in their parent's children
list. If the invocant does not have a previous sibling, it is None.
"""
if self.parent is None:
return None
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
if child is self:
if i == 0:
return None
return self.parent.children[i-1]
def leaves(self):
for child in self.children:
yield from child.leaves()
def depth(self):
if self.parent is None:
return 0
return 1 + self.parent.depth()
def get_suffix(self):
"""
Return the string immediately following the invocant node. This is
effectively equivalent to node.next_sibling.prefix
"""
next_sib = self.next_sibling
if next_sib is None:
return ""
return next_sib.prefix
if sys.version_info < (3, 0):
def __str__(self):
return str(self).encode("ascii")
class Node(Base):
"""Concrete implementation for interior nodes."""
def __init__(self, type, children,
context=None,
prefix=None,
fixers_applied=None):
"""
Initializer.
Takes a type constant (a symbol number >= 256), a sequence of
child nodes, and an optional context keyword argument.
As a side effect, the parent pointers of the children are updated.
"""
assert type >= 256, type
self.type = type
self.children = list(children)
for ch in self.children:
assert ch.parent is None, repr(ch)
ch.parent = self
if prefix is not None:
self.prefix = prefix
if fixers_applied:
self.fixers_applied = fixers_applied[:]
else:
self.fixers_applied = None
def __repr__(self):
"""Return a canonical string representation."""
return "%s(%s, %r)" % (self.__class__.__name__,
type_repr(self.type),
self.children)
def __unicode__(self):
"""
Return a pretty string representation.
This reproduces the input source exactly.
"""
return "".join(map(str, self.children))
if sys.version_info > (3, 0):
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality."""
return (self.type, self.children) == (other.type, other.children)
def clone(self):
"""Return a cloned (deep) copy of self."""
return Node(self.type, [ch.clone() for ch in self.children],
fixers_applied=self.fixers_applied)
def post_order(self):
"""Return a post-order iterator for the tree."""
for child in self.children:
yield from child.post_order()
yield self
def pre_order(self):
"""Return a pre-order iterator for the tree."""
yield self
for child in self.children:
yield from child.pre_order()
@property
def prefix(self):
"""
The whitespace and comments preceding this node in the input.
"""
if not self.children:
return ""
return self.children[0].prefix
@prefix.setter
def prefix(self, prefix):
if self.children:
self.children[0].prefix = prefix
def set_child(self, i, child):
"""
Equivalent to 'node.children[i] = child'. This method also sets the
child's parent attribute appropriately.
"""
child.parent = self
self.children[i].parent = None
self.children[i] = child
self.changed()
def insert_child(self, i, child):
"""
Equivalent to 'node.children.insert(i, child)'. This method also sets
the child's parent attribute appropriately.
"""
child.parent = self
self.children.insert(i, child)
self.changed()
def append_child(self, child):
"""
Equivalent to 'node.children.append(child)'. This method also sets the
child's parent attribute appropriately.
"""
child.parent = self
self.children.append(child)
self.changed()
class Leaf(Base):
"""Concrete implementation for leaf nodes."""
# Default values for instance variables
_prefix = "" # Whitespace and comments preceding this token in the input
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input
def __init__(self, type, value,
context=None,
prefix=None,
fixers_applied=[]):
"""
Initializer.
Takes a type constant (a token number < 256), a string value, and an
optional context keyword argument.
"""
assert 0 <= type < 256, type
if context is not None:
self._prefix, (self.lineno, self.column) = context
self.type = type
self.value = value
if prefix is not None:
self._prefix = prefix
self.fixers_applied = fixers_applied[:]
def __repr__(self):
"""Return a canonical string representation."""
from .pgen2.token import tok_name
return "%s(%s, %r)" % (self.__class__.__name__,
tok_name.get(self.type, self.type),
self.value)
def __unicode__(self):
"""
Return a pretty string representation.
This reproduces the input source exactly.
"""
return self.prefix + str(self.value)
if sys.version_info > (3, 0):
__str__ = __unicode__
def _eq(self, other):
"""Compare two nodes for equality."""
return (self.type, self.value) == (other.type, other.value)
def clone(self):
"""Return a cloned (deep) copy of self."""
return Leaf(self.type, self.value,
(self.prefix, (self.lineno, self.column)),
fixers_applied=self.fixers_applied)
def leaves(self):
yield self
def post_order(self):
"""Return a post-order iterator for the tree."""
yield self
def pre_order(self):
"""Return a pre-order iterator for the tree."""
yield self
@property
def prefix(self):
"""
The whitespace and comments preceding this token in the input.
"""
return self._prefix
@prefix.setter
def prefix(self, prefix):
self.changed()
self._prefix = prefix
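# A minimal sketch of building a tree by hand with the classes above; the
# symbol numbers come from blib2to3.pygram.  str() of any node reproduces the
# source it covers, prefixes included.
def _example_build_tree():
    from . import pygram
    from .pgen2 import token

    syms = pygram.python_symbols
    node = Node(syms.simple_stmt,
                [Leaf(token.NAME, "pass"), Leaf(token.NEWLINE, "\n")])
    assert str(node) == "pass\n"
    assert node.children[0].next_sibling.type == token.NEWLINE
    return node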
def convert(gr, raw_node):
"""
Convert raw node information to a Node or Leaf instance.
This is passed to the parser driver which calls it whenever a reduction of a
grammar rule produces a new complete node, so that the tree is built
strictly bottom-up.
"""
type, value, context, children = raw_node
if children or type in gr.number2symbol:
# If there's exactly one child, return that child instead of
# creating a new node.
if len(children) == 1:
return children[0]
return Node(type, children, context=context)
else:
return Leaf(type, value, context=context)
class BasePattern(object):
"""
A pattern is a tree matching pattern.
It looks for a specific node type (token or symbol), and
optionally for a specific content.
This is an abstract base class. There are three concrete
subclasses:
- LeafPattern matches a single leaf node;
- NodePattern matches a single node (usually non-leaf);
- WildcardPattern matches a sequence of nodes of variable length.
"""
# Defaults for instance variables
type = None # Node type (token if < 256, symbol if >= 256)
content = None # Optional content matching pattern
name = None # Optional name used to store match in results dict
def __new__(cls, *args, **kwds):
"""Constructor that prevents BasePattern from being instantiated."""
assert cls is not BasePattern, "Cannot instantiate BasePattern"
return object.__new__(cls)
def __repr__(self):
args = [type_repr(self.type), self.content, self.name]
while args and args[-1] is None:
del args[-1]
return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
def optimize(self):
"""
A subclass can define this as a hook for optimizations.
Returns either self or another node with the same effect.
"""
return self
def match(self, node, results=None):
"""
Does this pattern exactly match a node?
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
Default implementation for non-wildcard patterns.
"""
if self.type is not None and node.type != self.type:
return False
if self.content is not None:
r = None
if results is not None:
r = {}
if not self._submatch(node, r):
return False
if r:
results.update(r)
if results is not None and self.name:
results[self.name] = node
return True
def match_seq(self, nodes, results=None):
"""
Does this pattern exactly match a sequence of nodes?
Default implementation for non-wildcard patterns.
"""
if len(nodes) != 1:
return False
return self.match(nodes[0], results)
def generate_matches(self, nodes):
"""
Generator yielding all matches for this pattern.
Default implementation for non-wildcard patterns.
"""
r = {}
if nodes and self.match(nodes[0], r):
yield 1, r
class LeafPattern(BasePattern):
def __init__(self, type=None, content=None, name=None):
"""
Initializer. Takes optional type, content, and name.
The type, if given, must be a token type (< 256). If not given,
this matches any *leaf* node; the content may still be required.
The content, if given, must be a string.
If a name is given, the matching node is stored in the results
dict under that key.
"""
if type is not None:
assert 0 <= type < 256, type
if content is not None:
assert isinstance(content, str), repr(content)
self.type = type
self.content = content
self.name = name
def match(self, node, results=None):
"""Override match() to insist on a leaf node."""
if not isinstance(node, Leaf):
return False
return BasePattern.match(self, node, results)
def _submatch(self, node, results=None):
"""
Match the pattern's content to the node's children.
This assumes the node type matches and self.content is not None.
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
When returning False, the results dict may still be updated.
"""
return self.content == node.value
class NodePattern(BasePattern):
wildcards = False
def __init__(self, type=None, content=None, name=None):
"""
Initializer. Takes optional type, content, and name.
The type, if given, must be a symbol type (>= 256). If the
type is None this matches *any* single node (leaf or not),
except if content is not None, in which case it only matches
non-leaf nodes that also match the content pattern.
The content, if not None, must be a sequence of Patterns that
must match the node's children exactly. If the content is
given, the type must not be None.
If a name is given, the matching node is stored in the results
dict under that key.
"""
if type is not None:
assert type >= 256, type
if content is not None:
assert not isinstance(content, str), repr(content)
content = list(content)
for i, item in enumerate(content):
assert isinstance(item, BasePattern), (i, item)
if isinstance(item, WildcardPattern):
self.wildcards = True
self.type = type
self.content = content
self.name = name
def _submatch(self, node, results=None):
"""
Match the pattern's content to the node's children.
This assumes the node type matches and self.content is not None.
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
When returning False, the results dict may still be updated.
"""
if self.wildcards:
for c, r in generate_matches(self.content, node.children):
if c == len(node.children):
if results is not None:
results.update(r)
return True
return False
if len(self.content) != len(node.children):
return False
for subpattern, child in zip(self.content, node.children):
if not subpattern.match(child, results):
return False
return True
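# A minimal sketch of matching with LeafPattern and NodePattern against a
# hand-built tree (see the sketch after the Leaf class above); naming the
# pattern stores the matched node in the results dict.
def _example_pattern_match():
    from . import pygram
    from .pgen2 import token

    syms = pygram.python_symbols
    node = Node(syms.simple_stmt,
                [Leaf(token.NAME, "pass"), Leaf(token.NEWLINE, "\n")])
    pattern = NodePattern(syms.simple_stmt,
                          [LeafPattern(token.NAME, "pass"),
                           LeafPattern(token.NEWLINE)],
                          name="stmt")
    results = {}
    assert pattern.match(node, results)
    assert results["stmt"] is node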
class WildcardPattern(BasePattern):
"""
A wildcard pattern can match zero or more nodes.
This has all the flexibility needed to implement patterns like:
.* .+ .? .{m,n}
(a b c | d e | f)
(...)* (...)+ (...)? (...){m,n}
except it always uses non-greedy matching.
"""
def __init__(self, content=None, min=0, max=HUGE, name=None):
"""
Initializer.
Args:
content: optional sequence of subsequences of patterns;
if absent, matches one node;
if present, each subsequence is an alternative [*]
min: optional minimum number of times to match, default 0
max: optional maximum number of times to match, default HUGE
name: optional name assigned to this match
[*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
equivalent to (a b c | d e | f g h); if content is None,
this is equivalent to '.' in regular expression terms.
The min and max parameters work as follows:
min=0, max=maxint: .*
min=1, max=maxint: .+
min=0, max=1: .?
min=1, max=1: .
If content is not None, replace the dot with the parenthesized
list of alternatives, e.g. (a b c | d e | f g h)*
"""
assert 0 <= min <= max <= HUGE, (min, max)
if content is not None:
content = tuple(map(tuple, content)) # Protect against alterations
# Check sanity of alternatives
assert len(content), repr(content) # Can't have zero alternatives
for alt in content:
assert len(alt), repr(alt) # Can't have empty alternatives
self.content = content
self.min = min
self.max = max
self.name = name
def optimize(self):
"""Optimize certain stacked wildcard patterns."""
subpattern = None
if (self.content is not None and
len(self.content) == 1 and len(self.content[0]) == 1):
subpattern = self.content[0][0]
if self.min == 1 and self.max == 1:
if self.content is None:
return NodePattern(name=self.name)
if subpattern is not None and self.name == subpattern.name:
return subpattern.optimize()
if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
subpattern.min <= 1 and self.name == subpattern.name):
return WildcardPattern(subpattern.content,
self.min*subpattern.min,
self.max*subpattern.max,
subpattern.name)
return self
def match(self, node, results=None):
"""Does this pattern exactly match a node?"""
return self.match_seq([node], results)
def match_seq(self, nodes, results=None):
"""Does this pattern exactly match a sequence of nodes?"""
for c, r in self.generate_matches(nodes):
if c == len(nodes):
if results is not None:
results.update(r)
if self.name:
results[self.name] = list(nodes)
return True
return False
def generate_matches(self, nodes):
"""
Generator yielding matches for a sequence of nodes.
Args:
nodes: sequence of nodes
Yields:
(count, results) tuples where:
count: the match comprises nodes[:count];
results: dict containing named submatches.
"""
if self.content is None:
# Shortcut for special case (see __init__.__doc__)
for count in range(self.min, 1 + min(len(nodes), self.max)):
r = {}
if self.name:
r[self.name] = nodes[:count]
yield count, r
elif self.name == "bare_name":
yield self._bare_name_matches(nodes)
else:
# The reason for this is that hitting the recursion limit usually
# results in some ugly messages about how RuntimeErrors are being
# ignored. We only have to do this on CPython, though, because other
# implementations don't have this nasty bug in the first place.
if hasattr(sys, "getrefcount"):
save_stderr = sys.stderr
sys.stderr = StringIO()
try:
for count, r in self._recursive_matches(nodes, 0):
if self.name:
r[self.name] = nodes[:count]
yield count, r
except RuntimeError:
# We fall back to the iterative pattern matching scheme if the recursive
# scheme hits the recursion limit.
for count, r in self._iterative_matches(nodes):
if self.name:
r[self.name] = nodes[:count]
yield count, r
finally:
if hasattr(sys, "getrefcount"):
sys.stderr = save_stderr
def _iterative_matches(self, nodes):
"""Helper to iteratively yield the matches."""
nodelen = len(nodes)
if 0 >= self.min:
yield 0, {}
results = []
# generate matches that use just one alt from self.content
for alt in self.content:
for c, r in generate_matches(alt, nodes):
yield c, r
results.append((c, r))
# for each match, iterate down the nodes
while results:
new_results = []
for c0, r0 in results:
# stop if the entire set of nodes has been matched
if c0 < nodelen and c0 <= self.max:
for alt in self.content:
for c1, r1 in generate_matches(alt, nodes[c0:]):
if c1 > 0:
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r
new_results.append((c0 + c1, r))
results = new_results
def _bare_name_matches(self, nodes):
"""Special optimized matcher for bare_name."""
count = 0
r = {}
done = False
max = len(nodes)
while not done and count < max:
done = True
for leaf in self.content:
if leaf[0].match(nodes[count], r):
count += 1
done = False
break
r[self.name] = nodes[:count]
return count, r
def _recursive_matches(self, nodes, count):
"""Helper to recursively yield the matches."""
assert self.content is not None
if count >= self.min:
yield 0, {}
if count < self.max:
for alt in self.content:
for c0, r0 in generate_matches(alt, nodes):
for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r
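# A small illustration of the non-greedy semantics documented above: with
# content=None and the default min/max, a WildcardPattern behaves like ".*"
# and yields every prefix length of the candidate sequence, shortest first.
def _example_wildcard_matches():
    from .pgen2 import token

    leaves = [Leaf(token.NAME, "a"), Leaf(token.NAME, "b")]
    counts = [count for count, results
              in WildcardPattern().generate_matches(leaves)]
    assert counts == [0, 1, 2]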
class NegatedPattern(BasePattern):
def __init__(self, content=None):
"""
Initializer.
The argument is either a pattern or None. If it is None, this
only matches an empty sequence (effectively '$' in regex
lingo). If it is not None, this matches whenever the argument
pattern doesn't have any matches.
"""
if content is not None:
assert isinstance(content, BasePattern), repr(content)
self.content = content
def match(self, node):
# We never match a node in its entirety
return False
def match_seq(self, nodes):
# We only match an empty sequence of nodes in its entirety
return len(nodes) == 0
def generate_matches(self, nodes):
if self.content is None:
# Return a match if there is an empty sequence
if len(nodes) == 0:
yield 0, {}
else:
# Return a match if the argument pattern has no matches
for c, r in self.content.generate_matches(nodes):
return
yield 0, {}
def generate_matches(patterns, nodes):
"""
Generator yielding matches for a sequence of patterns and nodes.
Args:
patterns: a sequence of patterns
nodes: a sequence of nodes
Yields:
(count, results) tuples where:
count: the entire sequence of patterns matches nodes[:count];
results: dict containing named submatches.
"""
if not patterns:
yield 0, {}
else:
p, rest = patterns[0], patterns[1:]
for c0, r0 in p.generate_matches(nodes):
if not rest:
yield c0, r0
else:
for c1, r1 in generate_matches(rest, nodes[c0:]):
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r

86
blib2to3/pytree.pyi Normal file
View File

@ -0,0 +1,86 @@
# Stubs for lib2to3.pytree (Python 3.6)
import sys
from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Tuple, TypeVar, Union
from blib2to3.pgen2.grammar import Grammar
_P = TypeVar('_P')
_NL = Union[Node, Leaf]
_Context = Tuple[Text, int, int]
_Results = Dict[Text, _NL]
_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]]
_Convert = Callable[[Grammar, _RawNode], Any]
HUGE: int
def type_repr(type_num: int) -> Text: ...
class Base:
type: int
parent: Optional[Node]
prefix: Text
children: List[_NL]
was_changed: bool
was_checked: bool
def __eq__(self, other: Any) -> bool: ...
def _eq(self: _P, other: _P) -> bool: ...
def clone(self: _P) -> _P: ...
def post_order(self) -> Iterator[_NL]: ...
def pre_order(self) -> Iterator[_NL]: ...
def replace(self, new: Union[_NL, List[_NL]]) -> None: ...
def get_lineno(self) -> int: ...
def changed(self) -> None: ...
def remove(self) -> Optional[int]: ...
@property
def next_sibling(self) -> Optional[_NL]: ...
@property
def prev_sibling(self) -> Optional[_NL]: ...
def leaves(self) -> Iterator[Leaf]: ...
def depth(self) -> int: ...
def get_suffix(self) -> Text: ...
if sys.version_info < (3,):
def get_prefix(self) -> Text: ...
def set_prefix(self, prefix: Text) -> None: ...
class Node(Base):
fixers_applied: List[Any]
def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ...
def set_child(self, i: int, child: _NL) -> None: ...
def insert_child(self, i: int, child: _NL) -> None: ...
def append_child(self, child: _NL) -> None: ...
class Leaf(Base):
lineno: int
column: int
value: Text
fixers_applied: List[Any]
def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ...
def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ...
class BasePattern:
type: int
content: Optional[Text]
name: Optional[Text]
def optimize(self) -> BasePattern: ... # sic, subclasses are free to optimize themselves into different patterns
def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ...
def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ...
def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
class LeafPattern(BasePattern):
def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ...
class NodePattern(BasePattern):
wildcards: bool
def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ...
class WildcardPattern(BasePattern):
min: int
max: int
def __init__(self, content: Optional[Text] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ...
class NegatedPattern(BasePattern):
def __init__(self, content: Optional[Text] = ...) -> None: ...
def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
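
The stub above is what lets tree-walking helpers in this repository typecheck; a short hedged sketch of the kind of annotated function it covers (the helper below is illustrative, not part of the codebase):

from typing import Iterator

from blib2to3.pytree import Node


def leaf_values(tree: Node) -> Iterator[str]:
    # Base.leaves() is typed as Iterator[Leaf] and Leaf.value as Text,
    # so this checks cleanly under mypy with the stub above.
    for leaf in tree.leaves():
        yield leaf.value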

31
mypy.ini Normal file

@ -0,0 +1,31 @@
[mypy]
# Specify the target platform details in config, so your developers are
# free to run mypy on Windows, Linux, or macOS and get consistent
# results.
python_version=3.6
platform=linux
# flake8-mypy expects the two following for sensible formatting
show_column_numbers=True
# show error messages from unrelated files
follow_imports=normal
# suppress errors about unsatisfied imports
ignore_missing_imports=True
# be strict
disallow_untyped_calls=True
warn_return_any=True
strict_optional=True
warn_no_return=True
warn_redundant_casts=True
warn_unused_ignores=True
# The following are off by default. Flip them on if you feel
# adventurous.
disallow_untyped_defs=True
check_untyped_defs=True
# No incremental mode
cache_dir=/dev/null
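
For a concrete sense of the strict flags above, a small hedged example (illustrative only, not from the codebase): the first function trips disallow_untyped_defs, while the annotated version passes and its concrete return type also keeps warn_return_any quiet.

# Flagged under disallow_untyped_defs=True: no annotations at all.
def width(line):
    return len(line.rstrip())


# Accepted: fully annotated, with a concrete (non-Any) return type.
def width_typed(line: str) -> int:
    return len(line.rstrip())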

67
setup.py Normal file

@ -0,0 +1,67 @@
# Copyright (C) 2018 Łukasz Langa
import ast
import re
from setuptools import setup
import sys
assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+"
from pathlib import Path  # noqa: E402
CURRENT_DIR = Path(__file__).parent
def get_long_description():
readme_md = CURRENT_DIR / 'README.md'
try:
import pypandoc
return pypandoc.convert_file(str(readme_md), 'rst')
except (IOError, ImportError):
print()
print(
'\x1b[31m\x1b[1mwarning:\x1b[0m\x1b[31m pandoc not found, '
'long description will be ugly (PyPI does not support .md).'
'\x1b[0m'
)
print()
with open(readme_md, encoding='utf8') as ld_file:
return ld_file.read()
def get_version():
black_py = CURRENT_DIR / 'black.py'
_version_re = re.compile(r'__version__\s+=\s+(?P<version>.*)')
with open(black_py, 'r', encoding='utf8') as f:
version = _version_re.search(f.read()).group('version')
return str(ast.literal_eval(version))
setup(
name='black',
version=get_version(),
description="The uncompromising code formatter.",
long_description=get_long_description(),
keywords='automation formatter yapf autopep8 pyfmt gofmt rustfmt',
author='Łukasz Langa',
author_email='lukasz@langa.pl',
url='https://github.com/ambv/black',
license='MIT',
py_modules=['black'],
packages=['blib2to3', 'blib2to3.pgen2'],
python_requires=">=3.6",
zip_safe=False,
install_requires=['click', 'attrs'],
test_suite='tests.test_black',
classifiers=[
'Development Status :: 3 - Alpha',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 3.6',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Software Development :: Quality Assurance',
],
entry_points={'console_scripts': ['black=black:main']},
)
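
A quick hedged illustration of how get_version() above extracts the version string (the file contents here are hypothetical; the real value lives in black.py):

import ast
import re

_version_re = re.compile(r'__version__\s+=\s+(?P<version>.*)')
source = "__version__ = '18.3a0'\n"  # hypothetical black.py contents
match = _version_re.search(source)
assert match is not None
print(ast.literal_eval(match.group('version')))  # -> 18.3a0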

8
tests/.flake8 Normal file

@ -0,0 +1,8 @@
# Like the base Black .flake8 but also ignores F811 which is used deliberately
# in test files.
[flake8]
ignore = E266, E501, F811
max-line-length = 80
max-complexity = 12
select = B,C,E,F,W,T4,B9

27
tests/cantfit.py Normal file

@ -0,0 +1,27 @@
# long variable name
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 0
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 1 # with a comment
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = [
1, 2, 3
]
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function()
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function(
arg1, arg2, arg3
)
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function(
[1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
)
# long function name
normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying()
normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
arg1, arg2, arg3
)
normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
[1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
)
# long arguments
normal_name = normal_function_name(
"but with super long string arguments that on their own exceed the line limit so there's no way it can ever fit",
"eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs",
this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it=0,
)

60
tests/comments.py Normal file

@ -0,0 +1,60 @@
#!/usr/bin/env python3
# Some license here.
#
# Has many lines. Many, many lines.
# Many, many, many lines.
"""Module docstring.
Possibly also many, many lines.
"""
import os.path
import sys
import a
from b.c import X # some noqa comment
try:
import fast
except ImportError:
import slow as fast
# Some comment before a function.
def function(default=None):
"""Docstring comes first.
Possibly many lines.
"""
# FIXME: Some comment about why this function is crap but still in production.
import inner_imports
if inner_imports.are_evil():
# Explains why we have this if.
# In great detail indeed.
x = X()
return x.method1() # type: ignore
# This return is also commented for some reason.
return default
# Explains why we use global state.
GLOBAL_STATE = {'a': a(1), 'b': a(2), 'c': a(3)}
# Another comment
@fast(really=True)
async def wat():
async with X.open_async() as x: # Some more comments
result = await x.method1()
# Comment after ending a block.
if result:
print('A OK', file=sys.stdout)
# Comment between things.
print()
# Some closing comments.
# Maybe Vim or Emacs directives for formatting.
# Who knows.

202
tests/comments2.py Normal file

@ -0,0 +1,202 @@
# Please keep __all__ alphabetized within each category.
__all__ = [
# Super-special typing primitives.
'Any',
'Callable',
'ClassVar',
# ABCs (from collections.abc).
'AbstractSet', # collections.abc.Set.
'ByteString',
'Container',
# Concrete collection types.
'Counter',
'Deque',
'Dict',
'DefaultDict',
'List',
'Set',
'FrozenSet',
'NamedTuple', # Not really a type.
'Generator',
]
def inline_comments_in_brackets_ruin_everything():
if typedargslist:
parameters.children = [
parameters.children[0], # (1
body,
parameters.children[-1], # )1
]
else:
parameters.children = [
parameters.children[0], # (2 what if this was actually long
body,
parameters.children[-1], # )2
]
if (self._proc is not None and
# has the child process finished?
self._returncode is None and
# the child process has finished, but the
# transport hasn't been notified yet?
self._proc.poll() is None):
pass
short = [
# one
1,
# two
2]
call(arg1, arg2, """
short
""", arg3=True)
############################################################################
call2(
#short
arg1,
#but
arg2,
#multiline
"""
short
""",
# yup
arg3=True)
lcomp = [
element # yup
for element in collection # yup
if element is not None # right
]
lcomp2 = [
# hello
element
# yup
for element in collection
# right
if element is not None
]
lcomp3 = [
# This one is actually too long to fit in a single line.
element.split('\n', 1)[0]
# yup
for element in collection.select_elements()
# right
if element is not None
]
return Node(
syms.simple_stmt,
[
Node(statement, result),
Leaf(token.NEWLINE, '\n'), # FIXME: \r\n?
],
)
instruction()
# END COMMENTS
# MORE END COMMENTS
# output
# Please keep __all__ alphabetized within each category.
__all__ = [
# Super-special typing primitives.
'Any',
'Callable',
'ClassVar',
# ABCs (from collections.abc).
'AbstractSet', # collections.abc.Set.
'ByteString',
'Container',
# Concrete collection types.
'Counter',
'Deque',
'Dict',
'DefaultDict',
'List',
'Set',
'FrozenSet',
'NamedTuple', # Not really a type.
'Generator',
]
def inline_comments_in_brackets_ruin_everything():
if typedargslist:
parameters.children = [
parameters.children[0], body, parameters.children[-1] # (1 # )1
]
else:
parameters.children = [
parameters.children[0], # (2 what if this was actually long
body,
parameters.children[-1], # )2
]
if (
self._proc is not None and
# has the child process finished?
self._returncode is None and
# the child process has finished, but the
# transport hasn't been notified yet?
self._proc.poll() is None
):
pass
short = [
# one
1,
# two
2,
]
call(
arg1,
arg2,
"""
short
""",
arg3=True,
)
############################################################################
call2(
# short
arg1,
# but
arg2,
# multiline
"""
short
""",
# yup
arg3=True,
)
lcomp = [
element for element in collection if element is not None # yup # yup # right
]
lcomp2 = [
# hello
element
# yup
for element in collection
# right
if element is not None
]
lcomp3 = [
# This one is actually too long to fit in a single line.
element.split('\n', 1)[0]
# yup
for element in collection.select_elements()
# right
if element is not None
]
return Node(
syms.simple_stmt,
[Node(statement, result), Leaf(token.NEWLINE, '\n')], # FIXME: \r\n?
)
instruction()
# END COMMENTS
# MORE END COMMENTS

21
tests/composition.py Normal file

@ -0,0 +1,21 @@
class C:
def test(self) -> None:
with patch("black.out", print):
self.assertEqual(
unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.'
)
self.assertEqual(
unstyle(str(report)),
'1 file reformatted, 1 file left unchanged, 1 file failed to reformat.',
)
self.assertEqual(
unstyle(str(report)),
'2 files reformatted, 1 file left unchanged, '
'1 file failed to reformat.',
)
self.assertEqual(
unstyle(str(report)),
'2 files reformatted, 2 files left unchanged, '
'2 files failed to reformat.',
)

240
tests/expression.py Normal file

@ -0,0 +1,240 @@
...
'some_string'
b'\\xa3'
Name
None
True
False
1
1.0
1j
True or False
True or False or None
True and False
True and False and None
(Name1 and Name2) or Name3
Name1 and Name2 or Name3
Name1 or (Name2 and Name3)
Name1 or Name2 and Name3
(Name1 and Name2) or (Name3 and Name4)
Name1 and Name2 or Name3 and Name4
Name1 or (Name2 and Name3) or Name4
Name1 or Name2 and Name3 or Name4
v1 << 2
1 >> v2
1 % finished
1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8
((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8)
not great
~great
+value
-1
~int and not v1 ^ 123 + v2 | True
(~int) and (not ((v1 ^ (123 + v2)) | True))
lambda arg: None
lambda a=True: a
lambda a, b, c=True: a
lambda a, b, c=True, *, d=(1 << v2), e='str': a
lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b
1 if True else 2
str or None if True else str or bytes or None
(str or None) if True else (str or bytes or None)
str or None if (1 if True else 2) else str or bytes or None
(str or None) if (1 if True else 2) else (str or bytes or None)
{'2.7': dead, '3.7': (long_live or die_hard)}
{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}}
{**a, **b, **c}
{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')}
({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None
()
(1,)
(1, 2)
(1, 2, 3)
[]
[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)]
{i for i in (1, 2, 3)}
{(i ** 2) for i in (1, 2, 3)}
{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))}
{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)}
[i for i in (1, 2, 3)]
[(i ** 2) for i in (1, 2, 3)]
[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))]
[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)]
{i: 0 for i in (1, 2, 3)}
{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))}
Python3 > Python2 > COBOL
Life is Life
call()
call(arg)
call(kwarg='hey')
call(arg, kwarg='hey')
call(arg, another, kwarg='hey', **kwargs)
lukasz.langa.pl
call.me(maybe)
1 .real
1.0 .real
....__class__
list[str]
dict[str, int]
tuple[str, ...]
tuple[str, int, float, dict[str, int]]
slice[0]
slice[0:1]
slice[0:1:2]
slice[:]
slice[:-1]
slice[1:]
slice[::-1]
(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None)
f'f-string without formatted values is just a string'
f'{{NOT a formatted value}}'
f'some f-string with {a} {few():.2f} {formatted.values!r}'
f"{f'{nested} inner'} outer"
f'space between opening braces: { {a for a in (1, 2, 3)}}'
{'2.7': dead, '3.7': long_live or die_hard}
{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'}
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C]
(SomeName)
SomeName
(Good, Bad, Ugly)
(i for i in (1, 2, 3))
((i ** 2) for i in (1, 2, 3))
((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c')))
(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3))
(*starred)
a = (1,)
b = 1,
c = 1
d = (1,) + a + (2,)
def gen():
yield from outside_of_generator
a = (yield)
async def f():
await some.complicated[0].call(with_args=(True or (1 is not 1)))
# output
...
'some_string'
b'\\xa3'
Name
None
True
False
1
1.0
1j
True or False
True or False or None
True and False
True and False and None
(Name1 and Name2) or Name3
Name1 and Name2 or Name3
Name1 or (Name2 and Name3)
Name1 or Name2 and Name3
(Name1 and Name2) or (Name3 and Name4)
Name1 and Name2 or Name3 and Name4
Name1 or (Name2 and Name3) or Name4
Name1 or Name2 and Name3 or Name4
v1 << 2
1 >> v2
1 % finished
1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8
((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8)
not great
~great
+value
-1
~int and not v1 ^ 123 + v2 | True
(~int) and (not ((v1 ^ (123 + v2)) | True))
lambda arg: None
lambda a=True: a
lambda a, b, c=True: a
lambda a, b, c=True, *, d=(1 << v2), e='str': a
lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b
1 if True else 2
str or None if True else str or bytes or None
(str or None) if True else (str or bytes or None)
str or None if (1 if True else 2) else str or bytes or None
(str or None) if (1 if True else 2) else (str or bytes or None)
{'2.7': dead, '3.7': (long_live or die_hard)}
{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}}
{**a, **b, **c}
{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')}
({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None
()
(1,)
(1, 2)
(1, 2, 3)
[]
[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)]
{i for i in (1, 2, 3)}
{(i ** 2) for i in (1, 2, 3)}
{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))}
{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)}
[i for i in (1, 2, 3)]
[(i ** 2) for i in (1, 2, 3)]
[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))]
[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)]
{i: 0 for i in (1, 2, 3)}
{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))}
Python3 > Python2 > COBOL
Life is Life
call()
call(arg)
call(kwarg='hey')
call(arg, kwarg='hey')
call(arg, another, kwarg='hey', **kwargs)
lukasz.langa.pl
call.me(maybe)
1 .real
1.0 .real
....__class__
list[str]
dict[str, int]
tuple[str, ...]
tuple[str, int, float, dict[str, int]]
slice[0]
slice[0:1]
slice[0:1:2]
slice[:]
slice[:-1]
slice[1:]
slice[::-1]
(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None)
f'f-string without formatted values is just a string'
f'{{NOT a formatted value}}'
f'some f-string with {a} {few():.2f} {formatted.values!r}'
f"{f'{nested} inner'} outer"
f'space between opening braces: { {a for a in (1, 2, 3)}}'
{'2.7': dead, '3.7': long_live or die_hard}
{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'}
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C]
(SomeName)
SomeName
(Good, Bad, Ugly)
(i for i in (1, 2, 3))
((i ** 2) for i in (1, 2, 3))
((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c')))
(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3))
(*starred)
a = (1,)
b = 1,
c = 1
d = (1,) + a + (2,)
def gen():
yield from outside_of_generator
a = (yield)
async def f():
await some.complicated[0].call(with_args=(True or (1 is not 1)))

137
tests/function.py Normal file

@ -0,0 +1,137 @@
#!/usr/bin/env python3
import asyncio
import sys
from third_party import X, Y, Z
from library import some_connection, \
some_decorator
def func_no_args():
a; b; c
if True: raise RuntimeError
if False: ...
for i in range(10):
print(i)
continue
return None
async def coroutine(arg):
"Single-line docstring. Multiline is harder to reformat."
async with some_connection() as conn:
await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
await asyncio.sleep(1)
@asyncio.coroutine
@some_decorator(
with_args=True,
many_args=[1,2,3]
)
def function_signature_stress_test(number:int,no_annotation=None,text:str="default",* ,debug:bool=False,**kwargs) -> str:
return text[number:-1]
def long_lines():
if True:
typedargslist.extend(
gen_annotated_params(ast_args.kwonlyargs, ast_args.kw_defaults, parameters, implicit_default=True)
)
_type_comment_re = re.compile(
r"""
^
[\t ]*
\#[ ]type:[ ]*
(?P<type>
[^#\t\n]+?
)
(?<!ignore) # note: this will force the non-greedy + in <type> to match
# a trailing space which is why we need the silliness below
(?<!ignore[ ]{1})(?<!ignore[ ]{2})(?<!ignore[ ]{3})(?<!ignore[ ]{4})
(?<!ignore[ ]{5})(?<!ignore[ ]{6})(?<!ignore[ ]{7})(?<!ignore[ ]{8})
(?<!ignore[ ]{9})(?<!ignore[ ]{10})
[\t ]*
(?P<nl>
(?:\#[^\n]*)?
\n?
)
$
""", re.MULTILINE | re.VERBOSE
)
# output
#!/usr/bin/env python3
import asyncio
import sys
from third_party import X, Y, Z
from library import some_connection, some_decorator
def func_no_args():
a
b
c
if True:
raise RuntimeError
if False:
...
for i in range(10):
print(i)
continue
return None
async def coroutine(arg):
"Single-line docstring. Multiline is harder to reformat."
async with some_connection() as conn:
await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
await asyncio.sleep(1)
@asyncio.coroutine
@some_decorator(with_args=True, many_args=[1, 2, 3])
def function_signature_stress_test(
number: int,
no_annotation=None,
text: str = "default",
*,
debug: bool = False,
**kwargs,
) -> str:
return text[number:-1]
def long_lines():
if True:
typedargslist.extend(
gen_annotated_params(
ast_args.kwonlyargs,
ast_args.kw_defaults,
parameters,
implicit_default=True,
)
)
_type_comment_re = re.compile(
r"""
^
[\t ]*
\#[ ]type:[ ]*
(?P<type>
[^#\t\n]+?
)
(?<!ignore) # note: this will force the non-greedy + in <type> to match
# a trailing space which is why we need the silliness below
(?<!ignore[ ]{1})(?<!ignore[ ]{2})(?<!ignore[ ]{3})(?<!ignore[ ]{4})
(?<!ignore[ ]{5})(?<!ignore[ ]{6})(?<!ignore[ ]{7})(?<!ignore[ ]{8})
(?<!ignore[ ]{9})(?<!ignore[ ]{10})
[\t ]*
(?P<nl>
(?:\#[^\n]*)?
\n?
)
$
""",
re.MULTILINE | re.VERBOSE,
)

77
tests/import_spacing.py Normal file

@ -0,0 +1,77 @@
"""The asyncio package, tracking PEP 3156."""
# flake8: noqa
import sys
# This relies on each of the submodules having an __all__ variable.
from .base_events import *
from .coroutines import *
from .events import * # comment here
from .futures import *
from .locks import * # comment here
from .protocols import *
from .runners import * # comment here
from .queues import *
from .streams import *
from .subprocess import *
from .tasks import *
from .transports import *
__all__ = (
base_events.__all__ +
coroutines.__all__ +
events.__all__ +
futures.__all__ +
locks.__all__ +
protocols.__all__ +
runners.__all__ +
queues.__all__ +
streams.__all__ +
subprocess.__all__ +
tasks.__all__ +
transports.__all__
)
# output
"""The asyncio package, tracking PEP 3156."""
# flake8: noqa
import sys
# This relies on each of the submodules having an __all__ variable.
from .base_events import *
from .coroutines import *
from .events import * # comment here
from .futures import *
from .locks import * # comment here
from .protocols import *
from .runners import * # comment here
from .queues import *
from .streams import *
from .subprocess import *
from .tasks import *
from .transports import *
__all__ = (
base_events.__all__ +
coroutines.__all__ +
events.__all__ +
futures.__all__ +
locks.__all__ +
protocols.__all__ +
runners.__all__ +
queues.__all__ +
streams.__all__ +
subprocess.__all__ +
tasks.__all__ +
transports.__all__
)

220
tests/test_black.py Normal file

@ -0,0 +1,220 @@
#!/usr/bin/env python3
from functools import partial
from pathlib import Path
from typing import List, Tuple
import unittest
from unittest.mock import patch
from click import unstyle
import black
ll = 88
ff = partial(black.format_file, line_length=ll, fast=True)
fs = partial(black.format_str, line_length=ll)
THIS_FILE = Path(__file__)
THIS_DIR = THIS_FILE.parent
def dump_to_stderr(*output: str) -> str:
return '\n' + '\n'.join(output) + '\n'
def read_data(name: str) -> Tuple[str, str]:
"""read_data('test_name') -> 'input', 'output'"""
if not name.endswith('.py'):
name += '.py'
_input: List[str] = []
_output: List[str] = []
with open(THIS_DIR / name, 'r', encoding='utf8') as test:
lines = test.readlines()
result = _input
for line in lines:
if line.rstrip() == '# output':
result = _output
continue
result.append(line)
if _input and not _output:
# If there's no output marker, treat the entire file as already pre-formatted.
_output = _input[:]
return ''.join(_input).strip() + '\n', ''.join(_output).strip() + '\n'
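# A hedged illustration of the contract above (the file contents are
# hypothetical, not an actual fixture): for a data file containing
#
#     x=1
#     # output
#     x = 1
#
# read_data() returns ("x=1\n", "x = 1\n"); without the "# output" marker
# the input is returned for both halves, i.e. the file is treated as
# already formatted.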
class BlackTestCase(unittest.TestCase):
maxDiff = None
def assertFormatEqual(self, expected: str, actual: str) -> None:
if actual != expected:
black.out('Expected tree:', fg='green')
try:
exp_node = black.lib2to3_parse(expected)
bdv = black.DebugVisitor()
list(bdv.visit(exp_node))
except Exception as ve:
black.err(str(ve))
black.out('Actual tree:', fg='red')
try:
exp_node = black.lib2to3_parse(actual)
bdv = black.DebugVisitor()
list(bdv.visit(exp_node))
except Exception as ve:
black.err(str(ve))
self.assertEqual(expected, actual)
@patch("black.dump_to_file", dump_to_stderr)
def test_self(self) -> None:
source, expected = read_data('test_black')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
with self.assertRaises(black.NothingChanged):
ff(THIS_FILE)
@patch("black.dump_to_file", dump_to_stderr)
def test_black(self) -> None:
source, expected = read_data('../black')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
with self.assertRaises(black.NothingChanged):
ff(THIS_FILE)
@patch("black.dump_to_file", dump_to_stderr)
def test_setup(self) -> None:
source, expected = read_data('../setup')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
with self.assertRaises(black.NothingChanged):
ff(THIS_FILE)
@patch("black.dump_to_file", dump_to_stderr)
def test_function(self) -> None:
source, expected = read_data('function')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_expression(self) -> None:
source, expected = read_data('expression')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_comments(self) -> None:
source, expected = read_data('comments')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_comments2(self) -> None:
source, expected = read_data('comments2')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_cantfit(self) -> None:
source, expected = read_data('cantfit')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_import_spacing(self) -> None:
source, expected = read_data('import_spacing')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
@patch("black.dump_to_file", dump_to_stderr)
def test_composition(self) -> None:
source, expected = read_data('composition')
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, line_length=ll)
def test_report(self) -> None:
report = black.Report()
out_lines = []
err_lines = []
def out(msg: str, **kwargs):
out_lines.append(msg)
def err(msg: str, **kwargs):
err_lines.append(msg)
with patch("black.out", out), patch("black.err", err):
report.done(Path('f1'), changed=True)
self.assertEqual(len(out_lines), 1)
self.assertEqual(len(err_lines), 0)
self.assertEqual(out_lines[-1], 'reformatted f1')
self.assertEqual(unstyle(str(report)), '1 file reformatted.')
self.assertEqual(report.return_code, 0)
report.failed(Path('e1'), 'boom')
self.assertEqual(len(out_lines), 1)
self.assertEqual(len(err_lines), 1)
self.assertEqual(err_lines[-1], 'error: cannot format e1: boom')
self.assertEqual(
unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.'
)
self.assertEqual(report.return_code, 1)
report.done(Path('f2'), changed=False)
self.assertEqual(len(out_lines), 2)
self.assertEqual(len(err_lines), 1)
self.assertEqual(out_lines[-1], 'f2 already well formatted, good job.')
self.assertEqual(
unstyle(str(report)),
'1 file reformatted, 1 file left unchanged, '
'1 file failed to reformat.',
)
self.assertEqual(report.return_code, 1)
report.done(Path('f3'), changed=True)
self.assertEqual(len(out_lines), 3)
self.assertEqual(len(err_lines), 1)
self.assertEqual(out_lines[-1], 'reformatted f3')
self.assertEqual(
unstyle(str(report)),
'2 files reformatted, 1 file left unchanged, '
'1 file failed to reformat.',
)
self.assertEqual(report.return_code, 1)
report.failed(Path('e2'), 'boom')
self.assertEqual(len(out_lines), 3)
self.assertEqual(len(err_lines), 2)
self.assertEqual(err_lines[-1], 'error: cannot format e2: boom')
self.assertEqual(
unstyle(str(report)),
'2 files reformatted, 1 file left unchanged, '
'2 files failed to reformat.',
)
self.assertEqual(report.return_code, 1)
report.done(Path('f4'), changed=False)
self.assertEqual(len(out_lines), 4)
self.assertEqual(len(err_lines), 2)
self.assertEqual(out_lines[-1], 'f4 already well formatted, good job.')
self.assertEqual(
unstyle(str(report)),
'2 files reformatted, 2 files left unchanged, '
'2 files failed to reformat.',
)
self.assertEqual(report.return_code, 1)
if __name__ == '__main__':
unittest.main()