Top PyPI Packages: Use 30-days data, 365 is no longer available (#2995)

This commit is contained in:
Hugo van Kemenade 2022-04-06 15:48:50 +03:00 committed by GitHub
parent 421383d560
commit 9b307405fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -10,10 +10,9 @@
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache, partial from functools import lru_cache, partial
from pathlib import Path from pathlib import Path
from typing import ( # type: ignore # typing can't see Literal from typing import (
Generator, Generator,
List, List,
Literal,
NamedTuple, NamedTuple,
Optional, Optional,
Tuple, Tuple,
@@ -24,12 +23,11 @@
PYPI_INSTANCE = "https://pypi.org/pypi" PYPI_INSTANCE = "https://pypi.org/pypi"
PYPI_TOP_PACKAGES = ( PYPI_TOP_PACKAGES = (
"https://hugovk.github.io/top-pypi-packages/top-pypi-packages-{days}-days.json" "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json"
) )
INTERNAL_BLACK_REPO = f"{tempfile.gettempdir()}/__black" INTERNAL_BLACK_REPO = f"{tempfile.gettempdir()}/__black"
ArchiveKind = Union[tarfile.TarFile, zipfile.ZipFile] ArchiveKind = Union[tarfile.TarFile, zipfile.ZipFile]
Days = Union[Literal[30], Literal[365]]
subprocess.run = partial(subprocess.run, check=True) # type: ignore subprocess.run = partial(subprocess.run, check=True) # type: ignore
# https://github.com/python/mypy/issues/1484 # https://github.com/python/mypy/issues/1484
@@ -64,8 +62,8 @@ def get_pypi_download_url(package: str, version: Optional[str]) -> str:
return cast(str, source["url"]) return cast(str, source["url"])
def get_top_packages(days: Days) -> List[str]: def get_top_packages() -> List[str]:
with urlopen(PYPI_TOP_PACKAGES.format(days=days)) as page: with urlopen(PYPI_TOP_PACKAGES) as page:
result = json.load(page) result = json.load(page)
return [package["project"] for package in result["rows"]] return [package["project"] for package in result["rows"]]
@@ -128,13 +126,12 @@ def get_package(
def download_and_extract_top_packages( def download_and_extract_top_packages(
directory: Path, directory: Path,
days: Days = 365,
workers: int = 8, workers: int = 8,
limit: slice = DEFAULT_SLICE, limit: slice = DEFAULT_SLICE,
) -> Generator[Path, None, None]: ) -> Generator[Path, None, None]:
with ThreadPoolExecutor(max_workers=workers) as executor: with ThreadPoolExecutor(max_workers=workers) as executor:
bound_downloader = partial(get_package, version=None, directory=directory) bound_downloader = partial(get_package, version=None, directory=directory)
for package in executor.map(bound_downloader, get_top_packages(days)[limit]): for package in executor.map(bound_downloader, get_top_packages()[limit]):
if package is not None: if package is not None:
yield package yield package