"""A JupyterLite addon for resolving remote ``pyodide-lock.json``."""
# Copyright (c) jupyterlite-pyodide-lock contributors.
# Distributed under the terms of the BSD-3-Clause License.
from __future__ import annotations
import json
import re
import urllib.parse
from copy import deepcopy
from hashlib import sha256
from typing import TYPE_CHECKING, Any, ClassVar
from doit.tools import config_changed
from jupyterlite_core.constants import JSON_FMT, JUPYTERLITE_JSON, UTF8
from jupyterlite_core.trait_types import TypedTuple
from jupyterlite_pyodide_kernel.constants import PYODIDE
from packaging.utils import canonicalize_name
from traitlets import Bool, Unicode, default
from jupyterlite_pyodide_lock import __version__
from jupyterlite_pyodide_lock.addons._base import BaseAddon
from jupyterlite_pyodide_lock.constants import (
PYODIDE_LOCK_OFFLINE,
PYODIDE_LOCK_STEM,
RE_REMOTE_URL,
)
if TYPE_CHECKING:
from logging import Logger
from pathlib import Path
from jupyterlite_core.manager import LiteManager
from jupyterlite_pyodide_lock.addons._base import TTaskGenerator
class PyodideLockOfflineAddon(BaseAddon):
"""Rewrite ``pyodide-lock.json`` with locally-downloaded packages."""
#: advertise JupyterLite lifecycle hooks
__all__: ClassVar = ["status", "post_build"]
log: Logger
    includes: tuple[str, ...] = TypedTuple(
Unicode(),
help="regular expressions for package names to download for offline usage",
).tag(config=True)
    extra_includes: tuple[str, ...] = TypedTuple(
Unicode(),
help="more regular expressions for package names to download for offline usage",
).tag(config=True)
    excludes: tuple[str, ...] = TypedTuple(
Unicode(),
help="regular expressions to exclude from downloading",
).tag(config=True)
    extra_excludes: tuple[str, ...] = TypedTuple(
Unicode(), help="more regular expressions to exclude from downloading"
).tag(config=True)
prune: bool = Bool(
default_value=False, help="prune packages not requested to be offline"
).tag(config=True) # type: ignore[assignment]
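    # A minimal configuration sketch, assuming the ``jupyter_lite_config.json``
    # mechanism used by JupyterLite addons; the pattern and values are illustrative:
    #
    #   {
    #     "PyodideLockOfflineAddon": {
    #       "enabled": true,
    #       "extra_includes": ["^ipywidgets$"],
    #       "prune": true
    #     }
    #   }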
@default("excludes")
def _default_excludes(self) -> tuple[str, ...]:
"""Provide default patterns of package names to ignore."""
return (".*-tests$",)
@default("includes")
def _default_includes(self) -> tuple[str, ...]:
"""Provide default patterns of package names to always include.
        The list of default packages is buried inside ``jupyterlite-pyodide-kernel``:
        * ``packages/pyodide-kernel/src/worker.ts:initKernel``
        and can be extended by users via:
* ``packages/pyodide-kernel/src/tokens.ts:loadPyodideOptions``
"""
return (
"^comm$",
"^ipykernel*",
"^micropip$",
"^piplite$",
"^pyodide-kernel$",
"^sqlite3$",
"^ssl$",
)
# properties
@property
def offline_lockfile(self) -> Path:
"""A convenience property for a derived offline ``pyodide-lock`` output."""
return self.lockfile.parent / PYODIDE_LOCK_OFFLINE
@property
def all_includes(self) -> list[str]:
"""Get all inclusion patterns."""
return sorted({
*self.includes,
*self.extra_includes,
*self.pyodide_lock_addon.extra_preload_packages,
})
@property
def all_excludes(self) -> list[str]:
"""Get all exclusion patterns."""
return sorted({
*self.excludes,
*self.extra_excludes,
})
# JupyterLite API methods
def status(self, manager: LiteManager) -> TTaskGenerator:
"""Report on the status of offline ``pyodide-lock``."""
def _status() -> None:
from textwrap import indent
lines = [
f"""enabled: {self.enabled}""",
f"""includes: {" ".join(self.all_includes)}""",
f"""excludes: {" ".join(self.all_excludes)}""",
f"""version: {__version__}""",
]
print(indent("\n".join(lines), " "), flush=True)
yield self.task(name="offline", actions=[_status])
def post_build(self, manager: LiteManager) -> TTaskGenerator:
"""Collect all the packages and generate a ``pyodide-lock.json`` file."""
if not self.enabled: # pragma: no cover
return
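        # this summary string feeds ``doit``'s ``config_changed`` uptodate check, so
        # the tasks below re-run whenever the patterns or pruning settings change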
config_str = f"""
includes: {self.all_includes}
excludes: {self.all_excludes}
prune: {self.prune}
"""
yield dict(
name="offline",
actions=[self.resolve_offline],
file_dep=[self.lockfile],
targets=[self.offline_lockfile],
uptodate=[config_changed(config_str)],
)
jupyterlite_json = self.output_dir / JUPYTERLITE_JSON
yield self.task(
name="patch",
actions=[(self.patch_config, [jupyterlite_json, self.offline_lockfile])],
file_dep=[jupyterlite_json, self.lockfile, self.offline_lockfile],
uptodate=[config_changed(config_str)],
)
# offline logic
def resolve_offline(self) -> bool:
"""Download and rewrite lockfile with selected packages and dependencies."""
lock_data = json.loads(self.lockfile.read_text(**UTF8))
raw_packages: dict[str, dict[str, Any]] = lock_data["packages"]
leaf_included, dep_included = self.get_included_names(raw_packages)
        new_packages = self.get_pruned_packages(
raw_packages, leaf_included, dep_included
)
lock_data["packages"] = new_packages
out_dir = self.lockfile.parent
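        # rewritten ``file_name`` URLs resolve relative to the deployed lockfile, so
        # they climb out of its directory and back into ``static/``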
stem = f"../../static/{PYODIDE_LOCK_STEM}"
for pkg_name in sorted({*leaf_included, *dep_included}):
self.resolve_one_offline(pkg_name, out_dir, stem, new_packages)
self.offline_lockfile.write_text(json.dumps(lock_data, **JSON_FMT))
return True
def resolve_one_offline(
self,
pkg_name: str,
out_dir: Path,
stem: str,
packages: dict[str, dict[str, Any]],
) -> None:
"""Rewrite a single package's info (if needed)."""
pkg_info = packages[pkg_name]
if not re.match(RE_REMOTE_URL, pkg_info["file_name"]):
self.log.debug("[offline] [%s] already available locally %s")
return
url = urllib.parse.urlparse(pkg_info["file_name"])
whl_name = url.path.split("/")[-1]
cache_whl = self.package_cache / whl_name
pyodide_whl = self.pyodide_addon.output_pyodide / whl_name
dest = out_dir / whl_name
dest_url: str | None = None
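        # prefer a wheel already copied next to the lockfile, then one shipped with
        # the pyodide distribution, then the download cache (fetching on a miss)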
if not dest.exists() and pyodide_whl.exists():
dest = pyodide_whl
dest_url = f"../../static/{PYODIDE}/{whl_name}"
if not dest.exists():
if not cache_whl.exists(): # pragma: no cover
self.fetch_one(pkg_info["file_name"], cache_whl)
self.copy_one(cache_whl, dest)
pkg_info["file_name"] = dest_url or f"""{stem}/{whl_name}"""
old_sha256 = pkg_info["sha256"]
whl_sha256 = sha256(dest.read_bytes()).hexdigest()
if old_sha256 != whl_sha256:
self.log.warning(
"[offline] fixing sha256 for %s: lock:%s observed:%s wheel:%s",
pkg_name,
old_sha256,
whl_sha256,
whl_name,
)
pkg_info["sha256"] = whl_sha256
    def get_pruned_packages(
self,
raw_packages: dict[str, dict[str, Any]],
leaf_included: set[str],
dep_included: set[str],
) -> dict[str, dict[str, Any]]:
"""Provide a copy of packages, potentially with pruning."""
any_included = {*leaf_included, *dep_included}
raw_packages = deepcopy(raw_packages)
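        # when pruning, drop every package that was neither matched nor pulled in as
        # a dependency; otherwise the full package list is kept as-is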
if self.prune:
new_packages = {
pkg_name: pkg_info
for pkg_name, pkg_info in raw_packages.items()
if pkg_name in any_included
}
else:
new_packages = raw_packages
pruned_names = sorted(p for p in raw_packages if p not in new_packages)
self.log.warning(
"[offline]\t%s packages pruned: %s", len(pruned_names), pruned_names
)
self.log.warning(
"[offline]\t%s packages remain: %s", len(new_packages), sorted(new_packages)
)
return new_packages
def get_included_names(
self, raw_packages: dict[str, dict[str, Any]]
) -> tuple[set[str], set[str]]:
"""Generate the lock."""
includes = self.all_includes
excludes = self.all_excludes
leaf_included: set[str] = set()
check_deps: set[str] = set()
# get leaf deps on first pass
for pkg_name, pkg_info in raw_packages.items():
if self.is_included(pkg_name, includes=includes, excludes=excludes):
leaf_included = {pkg_name, *leaf_included}
new_deps = {*map(canonicalize_name, pkg_info["depends"])}
if new_deps:
self.log.debug(
"[offline] leaf %s depends on: %s", pkg_name, new_deps
)
check_deps = {*check_deps, *new_deps}
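        # second pass: follow dependencies transitively, visiting each name only once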
dep_included: set[str] = set()
while check_deps:
pkg_name = check_deps.pop()
if pkg_name in leaf_included or pkg_name in dep_included:
continue
dep_included = {*dep_included, pkg_name}
new_deps = {*map(canonicalize_name, raw_packages[pkg_name]["depends"])}
if new_deps:
self.log.debug("[offline] dep %s depends on: %s", pkg_name, new_deps)
check_deps = {*check_deps, *new_deps}
return leaf_included, dep_included
def is_included(
self,
pkg_name: str,
includes: list[str],
excludes: list[str],
) -> bool:
"""Get the URL and filename if a package should be downloaded."""
skip = "[offline] excluding"
if any(re.match(exclude, pkg_name) for exclude in excludes):
self.log.debug("%s: excluded file %s [%s]", skip, pkg_name, excludes)
return False
if not any(re.match(include, pkg_name) for include in includes):
self.log.debug("%s: not included %s [%s]", skip, pkg_name, includes)
return False
return True