Files
github-spec-kit/src/specify_cli/_github_http.py
lselvar f512b8b0d1 fix: resolve GitHub release asset API URL for private repo preset and workflow downloads (#2855)
* fix: resolve GitHub release asset API URL for private repo preset and workflow downloads

- Add shared `resolve_github_release_asset_api_url` utility to `_github_http.py` for
  reuse across preset and workflow download paths
- Apply the same private-repo fix from PR #2792 (extensions) to:
  - `PresetCatalog.download_pack` — ZIP downloads via catalog `download_url`
  - `preset add --from <url>` — ZIP downloads from a direct URL
  - `workflow add <url>` — workflow YAML downloads from a direct URL
  - `workflow add <id>` (catalog) — workflow YAML downloads via catalog `url`
- For browser release URLs (`github.com/…/releases/download/…`), the asset is
  resolved via the GitHub REST API and downloaded with `Accept: application/octet-stream`
- Direct REST API asset URLs (`api.github.com/…/releases/assets/<id>`) are
  downloaded directly with `Accept: application/octet-stream`
- Auth is preserved end-to-end through the existing `open_url` infrastructure
- Update `test_download_pack_sends_auth_header` and add
  `test_download_pack_accepts_direct_github_rest_asset_url` to cover both paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: URL-encode tag in release API URL to handle special characters

Encode the tag as a path segment (using quote with safe='') when
building the releases/tags/<tag> API URL. This prevents malformed
URLs when tags contain reserved characters like '/' or '#'.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* test: add CLI-level tests for preset add --from GitHub release URL resolution

Adds regression tests covering:
- resolve_github_release_asset_api_url unit tests (passthrough, resolution,
  network error, URL encoding of special chars in tags)
- CLI-level 'preset add --from <github-release-url>' end-to-end flow
- CLI-level 'preset add --from <api-asset-url>' direct passthrough

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* refactor: deduplicate release URL resolution; fix test issues

- ExtensionCatalog._resolve_github_release_asset_api_url now delegates
  to the shared helper in _github_http.py (also gains URL-encoding fix)
- Remove unused 'io' import from test_github_http.py
- Remove duplicate 'provides' dict keys accidentally added to test_presets.py

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: align resolver timeout with download timeout; add workflow CLI tests

- Pass timeout=30 to resolve_github_release_asset_api_url in both
  workflow add paths so worst-case latency matches the download timeout
- Add CLI-level regression tests for 'workflow add <url>' covering
  browser URL resolution and direct API asset URL passthrough

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: remove unused urllib.request import; add catalog workflow test

- Remove unused 'import urllib.request' in preset add --from path
- Add CLI test for catalog-based 'workflow add <id>' with GitHub
  release URL resolution

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* style: remove unused MagicMock imports from tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Manfred Riem <mnriem@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-05 10:41:40 -05:00

167 lines
6.4 KiB
Python

"""Shared GitHub-authenticated HTTP helpers.
Used by both ExtensionCatalog and PresetCatalog to attach
GITHUB_TOKEN / GH_TOKEN credentials to requests targeting
GitHub-hosted domains, while preventing token leakage to
third-party hosts on redirects.
"""
import os
import urllib.request
from typing import Callable, Dict, Optional
from urllib.parse import quote, unquote, urlparse
# GitHub-owned hostnames that should receive the Authorization header.
# Includes codeload.github.com because GitHub archive URL downloads
# (e.g. /archive/refs/tags/<tag>.zip) redirect there and require auth
# for private repositories.
GITHUB_HOSTS = frozenset({
"raw.githubusercontent.com",
"github.com",
"api.github.com",
"codeload.github.com",
})
def build_github_request(url: str) -> urllib.request.Request:
"""Build a urllib Request, adding a GitHub auth header when available.
Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an
``Authorization: Bearer <value>`` header when the target hostname is one
of the known GitHub-owned domains. Non-GitHub URLs are returned as plain
requests so credentials are never leaked to third-party hosts.
Raises:
ValueError: If ``url`` is empty or whitespace-only.
ValueError: If ``url`` does not use the ``http`` or ``https`` scheme.
ValueError: If ``url`` does not include a hostname.
"""
headers: Dict[str, str] = {}
url = url.strip()
if not url:
raise ValueError("url must not be empty")
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"}:
raise ValueError(f"url must start with http:// or https://, got: {url!r}")
if not parsed.hostname:
raise ValueError(f"url must include a hostname, got: {url!r}")
github_token = (os.environ.get("GITHUB_TOKEN") or "").strip()
gh_token = (os.environ.get("GH_TOKEN") or "").strip()
token = github_token or gh_token or None
hostname = parsed.hostname.lower()
if token and hostname in GITHUB_HOSTS:
headers["Authorization"] = f"Bearer {token}"
return urllib.request.Request(url, headers=headers)
class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler):
"""Redirect handler that drops the Authorization header when leaving GitHub.
Prevents token leakage to CDNs or other third-party hosts that GitHub
may redirect to (e.g. S3 for release asset downloads, objects.githubusercontent.com).
Auth is preserved as long as the redirect target remains within GITHUB_HOSTS.
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
original_auth = req.get_header("Authorization")
new_req = super().redirect_request(req, fp, code, msg, headers, newurl)
if new_req is not None:
hostname = (urlparse(newurl).hostname or "").lower()
if hostname in GITHUB_HOSTS:
if original_auth:
new_req.add_unredirected_header("Authorization", original_auth)
else:
new_req.headers.pop("Authorization", None)
new_req.unredirected_hdrs.pop("Authorization", None)
return new_req
def resolve_github_release_asset_api_url(
download_url: str,
open_url_fn: Callable,
timeout: int = 60,
) -> Optional[str]:
"""Resolve a GitHub browser release URL to its REST API asset URL.
For private or SSO-protected repositories, browser release download
URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
redirect to an HTML/SSO page instead of delivering the file. This
helper resolves such a URL to the matching GitHub REST API asset URL
(``https://api.github.com/repos/…/releases/assets/<id>``), which can
then be downloaded with ``Accept: application/octet-stream`` and an
auth token to retrieve the actual file payload.
If *download_url* is already a REST API asset URL, it is returned
as-is. Non-GitHub URLs and GitHub URLs that are not release-download
URLs return ``None``. If the API lookup fails (e.g. network error or
asset not found), ``None`` is returned so callers can fall back to the
original URL.
Args:
download_url: The URL to resolve.
open_url_fn: A callable compatible with
``specify_cli.authentication.http.open_url`` used to make the
authenticated API request.
timeout: Per-request timeout in seconds.
Returns:
The resolved REST API asset URL, or ``None`` if resolution is not
applicable or fails.
"""
import json
import urllib.error
parsed = urlparse(download_url)
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]
# Already a REST API asset URL — use it directly
if (
parsed.hostname == "api.github.com"
and len(parts) >= 6
and parts[:1] == ["repos"]
and parts[3:5] == ["releases", "assets"]
):
return download_url
# Only handle github.com browser release download URLs
if parsed.hostname != "github.com":
return None
# Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
if len(parts) < 6 or parts[2:4] != ["releases", "download"]:
return None
owner, repo, tag = parts[0], parts[1], parts[4]
asset_name = "/".join(parts[5:])
encoded_tag = quote(tag, safe="")
release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/tags/{encoded_tag}"
try:
with open_url_fn(release_url, timeout=timeout) as response:
release_data = json.loads(response.read())
except (urllib.error.URLError, json.JSONDecodeError):
return None
for asset in release_data.get("assets", []):
if asset.get("name") == asset_name and asset.get("url"):
return str(asset["url"])
return None
def open_github_url(url: str, timeout: int = 10):
"""Open a URL with GitHub auth, stripping the header on cross-host redirects.
When the request carries an Authorization header, a custom redirect
handler drops that header if the redirect target is not a GitHub-owned
domain, preventing token leakage to CDNs or other third-party hosts
that GitHub may redirect to (e.g. S3 for release asset downloads).
"""
req = build_github_request(url)
if not req.get_header("Authorization"):
return urllib.request.urlopen(req, timeout=timeout)
opener = urllib.request.build_opener(_StripAuthOnRedirect)
return opener.open(req, timeout=timeout)