From a69d427e03f32a3ab1b8a7c6897c527ca480116e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 13:19:50 +0000 Subject: [PATCH] Prefer final_url over original URL for archive format detection in download paths --- src/specify_cli/__init__.py | 21 ++++++++++++++------- src/specify_cli/extensions.py | 16 +++++++++------- src/specify_cli/presets.py | 14 ++++++++------ 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index a2c7d270f..4a4c9ec59 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2632,14 +2632,17 @@ def preset_add( from .extensions import detect_archive_format as _det_fmt with tempfile.TemporaryDirectory() as tmpdir: - archive_fmt = _det_fmt(from_url) final_url = from_url + archive_fmt = "" try: with urllib.request.urlopen(from_url, timeout=60) as response: final_url = response.geturl() + content_type = response.headers.get("Content-Type", "") + # Prefer the post-redirect URL for format detection; + # fall back to the original URL only as a last hint. + archive_fmt = _det_fmt(final_url, content_type) if not archive_fmt: - content_type = response.headers.get("Content-Type", "") - archive_fmt = _det_fmt(final_url, content_type) + archive_fmt = _det_fmt(from_url) archive_data = response.read() except urllib.error.URLError as e: console.print(f"[red]Error:[/red] Failed to download: {e}") @@ -3657,18 +3660,22 @@ def extension_add( console.print("Only install extensions from sources you trust.\n") console.print(f"Downloading from {from_url}...") - # Download archive to temp location; detect format from URL or Content-Type. + # Download archive to temp location; detect format from the + # post-redirect URL (with Content-Type fallback), only using + # the original URL as a last hint. download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" download_dir.mkdir(parents=True, exist_ok=True) - archive_fmt = detect_archive_format(from_url) + final_url = from_url + archive_fmt = "" archive_path = None try: with urllib.request.urlopen(from_url, timeout=60) as response: final_url = response.geturl() + content_type = response.headers.get("Content-Type", "") + archive_fmt = detect_archive_format(final_url, content_type) if not archive_fmt: - content_type = response.headers.get("Content-Type", "") - archive_fmt = detect_archive_format(final_url, content_type) + archive_fmt = detect_archive_format(from_url) archive_data = response.read() # Re-validate scheme after any redirect (scheme-downgrade guard). diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 0247703d2..489d19dea 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -2156,17 +2156,19 @@ class ExtensionCatalog: version = ext_info.get("version", "unknown") - # Detect archive format from URL; resolve via Content-Type when needed. - # `final_url` may differ from `download_url` if the server redirects. - archive_fmt = detect_archive_format(download_url) - - # Download the archive + # Download the archive. Determine the archive format from the + # post-redirect URL first (with Content-Type fallback); only use the + # original `download_url` as a last hint if the final URL gives no + # signal. + final_url = download_url + archive_fmt = "" try: with self._open_url(download_url, timeout=60) as response: final_url = response.geturl() + content_type = response.headers.get("Content-Type", "") + archive_fmt = detect_archive_format(final_url, content_type) if not archive_fmt: - content_type = response.headers.get("Content-Type", "") - archive_fmt = detect_archive_format(final_url, content_type) + archive_fmt = detect_archive_format(download_url) archive_data = response.read() except urllib.error.URLError as e: diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 37008c62f..12ab9dca0 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -2313,16 +2313,18 @@ class PresetCatalog: version = pack_info.get("version", "unknown") - # Detect archive format from URL; resolve via Content-Type when needed. - # `final_url` may differ from `download_url` if the server redirects. - archive_fmt = detect_archive_format(download_url) - + # Determine the archive format from the post-redirect URL first + # (with Content-Type fallback); only use the original `download_url` + # as a last hint if the final URL gives no signal. + final_url = download_url + archive_fmt = "" try: with self._open_url(download_url, timeout=60) as response: final_url = response.geturl() + content_type = response.headers.get("Content-Type", "") + archive_fmt = detect_archive_format(final_url, content_type) if not archive_fmt: - content_type = response.headers.get("Content-Type", "") - archive_fmt = detect_archive_format(final_url, content_type) + archive_fmt = detect_archive_format(download_url) archive_data = response.read() except urllib.error.URLError as e: