fix: validate URL scheme in build_github_request (#2449)

* fix: validate URL scheme in build_github_request

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* test: add missing hostname validation test for build_github_request

* fix: update docstring and fix import grouping per Copilot feedback

* fix: sort imports and simplify url validation in build_github_request

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Ayesha Aziz
2026-05-05 23:28:29 +05:00
committed by GitHub
parent 10f63c914d
commit 30e6fa9e32
2 changed files with 94 additions and 2 deletions

View File

@@ -8,8 +8,8 @@ third-party hosts on redirects.
import os
import urllib.request
from urllib.parse import urlparse
from typing import Dict
from urllib.parse import urlparse
# GitHub-owned hostnames that should receive the Authorization header.
# Includes codeload.github.com because GitHub archive URL downloads
@@ -30,12 +30,25 @@ def build_github_request(url: str) -> urllib.request.Request:
``Authorization: Bearer <value>`` header when the target hostname is one
of the known GitHub-owned domains. Non-GitHub URLs are returned as plain
requests so credentials are never leaked to third-party hosts.
Raises:
ValueError: If ``url`` is empty or whitespace-only.
ValueError: If ``url`` does not use the ``http`` or ``https`` scheme.
ValueError: If ``url`` does not include a hostname.
"""
headers: Dict[str, str] = {}
url = url.strip()
if not url:
raise ValueError("url must not be empty")
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"}:
raise ValueError(f"url must start with http:// or https://, got: {url!r}")
if not parsed.hostname:
raise ValueError(f"url must include a hostname, got: {url!r}")
github_token = (os.environ.get("GITHUB_TOKEN") or "").strip()
gh_token = (os.environ.get("GH_TOKEN") or "").strip()
token = github_token or gh_token or None
hostname = (urlparse(url).hostname or "").lower()
hostname = parsed.hostname.lower()
if token and hostname in GITHUB_HOSTS:
headers["Authorization"] = f"Bearer {token}"
return urllib.request.Request(url, headers=headers)

79
tests/test_github_http.py Normal file
View File

@@ -0,0 +1,79 @@
"""Tests for GitHub-authenticated HTTP request helpers."""
import os
from unittest.mock import patch
import pytest
from specify_cli._github_http import (
build_github_request,
)
class TestBuildGitHubRequest:
"""Tests for build_github_request() URL validation and auth handling."""
# --- URL Validation Tests ---
def test_empty_url_raises_value_error(self):
"""build_github_request() must reject an empty string URL."""
with pytest.raises(ValueError, match="url must not be empty"):
build_github_request("")
def test_whitespace_url_raises_value_error(self):
"""build_github_request() must reject a whitespace-only URL."""
with pytest.raises(ValueError, match="url must not be empty"):
build_github_request(" ")
def test_non_http_url_raises_value_error(self):
"""build_github_request() must reject URLs without http/https scheme."""
with pytest.raises(ValueError, match="url must start with http"):
build_github_request("not-a-url")
def test_ftp_url_raises_value_error(self):
"""build_github_request() must reject ftp:// URLs."""
with pytest.raises(ValueError, match="url must start with http"):
build_github_request("ftp://github.com/file.zip")
# --- Valid URL Tests ---
def test_valid_https_url_returns_request(self):
"""build_github_request() must return a Request for a valid https URL."""
req = build_github_request("https://github.com/github/spec-kit")
assert req.full_url == "https://github.com/github/spec-kit"
def test_valid_http_url_returns_request(self):
"""build_github_request() must accept http:// URLs."""
req = build_github_request("http://example.com/file")
assert req.full_url == "http://example.com/file"
# --- Auth Header Tests ---
def test_github_token_added_for_github_host(self):
"""Authorization header is set when GITHUB_TOKEN is present."""
with patch.dict(os.environ, {"GITHUB_TOKEN": "test-token", "GH_TOKEN": ""}):
req = build_github_request("https://github.com/github/spec-kit")
assert req.get_header("Authorization") == "Bearer test-token"
def test_gh_token_used_as_fallback(self):
"""GH_TOKEN is used when GITHUB_TOKEN is absent."""
with patch.dict(os.environ, {"GITHUB_TOKEN": "", "GH_TOKEN": "fallback-token"}):
req = build_github_request("https://github.com/github/spec-kit")
assert req.get_header("Authorization") == "Bearer fallback-token"
def test_no_auth_header_for_non_github_host(self):
"""Authorization header must NOT be set for non-GitHub URLs."""
with patch.dict(os.environ, {"GITHUB_TOKEN": "test-token"}):
req = build_github_request("https://example.com/file")
assert req.get_header("Authorization") is None
def test_no_auth_header_when_no_token(self):
"""No Authorization header when no token is set in environment."""
with patch.dict(os.environ, {}, clear=True):
req = build_github_request("https://github.com/github/spec-kit")
assert req.get_header("Authorization") is None
def test_missing_hostname_raises_value_error(self):
"""build_github_request() must reject URLs with valid scheme but no hostname."""
with pytest.raises(ValueError, match="url must include a hostname"):
build_github_request("http://")