Improve tarball extraction security and cleanup logic

Agent-Logs-Url: https://github.com/github/spec-kit/sessions/9fb9a8ea-0967-4baf-b95c-7101e423ff58

Co-authored-by: mnriem <15701806+mnriem@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-28 18:09:06 +00:00
committed by GitHub
parent b37f117cf9
commit b3a60f5fba
2 changed files with 15 additions and 7 deletions

View File

@@ -3643,6 +3643,7 @@ def extension_add(
download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads"
download_dir.mkdir(parents=True, exist_ok=True)
archive_fmt = _detect_archive_format(from_url)
archive_path = None
try:
with urllib.request.urlopen(from_url, timeout=60) as response:
@@ -3661,11 +3662,9 @@ def extension_add(
console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}")
raise typer.Exit(1)
finally:
# Clean up downloaded archive
for _suffix in (".zip", ".tar.gz"):
_p = download_dir / f"{extension}-url-download{_suffix}"
if _p.exists():
_p.unlink()
# Clean up the downloaded archive
if archive_path is not None and archive_path.exists():
archive_path.unlink()
else:
# Try bundled extensions first (shipped with spec-kit)

View File

@@ -155,7 +155,11 @@ def _safe_extract_tarball(
(devices, FIFOs, etc.) are rejected.
On Python 3.12 and later the ``"data"`` extraction filter is applied
for an additional layer of OS-level protection.
for an additional layer of OS-level protection. On earlier versions
the explicit member list (containing only pre-validated regular files
and directories) is passed to ``extractall()`` — since all symlinks are
already rejected in the validation phase, no archive-introduced symlink
can be followed during extraction.
Args:
archive_path: Path to the ``.tar.gz``/``.tgz`` archive.
@@ -169,6 +173,7 @@ def _safe_extract_tarball(
with tarfile.open(archive_path, "r:gz") as tf:
members = tf.getmembers()
safe_members = []
# Validate every member before extracting anything.
for member in members:
@@ -201,11 +206,15 @@ def _safe_extract_tarball(
f"Non-regular file in archive: {member.name}"
)
safe_members.append(member)
# Extract — use the "data" filter on Python 3.12+ for extra hardening.
# On older versions pass only the pre-validated members, so that
# extraction is limited to exactly the entries vetted above.
if sys.version_info >= (3, 12):
tf.extractall(dest_dir, filter="data") # type: ignore[call-arg]
else:
tf.extractall(dest_dir) # noqa: S202 — validated manually above
tf.extractall(dest_dir, members=safe_members) # noqa: S202 — validated above
@dataclass