From 3a7f64c8a5298094ea327f9093df1b906788a48b Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Tue, 28 Apr 2026 18:47:22 +0500 Subject: [PATCH 1/2] fix(extensions): use explicit UTF-8 encoding when reading manifest YAML (#2370) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(extensions): use explicit UTF-8 encoding when reading manifest YAML On Windows, Python's open() defaults to the system locale encoding (e.g., GBK on Chinese Windows), which causes UnicodeDecodeError when extension.yml or preset.yml contains non-ASCII content such as Chinese characters in description fields. Add encoding='utf-8' to ExtensionManifest._load_yaml and PresetManifest._load_yaml so manifests are read consistently across platforms. Fixes #2325 * test(extensions,presets): add UTF-8 manifest regression tests for #2325 Positive: extension.yml/preset.yml with non-ASCII (Chinese + emoji) descriptions load correctly when written as UTF-8 bytes — fails on Windows without explicit encoding='utf-8'. Negative: files containing invalid UTF-8 bytes raise a clean error (ValidationError or UnicodeDecodeError), not a silent crash. * fix(extensions,presets): wrap I/O and decode errors as ValidationError Address remaining Copilot concerns on #2370: - Catch UnicodeDecodeError and OSError in both manifest loaders and re-raise as ValidationError / PresetValidationError so callers see a consistent error type, not a bare decode/IO traceback. - Validate that PresetManifest YAML root is a mapping (extensions.py already had this; presets.py was missing it). Treat None as {} for empty-file compatibility. - Tighten the negative regression tests to assert the specific message, and add a non-mapping-root test for PresetManifest matching the existing one for ExtensionManifest. 
--- src/specify_cli/extensions.py | 8 +++++++- src/specify_cli/presets.py | 17 +++++++++++++++-- tests/test_extensions.py | 29 +++++++++++++++++++++++++++++ tests/test_presets.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 916038cd5..a419ebf1d 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -139,12 +139,18 @@ class ExtensionManifest: def _load_yaml(self, path: Path) -> dict: """Load YAML file safely.""" try: - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) except yaml.YAMLError as e: raise ValidationError(f"Invalid YAML in {path}: {e}") except FileNotFoundError: raise ValidationError(f"Manifest not found: {path}") + except UnicodeDecodeError as e: + raise ValidationError( + f"Manifest is not valid UTF-8: {path} ({e.reason} at byte {e.start})" + ) + except OSError as e: + raise ValidationError(f"Could not read manifest {path}: {e}") if not isinstance(data, dict): raise ValidationError( f"Manifest must be a YAML mapping, got {type(data).__name__}: {path}" diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 24de73521..27054a77f 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -136,12 +136,25 @@ class PresetManifest: def _load_yaml(self, path: Path) -> dict: """Load YAML file safely.""" try: - with open(path, 'r') as f: - return yaml.safe_load(f) or {} + with open(path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) except yaml.YAMLError as e: raise PresetValidationError(f"Invalid YAML in {path}: {e}") except FileNotFoundError: raise PresetValidationError(f"Manifest not found: {path}") + except UnicodeDecodeError as e: + raise PresetValidationError( + f"Manifest is not valid UTF-8: {path} ({e.reason} at byte {e.start})" + ) + except OSError as e: + raise PresetValidationError(f"Could not read manifest 
{path}: {e}") + if data is None: + return {} + if not isinstance(data, dict): + raise PresetValidationError( + f"Manifest must be a YAML mapping, got {type(data).__name__}: {path}" + ) + return data def _validate(self): """Validate manifest structure and required fields.""" diff --git a/tests/test_extensions.py b/tests/test_extensions.py index e6a206c06..c5be0ab4f 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -225,6 +225,35 @@ class TestExtensionManifest: with pytest.raises(ValidationError, match="YAML mapping"): ExtensionManifest(manifest_path) + def test_utf8_non_ascii_description_loads(self, temp_dir, valid_manifest_data): + """Regression for #2325: non-ASCII (UTF-8) description loads on any platform. + + On Windows, Python's default text-mode encoding is the locale codepage + (e.g. cp1252/GBK), which raises UnicodeDecodeError on UTF-8 bytes + outside the ASCII range. The loader must open with encoding='utf-8'. + """ + import yaml + + valid_manifest_data["extension"]["description"] = "中文测试 — émojis 🚀" + manifest_path = temp_dir / "extension.yml" + # Write UTF-8 bytes explicitly so the test exercises the read path, + # not the (locale-dependent) write path. + manifest_path.write_bytes( + yaml.safe_dump(valid_manifest_data, allow_unicode=True).encode("utf-8") + ) + + manifest = ExtensionManifest(manifest_path) + assert manifest.description == "中文测试 — émojis 🚀" + + def test_invalid_utf8_bytes_raises_validation_error(self, temp_dir): + """Negative case: file containing invalid UTF-8 bytes raises ValidationError, not raw UnicodeDecodeError.""" + manifest_path = temp_dir / "extension.yml" + # 0xFF/0xFE are not valid UTF-8 lead bytes. 
+ manifest_path.write_bytes(b"\xff\xfe not valid utf-8 \xff\n") + + with pytest.raises(ValidationError, match="not valid UTF-8"): + ExtensionManifest(manifest_path) + def test_invalid_extension_id(self, temp_dir, valid_manifest_data): """Test manifest with invalid extension ID format.""" import yaml diff --git a/tests/test_presets.py b/tests/test_presets.py index ee4a6dddb..4b167ed9b 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -160,6 +160,38 @@ class TestPresetManifest: with pytest.raises(PresetValidationError, match="Invalid YAML"): PresetManifest(bad_file) + def test_utf8_non_ascii_description_loads(self, temp_dir, valid_pack_data): + """Regression for #2325: non-ASCII (UTF-8) description loads on any platform. + + On Windows, Python's default text-mode encoding is the locale codepage + (e.g. cp1252/GBK), which raises UnicodeDecodeError on UTF-8 bytes + outside the ASCII range. The loader must open with encoding='utf-8'. + """ + valid_pack_data["preset"]["description"] = "中文测试 — émojis 🚀" + manifest_path = temp_dir / "preset.yml" + manifest_path.write_bytes( + yaml.safe_dump(valid_pack_data, allow_unicode=True).encode("utf-8") + ) + + manifest = PresetManifest(manifest_path) + assert manifest.description == "中文测试 — émojis 🚀" + + def test_invalid_utf8_bytes_raises_validation_error(self, temp_dir): + """Negative case: file containing invalid UTF-8 bytes raises PresetValidationError, not raw UnicodeDecodeError.""" + manifest_path = temp_dir / "preset.yml" + manifest_path.write_bytes(b"\xff\xfe not valid utf-8 \xff\n") + + with pytest.raises(PresetValidationError, match="not valid UTF-8"): + PresetManifest(manifest_path) + + def test_non_mapping_yaml_raises_validation_error(self, temp_dir): + """Manifest whose YAML root is a scalar or list raises PresetValidationError, not TypeError.""" + manifest_path = temp_dir / "preset.yml" + for bad_content in ("42\n", "[1, 2]\n"): + manifest_path.write_text(bad_content, encoding="utf-8") + with 
pytest.raises(PresetValidationError, match="YAML mapping"): + PresetManifest(manifest_path) + def test_missing_schema_version(self, temp_dir, valid_pack_data): """Test missing schema_version field.""" del valid_pack_data["schema_version"] From a91897923639db5a282081aa5946eebae1d7b507 Mon Sep 17 00:00:00 2001 From: adaumann <94932945+adaumann@users.noreply.github.com> Date: Tue, 28 Apr 2026 15:58:30 +0200 Subject: [PATCH 2/2] feat: Speckit preset fiction book v1.7 - Support for RAG (Chroma DB) offline semantic search (#2367) * Update preset-fiction-book-writing to community catalog - Preset ID: fiction-book-writing - Version: 1.5.0 - Author: Andreas Daumann - Description: Spec-Driven Development for novel and long-form fiction. Replaces software engineering terminology with storytelling craft: specs become story briefs, plans become story structures, and tasks become scene-by-scene writing tasks. Supports 8 POV modes, all major plot structure frameworks, 5 humanized-AI prose profiles, and exports to DOCX/EPUB/LaTeX via pandoc. 
V1.5.0: Support interactive, audiobooks, series, workflow corrections * Add fiction-book-writing preset to community catalog - Preset ID: fiction-book-writing - Version: 1.6.0 - Author: Andreas Daumann - Description: Added support for 12 languages, export with templates, cover builder, bio builder, workflow fixes * Update presets/catalog.community.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fixed updated_at for fiction-book-writing preset * Update README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fixed description for fiction-book-writing * Add fiction-book-preset to community catalog - Preset ID: fiction-book-writing - Version: 1.7.0 - Author: Andreas Daumann - Description: It adapts the Spec-Driven Development workflow for storytelling to create books or audiobooks (with annotations) in 12 languages: features become story elements, specs become story briefs, plans become story structures, and tasks become scene-by-scene writing tasks. Supports single and multi-POV, all major plot structure frameworks, and two style modes: an author voice sample or humanized AI prose. Supports interactive elements like brainstorming, interview, roleplay and extras like statistics, cover builder and bio command. Export with templates for KDP, D2D etc. V1.7.0: Support for offline semantic search. * Update presets/catalog.community.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update presets/catalog.community.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Add fiction-book-writing to community catalog - Preset ID: fiction-book-writing - Version: 1.7.0 - Author: Andreas Daumann - Description: Spec-Driven Development for novel and long-form fiction. 
RAG support * Update docs/community/presets.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/community/presets.md | 2 +- presets/catalog.community.json | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/community/presets.md b/docs/community/presets.md index 03ac777b8..c48f9a3e5 100644 --- a/docs/community/presets.md +++ b/docs/community/presets.md @@ -11,7 +11,7 @@ The following community-contributed presets customize how Spec Kit behaves — o | Canon Core | Adapts original Spec Kit workflow to work together with Canon extension | 2 templates, 8 commands | — | [spec-kit-canon](https://github.com/maximiliamus/spec-kit-canon) | | Claude AskUserQuestion | Upgrades `/speckit.clarify` and `/speckit.checklist` on Claude Code from Markdown-table prompts to the native AskUserQuestion picker, with a recommended option and reasoning on every question | 2 commands | — | [spec-kit-preset-claude-ask-questions](https://github.com/0xrafasec/spec-kit-preset-claude-ask-questions) | | Explicit Task Dependencies | Adds explicit `(depends on T###)` dependency declarations and an Execution Wave DAG to tasks.md for parallel scheduling | 1 template, 1 command | — | [spec-kit-preset-explicit-task-dependencies](https://github.com/Quratulain-bilal/spec-kit-preset-explicit-task-dependencies) | -| Fiction Book Writing | It adapts the Spec-Driven Development workflow for storytelling to create books or audiobooks (with annotations) in 12 languages: features become story elements, specs become story briefs, plans become story structures, and tasks become scene-by-scene writing tasks. Supports single and multi-POV, all major plot structure frameworks, and two style modes: an author voice sample or humanized AI prose. Supports interactive elements like brainstorming, interview, roleplay and extras like statistics, cover builder and bio command. 
Export with templates for KDP, D2D etc. | 22 templates, 27 commands, 1 script | — | [speckit-preset-fiction-book-writing](https://github.com/adaumann/speckit-preset-fiction-book-writing) | +| Fiction Book Writing | It adapts the Spec-Driven Development workflow for storytelling to create books or audiobooks (with annotations) in 12 languages: features become story elements, specs become story briefs, plans become story structures, and tasks become scene-by-scene writing tasks. Supports single and multi-POV, all major plot structure frameworks, and two style modes: an author voice sample or humanized AI prose. Supports interactive elements like brainstorming, interview, roleplay and extras like statistics, cover builder and bio command. Export with templates for KDP, D2D etc. | 22 templates, 27 commands, 2 scripts | — | [speckit-preset-fiction-book-writing](https://github.com/adaumann/speckit-preset-fiction-book-writing) | | Jira Issue Tracking | Overrides `speckit.taskstoissues` to create Jira epics, stories, and tasks instead of GitHub Issues via Atlassian MCP tools | 1 command | — | [spec-kit-preset-jira](https://github.com/luno/spec-kit-preset-jira) | | Multi-Repo Branching | Coordinates feature branch creation across multiple git repositories (independent repos and submodules) during plan and tasks phases | 2 commands | — | [spec-kit-preset-multi-repo-branching](https://github.com/sakitA/spec-kit-preset-multi-repo-branching) | | Pirate Speak (Full) | Transforms all Spec Kit output into pirate speak — specs become "Voyage Manifests", plans become "Battle Plans", tasks become "Crew Assignments" | 6 templates, 9 commands | — | [spec-kit-presets](https://github.com/mnriem/spec-kit-presets) | diff --git a/presets/catalog.community.json b/presets/catalog.community.json index caf28e504..8fb40e99c 100644 --- a/presets/catalog.community.json +++ b/presets/catalog.community.json @@ -108,11 +108,11 @@ "fiction-book-writing": { "name": "Fiction Book Writing", "id": 
"fiction-book-writing", - "version": "1.6.0", - "description": "Spec-Driven Development for novel and long-form fiction. 27 AI commands from idea to submission: story bible governance, 9 POV modes, all major plot structure frameworks, scene-by-scene drafting with quality gates, audiobook pipeline (SSML/ElevenLabs), cover design, sensitivity review, pacing and prose statistics, and pandoc-based export to DOCX/EPUB/LaTeX. Two style modes: author voice sample extraction or humanized-AI prose with 5 craft profiles. 12 languages supported.", + "version": "1.7.0", + "description": "Spec-Driven Development for novel and long-form fiction. 27 AI commands from idea to submission: story bible governance, 9 POV modes, all major plot structure frameworks, scene-by-scene drafting with quality gates, audiobook pipeline (SSML/ElevenLabs), cover design, sensitivity review, pacing and prose statistics, and pandoc-based export to DOCX/EPUB/LaTeX. Two style modes: author voice sample extraction or humanized-AI prose with 5 craft profiles. 12 languages supported. Support for offline semantic search.", "author": "Andreas Daumann", "repository": "https://github.com/adaumann/speckit-preset-fiction-book-writing", - "download_url": "https://github.com/adaumann/speckit-preset-fiction-book-writing/archive/refs/tags/v1.6.0.zip", + "download_url": "https://github.com/adaumann/speckit-preset-fiction-book-writing/archive/refs/tags/v1.7.0.zip", "homepage": "https://github.com/adaumann/speckit-preset-fiction-book-writing", "documentation": "https://github.com/adaumann/speckit-preset-fiction-book-writing/blob/main/fiction-book-writing/README.md", "license": "MIT", @@ -122,7 +122,7 @@ "provides": { "templates": 22, "commands": 27, - "scripts": 1 + "scripts": 2 }, "tags": [ "writing", @@ -140,7 +140,7 @@ "language-support" ], "created_at": "2026-04-09T08:00:00Z", - "updated_at": "2026-04-19T08:00:00Z" + "updated_at": "2026-04-27T08:00:00Z" }, "jira": { "name": "Jira Issue Tracking",