mirror of
https://github.com/sveltejs/ai-tools.git
synced 2026-07-03 11:12:28 +08:00
277 lines
7.7 KiB
TypeScript
277 lines
7.7 KiB
TypeScript
import fs from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import { parseArgs } from 'node:util';
|
|
|
|
const { values } = parseArgs({
|
|
options: {
|
|
file: { type: 'string', short: 'f' },
|
|
repo: { type: 'string', short: 'r' },
|
|
output: { type: 'string', short: 'o' },
|
|
},
|
|
});
|
|
|
|
const { file, repo, output } = values;
|
|
|
|
if (!file || !repo || !output) {
|
|
console.error(
|
|
'Usage: resolve-references --file <path-or-content> --repo <repo> --output <folder>',
|
|
);
|
|
process.exit(1);
|
|
}
|
|
|
|
export function remove_llm_ignore_blocks(content: string): string {
|
|
return content.replace(/<!--\s*llm-ignore-start\s*-->[\s\S]*?<!--\s*llm-ignore-end\s*-->/g, '');
|
|
}
|
|
|
|
/**
|
|
* Determines whether the input string is a file path or raw markdown content.
|
|
* If it's a file, reads and returns its content. Otherwise returns the string as-is.
|
|
*/
|
|
async function get_content(input: string) {
|
|
try {
|
|
const stat = await fs.stat(input);
|
|
if (stat.isFile()) {
|
|
return await fs.readFile(input, 'utf-8');
|
|
}
|
|
} catch {
|
|
// not a file path — treat as raw content
|
|
}
|
|
return input;
|
|
}
|
|
|
|
/**
|
|
* Extracts a section from markdown content based on a heading id (hash).
|
|
* Finds the heading whose text (lowercased, spaces replaced with `-`) matches
|
|
* the hash and returns everything from that heading up to the next heading of
|
|
* the same or higher level.
|
|
*/
|
|
function extract_section(content: string, hash: string) {
|
|
const lines = content.split('\n');
|
|
let start_index = -1;
|
|
let heading_level = 0;
|
|
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const line = lines[i]!;
|
|
const heading_match = line.match(/^(#{1,6})\s+(.+)/);
|
|
if (!heading_match) continue;
|
|
|
|
const level = heading_match[1]!.length;
|
|
const text = heading_match[2]!;
|
|
const slug = text.toLowerCase().replace(/\s+/g, '-');
|
|
|
|
if (slug === hash.toLowerCase()) {
|
|
start_index = i;
|
|
heading_level = level;
|
|
continue;
|
|
}
|
|
|
|
if (start_index !== -1 && level <= heading_level) {
|
|
return lines.slice(start_index, i).join('\n').trim();
|
|
}
|
|
}
|
|
|
|
if (start_index !== -1) {
|
|
return lines.slice(start_index).join('\n').trim();
|
|
}
|
|
|
|
return content;
|
|
}
|
|
|
|
/**
|
|
* Removes the `title`, `skill`, and `NOTE` fields from markdown frontmatter, if present.
|
|
* Removes the entire frontmatter block if they were the only fields.
|
|
*/
|
|
function remove_frontmatter_unneeded_fields(content: string) {
|
|
const frontmatter_match = content.match(/^---\n([\s\S]*?)\n---\n?/);
|
|
if (!frontmatter_match) return content;
|
|
|
|
const frontmatter = frontmatter_match[1]!;
|
|
const lines = frontmatter.split('\n').filter((line) => !line.match(/^(title|skill|NOTE)\s*:/));
|
|
|
|
if (lines.length === 0) {
|
|
// frontmatter is now empty — remove the whole block
|
|
return content.slice(frontmatter_match[0].length);
|
|
}
|
|
|
|
return `---\n${lines.join('\n')}\n---\n` + content.slice(frontmatter_match[0].length);
|
|
}
|
|
|
|
/**
|
|
* Derives a file-safe name from a URL path segment.
|
|
* e.g. "some/deep/path" -> "path"
|
|
*/
|
|
function derive_name(link: string) {
|
|
const without_hash = link.split('#')[0]!;
|
|
const segments = without_hash.split('/').filter(Boolean);
|
|
return segments[segments.length - 1] ?? 'reference';
|
|
}
|
|
|
|
const content = remove_llm_ignore_blocks(
|
|
remove_frontmatter_unneeded_fields(await get_content(file)),
|
|
);
|
|
|
|
// Match markdown links that are either:
|
|
// 1. Relative paths (not starting with http://, https://, mailto:, #, or /)
|
|
// 2. Absolute /docs/ paths (e.g. /docs/svelte/each)
|
|
const relative_link_regex = /\[([^\]]*)\]\((?!https?:\/\/|mailto:|#|\/)([^)]+)\)/g;
|
|
const docs_link_regex = /\[([^\]]*)\]\((\/docs\/[^)]+)\)/g;
|
|
|
|
interface Link_Info {
|
|
full_match: string;
|
|
text: string;
|
|
href: string;
|
|
hash: string | undefined;
|
|
clean_path: string;
|
|
is_absolute_docs: boolean;
|
|
}
|
|
|
|
const links: Link_Info[] = [];
|
|
|
|
let match;
|
|
while ((match = relative_link_regex.exec(content)) !== null) {
|
|
const href = match[2]!;
|
|
const hash_index = href.indexOf('#');
|
|
const has_hash = hash_index !== -1;
|
|
|
|
links.push({
|
|
full_match: match[0],
|
|
text: match[1]!,
|
|
href,
|
|
hash: has_hash ? href.slice(hash_index + 1) : undefined,
|
|
clean_path: has_hash ? href.slice(0, hash_index) : href,
|
|
is_absolute_docs: false,
|
|
});
|
|
}
|
|
|
|
while ((match = docs_link_regex.exec(content)) !== null) {
|
|
const href = match[2]!;
|
|
const hash_index = href.indexOf('#');
|
|
const has_hash = hash_index !== -1;
|
|
|
|
links.push({
|
|
full_match: match[0],
|
|
text: match[1]!,
|
|
href,
|
|
hash: has_hash ? href.slice(hash_index + 1) : undefined,
|
|
clean_path: has_hash ? href.slice(0, hash_index) : href,
|
|
is_absolute_docs: true,
|
|
});
|
|
}
|
|
|
|
if (links.length === 0) {
|
|
console.log('No relative links found in the markdown.');
|
|
process.exit(0);
|
|
}
|
|
|
|
console.log(`Found ${links.length} relative link(s) to resolve.`);
|
|
|
|
const references_dir = path.join(output, 'references');
|
|
await fs.mkdir(references_dir, { recursive: true });
|
|
|
|
let updated_content = content;
|
|
|
|
// Track names we've already used to avoid collisions
|
|
const used_names = new Map<string, number>();
|
|
|
|
for (const link of links) {
|
|
const base_name = derive_name(link.clean_path);
|
|
const count = used_names.get(base_name) ?? 0;
|
|
used_names.set(base_name, count + 1);
|
|
const name = count > 0 ? `${base_name}-${count}` : base_name;
|
|
|
|
// For absolute /docs/ links, fetch directly from svelte.dev (supports cross-repo links).
|
|
// For relative links, prepend /docs/{repo}/.
|
|
const url = link.is_absolute_docs
|
|
? `https://svelte.dev${link.clean_path}/llms.txt`
|
|
: `https://svelte.dev/docs/${repo}/${link.clean_path}/llms.txt`;
|
|
|
|
console.log(`Fetching: ${url}${link.hash ? ` (section: #${link.hash})` : ''}`);
|
|
|
|
try {
|
|
const response = await fetch(url);
|
|
|
|
if (!response.ok) {
|
|
console.warn(` Warning: ${response.status} ${response.statusText} for ${url}`);
|
|
continue;
|
|
}
|
|
|
|
let fetched_content = await response.text();
|
|
|
|
if (link.hash) {
|
|
fetched_content = extract_section(fetched_content, link.hash);
|
|
}
|
|
|
|
const ref_filename = `${name}.md`;
|
|
const ref_path = path.join(references_dir, ref_filename);
|
|
|
|
await fs.writeFile(ref_path, remove_llm_ignore_blocks(remove_cut_preambles(fetched_content)));
|
|
console.log(` Saved: references/${ref_filename}`);
|
|
|
|
// Replace the link in the markdown
|
|
const new_link = `[${link.text}](references/${ref_filename})`;
|
|
updated_content = updated_content.replace(link.full_match, new_link);
|
|
} catch (error) {
|
|
console.warn(` Error fetching ${url}:`, error);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* In fenced code blocks, removes everything from the start of the block
|
|
* up to and including a `// ---cut---` comment. If no such comment exists
|
|
* the code block is left unchanged.
|
|
*/
|
|
function remove_cut_preambles(content: string) {
|
|
const lines = content.split('\n');
|
|
const result: string[] = [];
|
|
let in_code_block = false;
|
|
let code_block_buffer: string[] = [];
|
|
let fence_line = '';
|
|
|
|
for (const line of lines) {
|
|
if (!in_code_block && line.match(/^```\w*$/)) {
|
|
in_code_block = true;
|
|
fence_line = line;
|
|
code_block_buffer = [];
|
|
continue;
|
|
}
|
|
|
|
if (in_code_block && line.match(/^```$/)) {
|
|
// End of code block — check if there was a cut comment
|
|
const cut_index = code_block_buffer.findIndex((l) => l.match(/^\s*\/\/\s*---cut---\s*$/));
|
|
|
|
result.push(fence_line);
|
|
if (cut_index !== -1) {
|
|
result.push(...code_block_buffer.slice(cut_index + 1));
|
|
} else {
|
|
result.push(...code_block_buffer);
|
|
}
|
|
result.push(line);
|
|
|
|
in_code_block = false;
|
|
code_block_buffer = [];
|
|
continue;
|
|
}
|
|
|
|
if (in_code_block) {
|
|
code_block_buffer.push(line);
|
|
} else {
|
|
result.push(line);
|
|
}
|
|
}
|
|
|
|
// If file ends mid-code-block, flush as-is
|
|
if (in_code_block) {
|
|
result.push(fence_line);
|
|
result.push(...code_block_buffer);
|
|
}
|
|
|
|
return result.join('\n');
|
|
}
|
|
|
|
// Write the updated markdown content to the output folder
|
|
updated_content = remove_cut_preambles(updated_content);
|
|
|
|
const output_filename = path.join(output, 'SKILL.md');
|
|
await fs.writeFile(output_filename, updated_content);
|
|
console.log(`\nUpdated markdown written to: ${output_filename}`);
|