// sveltejs-ai-tools/scripts/resolve-references.ts

import fs from 'node:fs/promises';
import path from 'node:path';
import { parseArgs } from 'node:util';

// Command-line interface: three string flags, each with a one-letter alias.
const { values } = parseArgs({
	options: {
		file: { short: 'f', type: 'string' },
		repo: { short: 'r', type: 'string' },
		output: { short: 'o', type: 'string' },
	},
});

const { file, repo, output } = values;

// All three flags are mandatory: a missing (or empty) one prints the usage
// line to stderr and terminates with a failure status.
if (!(file && repo && output)) {
	console.error(
		'Usage: resolve-references --file <path-or-content> --repo <repo> --output <folder>',
	);
	process.exit(1);
}

/**
 * Strips every region delimited by `<!-- llm-ignore-start -->` and
 * `<!-- llm-ignore-end -->` comments, including the two delimiters themselves.
 *
 * Whitespace inside the comment delimiters is optional, and each start marker
 * is paired with the nearest following end marker. A start marker without a
 * matching end marker is left in place.
 *
 * @param content - markdown text to clean
 * @returns the text with all ignore regions removed
 */
export function remove_llm_ignore_blocks(content: string): string {
	const ignore_region = /<!--\s*llm-ignore-start\s*-->[\s\S]*?<!--\s*llm-ignore-end\s*-->/g;
	return content.replace(ignore_region, '');
}

/**
 * Resolves the script's `file` input to markdown text.
 *
 * The input is first tried as a path: when it points at a regular file, that
 * file's UTF-8 content is returned. In every other case — nothing exists at
 * that path, the path is not a regular file, or a filesystem call throws —
 * the input string itself is returned and treated as raw markdown.
 */
async function get_content(input: string) {
	let from_disk: string | undefined;

	try {
		const info = await fs.stat(input);
		if (info.isFile()) {
			from_disk = await fs.readFile(input, 'utf-8');
		}
	} catch {
		// Deliberately ignored: the input is not a usable path, so it is raw content
	}

	// `??` rather than `||` so that an empty file still yields its (empty) content
	return from_disk ?? input;
}

/**
 * Extracts a section from markdown content based on a heading id (hash).
 *
 * A heading matches when its text — trimmed, lowercased, and with every run of
 * whitespace replaced by `-` — equals the lowercased hash. The section runs
 * from the first matching heading up to (not including) the next heading of
 * the same or a higher level, or to the end of the document.
 *
 * Lines inside fenced code blocks are never treated as headings, so a
 * `# comment` in a shell snippet can neither start nor end a section.
 *
 * @param content - the full markdown document
 * @param hash - the heading id to look for, without the leading `#`
 * @returns the trimmed section, or `content` unchanged when no heading matches
 */
function extract_section(content: string, hash: string) {
	const lines = content.split('\n');
	const wanted_slug = hash.toLowerCase();
	let start_index = -1;
	let heading_level = 0;
	// Marker (e.g. "```") of the fenced code block we are currently inside, if any
	let open_fence: string | undefined;

	for (let i = 0; i < lines.length; i++) {
		const line = lines[i]!;

		const fence_match = line.match(/^ {0,3}(`{3,}|~{3,})(.*)/);
		if (fence_match) {
			const marker = fence_match[1]!;
			const rest = fence_match[2]!;

			if (open_fence === undefined) {
				// A backtick fence cannot have backticks in its info string — that is
				// inline code, so such a line falls through as ordinary text
				if (marker[0] === '~' || !rest.includes('`')) {
					open_fence = marker;
					continue;
				}
			} else {
				// A closing fence uses the same character, is at least as long as the
				// opening fence, and carries no info string
				const closes =
					marker[0] === open_fence[0] &&
					marker.length >= open_fence.length &&
					rest.trim() === '';
				if (closes) open_fence = undefined;
				continue;
			}
		}

		if (open_fence !== undefined) continue;

		const heading_match = line.match(/^(#{1,6})\s+(.+)/);
		if (!heading_match) continue;

		const level = heading_match[1]!.length;

		if (start_index !== -1) {
			// Already inside the section: only a heading of the same or a higher
			// level ends it. Deeper headings — even ones with the same slug — belong to it.
			if (level <= heading_level) {
				return lines.slice(start_index, i).join('\n').trim();
			}
			continue;
		}

		// Trim first so trailing whitespace on the heading does not become a trailing `-`
		const slug = heading_match[2]!.trim().toLowerCase().replace(/\s+/g, '-');

		if (slug === wanted_slug) {
			start_index = i;
			heading_level = level;
		}
	}

	// The section was the last one in the document
	if (start_index !== -1) {
		return lines.slice(start_index).join('\n').trim();
	}

	// No heading matched: fall back to the whole document
	return content;
}

/**
 * Drops the `title`, `skill` and `NOTE` entries from a leading `---` frontmatter block.
 *
 * Only lines that begin (at column 0) with one of those keys followed by `:`
 * are removed; every other frontmatter line is kept verbatim. When nothing is
 * left, the whole frontmatter block is removed. Content without a frontmatter
 * block at its very start is returned unchanged.
 */
function remove_frontmatter_unneeded_fields(content: string) {
	const block = /^---\n([\s\S]*?)\n---\n?/.exec(content);
	if (block === null) return content;

	// Everything after the closing `---` (and its optional newline)
	const body = content.slice(block[0].length);

	const kept: string[] = [];
	for (const field_line of block[1]!.split('\n')) {
		if (!/^(title|skill|NOTE)\s*:/.test(field_line)) {
			kept.push(field_line);
		}
	}

	// No fields survived, so the delimiters go as well
	if (kept.length === 0) return body;

	return `---\n${kept.join('\n')}\n---\n` + body;
}

/**
 * Picks a base name for a link: the last non-empty `/`-separated segment of
 * the part before the first `#`.
 * e.g. "some/deep/path#section" -> "path"
 *
 * Falls back to "reference" when there is no such segment. The segment is
 * returned as-is; no characters are escaped or removed.
 */
function derive_name(link: string) {
	const hash_at = link.indexOf('#');
	const path_only = hash_at === -1 ? link : link.slice(0, hash_at);

	let last_segment: string | undefined;
	for (const segment of path_only.split('/')) {
		// Empty segments come from leading, trailing or doubled slashes
		if (segment) last_segment = segment;
	}

	return last_segment ?? 'reference';
}

// Load the input (file or raw markdown), then clean it in two steps:
// frontmatter fields first, llm-ignore regions second.
const raw_markdown = await get_content(file);
const without_unneeded_fields = remove_frontmatter_unneeded_fields(raw_markdown);
const content = remove_llm_ignore_blocks(without_unneeded_fields);

// Match inline markdown links `[text](href)` whose href is either:
// 1. Relative — not starting with http://, https://, mailto:, #, or /
// 2. An absolute /docs/ path (e.g. /docs/svelte/each)
//
// In both patterns capture group 1 is the link text and group 2 is the href,
// including any `#hash` suffix. Both carry the `g` flag so that repeated
// `exec` calls walk through every occurrence in the document.
//
// NOTE(review): neither pattern looks at the character before `[`, so image
// syntax (`![alt](src)`) with a matching src is picked up as well — confirm
// whether that is intended.
const relative_link_regex = /\[([^\]]*)\]\((?!https?:\/\/|mailto:|#|\/)([^)]+)\)/g;
const docs_link_regex = /\[([^\]]*)\]\((\/docs\/[^)]+)\)/g;

/** One markdown link found in the input that is to be resolved to a local reference file. */
interface Link_Info {
	/** The complete `[text](href)` source text; used to locate the link when rewriting it. */
	full_match: string;
	/** The link text between the square brackets. */
	text: string;
	/** The link target exactly as written, including any `#hash`. */
	href: string;
	/** The part of `href` after the first `#`, or `undefined` when `href` contains no `#`. */
	hash: string | undefined;
	/** `href` with the `#hash` suffix removed. */
	clean_path: string;
	/** `true` for absolute `/docs/...` links, `false` for relative links. */
	is_absolute_docs: boolean;
}

const links: Link_Info[] = [];

// Each pattern is paired with the `is_absolute_docs` flag its hits receive.
// Order matters: every relative link is recorded before any absolute /docs/ link.
const link_sources: ReadonlyArray<readonly [RegExp, boolean]> = [
	[relative_link_regex, false],
	[docs_link_regex, true],
];

for (const [pattern, is_absolute_docs] of link_sources) {
	for (const found of content.matchAll(pattern)) {
		const href = found[2]!;

		// Split the href at its first `#` into path and section hash (if any)
		const hash_at = href.indexOf('#');
		let clean_path = href;
		let hash: string | undefined;
		if (hash_at !== -1) {
			clean_path = href.slice(0, hash_at);
			hash = href.slice(hash_at + 1);
		}

		links.push({
			full_match: found[0],
			text: found[1]!,
			href,
			hash,
			clean_path,
			is_absolute_docs,
		});
	}
}

// Nothing to resolve: report it and stop with a success status.
// NOTE(review): this exits before any output file is written — confirm that
// callers expect no SKILL.md in this case.
if (links.length === 0) {
	console.log('No relative links found in the markdown.');
	process.exit(0);
}

console.log(`Found ${links.length} relative link(s) to resolve.`);

// Fetched documents are saved under `<output>/references`; `recursive` also
// creates `output` itself when it does not exist yet
const references_dir = path.join(output, 'references');
await fs.mkdir(references_dir, { recursive: true });

let updated_content = content;

// Track names we've already used to avoid collisions
const used_names = new Map<string, number>();

// Links are resolved one at a time, in collection order. A link whose fetch
// fails is left untouched in the markdown; the remaining links still proceed.
for (const link of links) {
	// The file is named after the last path segment of the link; a repeated
	// name gets a numeric suffix (`each`, `each-1`, `each-2`, …)
	const base_name = derive_name(link.clean_path);
	const count = used_names.get(base_name) ?? 0;
	used_names.set(base_name, count + 1);
	const name = count > 0 ? `${base_name}-${count}` : base_name;

	// For absolute /docs/ links, fetch directly from svelte.dev (supports cross-repo links).
	// For relative links, prepend /docs/{repo}/.
	const url = link.is_absolute_docs
		? `https://svelte.dev${link.clean_path}/llms.txt`
		: `https://svelte.dev/docs/${repo}/${link.clean_path}/llms.txt`;

	console.log(`Fetching: ${url}${link.hash ? ` (section: #${link.hash})` : ''}`);

	try {
		const response = await fetch(url);

		if (!response.ok) {
			console.warn(`  Warning: ${response.status} ${response.statusText} for ${url}`);
			continue;
		}

		let fetched_content = await response.text();

		// A `#hash` on the link narrows the reference down to that one section
		if (link.hash) {
			fetched_content = extract_section(fetched_content, link.hash);
		}

		const ref_filename = `${name}.md`;
		const ref_path = path.join(references_dir, ref_filename);

		await fs.writeFile(ref_path, remove_llm_ignore_blocks(remove_cut_preambles(fetched_content)));
		console.log(`  Saved: references/${ref_filename}`);

		// Replace the link in the markdown. A replacer function is used rather than
		// a replacement string because `replace` interprets `$$`, `$&`, `` $` `` and
		// `$'` inside a replacement string — link text or file names such as
		// `$$props` would otherwise be rewritten and no longer match the saved file.
		const new_link = `[${link.text}](references/${ref_filename})`;
		updated_content = updated_content.replace(link.full_match, () => new_link);
	} catch (error) {
		console.warn(`  Error fetching ${url}:`, error);
	}
}

/**
 * In fenced code blocks, removes everything from the start of the block
 * up to and including the first `// ---cut---` comment. If no such comment
 * exists the code block is left unchanged.
 *
 * Only fences that start at column 0 with exactly three backticks are
 * recognised. The opening fence may carry any info string (`js`,
 * `js title="a.js"`, `c++`, …); the closing fence is three backticks
 * optionally followed by whitespace. A code block that is still open at the
 * end of the input is emitted untouched.
 *
 * @param content - markdown text to process
 * @returns the markdown with cut preambles removed
 */
function remove_cut_preambles(content: string) {
	// The patterns are declared inside the function on purpose: it is called
	// from top-level code that runs earlier in this module, at which point
	// module-level constants declared next to it would not be initialised yet.
	const opening_fence = /^```[^`]*$/;
	const closing_fence = /^```\s*$/;
	const cut_marker = /^\s*\/\/\s*---cut---\s*$/;

	const result: string[] = [];
	// Opening fence line of the block currently being buffered; `undefined` outside a block
	let fence_line: string | undefined;
	let code_block_buffer: string[] = [];

	for (const line of content.split('\n')) {
		if (fence_line === undefined) {
			if (opening_fence.test(line)) {
				fence_line = line;
				code_block_buffer = [];
			} else {
				result.push(line);
			}
			continue;
		}

		if (!closing_fence.test(line)) {
			code_block_buffer.push(line);
			continue;
		}

		// End of code block — keep only what follows the first cut comment.
		// With no cut comment `findIndex` yields -1, so `slice(0)` keeps everything.
		const cut_index = code_block_buffer.findIndex((l) => cut_marker.test(l));
		result.push(fence_line, ...code_block_buffer.slice(cut_index + 1), line);

		fence_line = undefined;
		code_block_buffer = [];
	}

	// If file ends mid-code-block, flush as-is
	if (fence_line !== undefined) {
		result.push(fence_line, ...code_block_buffer);
	}

	return result.join('\n');
}

// The main document gets the same `// ---cut---` treatment as the references,
// and is then saved as SKILL.md directly inside the output folder
const output_filename = path.join(output, 'SKILL.md');
updated_content = remove_cut_preambles(updated_content);

await fs.writeFile(output_filename, updated_content);
console.log(`\nUpdated markdown written to: ${output_filename}`);