Add YamlScalarFormatter for quote-safe YAML scalars

The upcoming DAP execution-view renderer serves a synthesized YAML
document as the job's debugger source. The skeleton is hand-emitted
so we can track per-step line offsets, but scalar values (step names,
action refs, etc.) need quote-safe formatting that respects YAML's
reserved chars, leading/trailing whitespace, and embedded `: `/`#`
sequences. Doing this by hand is error-prone, especially on edge
cases (empty strings, expressions, multiline content).

This commit adds a thin wrapper around YamlDotNet's `Emitter` that
emits a single scalar, strips the surrounding document markers, and
forces LF line breaks (`StringWriter` otherwise picks up Windows's
CRLF via `Environment.NewLine` and corrupts the document-end
stripping).

No caller yet — the renderer that uses it lands in a follow-up PR.
This is part 1 of 5 splitting the previously-monolithic foundation
for review tractability.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Francesco Renzi
2026-05-19 02:02:25 -07:00
parent 3ff2186ec0
commit 5c49625758
2 changed files with 182 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
using System;
using System.Globalization;
using System.IO;
using GitHub.Runner.Sdk;
using YamlDotNet.Core;
using YamlDotNet.Core.Events;
namespace GitHub.Runner.Worker.Dap
{
    /// <summary>
    /// Formats a single string as a quote-safe YAML scalar by routing it
    /// through YamlDotNet's <see cref="Emitter"/>. The returned text is
    /// always a single physical line, so it can be spliced after a key at
    /// any indentation depth of a hand-emitted YAML document fragment.
    ///
    /// Caller responsibility: this only handles the scalar value; it does
    /// not emit a key, indent, or trailing newline.
    /// </summary>
    internal static class YamlScalarFormatter
    {
        // Explicit empty scalar. An empty *plain* scalar placed after a key
        // reads back as null rather than as an empty string.
        private const string EmptyScalar = "''";

        // Suffix the emitter may append to close an open-ended document.
        private const string DocEndMarker = "\n...";

        // Characters treated as line breaks for style selection. Includes
        // the Unicode breaks (NEL, LS, PS) in addition to CR/LF; forcing
        // double quotes for those is harmless even where a parser would
        // not treat them as breaks.
        private static readonly char[] LineBreakChars = { '\r', '\n', '\u0085', '\u2028', '\u2029' };

        /// <summary>
        /// Return <paramref name="value"/> formatted as a YAML scalar with no
        /// surrounding document markers and no trailing newline.
        /// <list type="bullet">
        /// <item>An empty value is returned as <c>''</c>.</item>
        /// <item>A value containing a line break is emitted double-quoted so
        /// the break is written as an escape sequence.</item>
        /// <item>Any other value is emitted plain, single-quoted, or
        /// double-quoted as the emitter chooses.</item>
        /// </list>
        /// </summary>
        /// <param name="value">The string to format. Must not be null.</param>
        /// <returns>The scalar text, on a single line.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="value"/> is null (raised by <c>ArgUtil.NotNull</c>).
        /// </exception>
        public static string Format(string value)
        {
            ArgUtil.NotNull(value, nameof(value));

            // Do not depend on how the emitter renders a root-level empty
            // scalar: if it were emitted plain, "key: " followed by nothing
            // would parse as null once spliced.
            if (value.Length == 0)
            {
                return EmptyScalar;
            }

            // Double-quoted is the only YAML style that can escape line
            // breaks. Any other style would spread the value over several
            // lines, with continuation indentation chosen for a root-level
            // scalar, which is not valid at arbitrary splice depth and
            // would shift the line numbers of everything after it.
            ScalarStyle style = value.IndexOfAny(LineBreakChars) >= 0
                ? ScalarStyle.DoubleQuoted
                : ScalarStyle.Any;

            using var sw = new StringWriter(CultureInfo.InvariantCulture);

            // Force LF line breaks; YamlDotNet's Emitter calls WriteLine,
            // which would otherwise produce CRLF on Windows and break
            // both the document-marker stripping and downstream
            // consumers that assume a single line-break convention.
            sw.NewLine = "\n";

            var emitter = new Emitter(sw);
            emitter.Emit(new StreamStart());
            emitter.Emit(new DocumentStart(null, null, true));
            emitter.Emit(new Scalar(null, null, value, style, true, true));
            emitter.Emit(new DocumentEnd(true));
            emitter.Emit(new StreamEnd());

            return StripDocumentMarkers(sw.ToString());
        }

        /// <summary>
        /// Remove a leading <c>"--- "</c> / <c>"---\n"</c> document-start
        /// marker, a trailing <c>"\n..."</c> document-end marker, and any
        /// trailing LF characters from raw emitter output.
        /// </summary>
        private static string StripDocumentMarkers(string raw)
        {
            // The emitter elides the start marker for most scalars; handle
            // both the inline ("--- ") and own-line ("---\n") forms. Both
            // prefixes are four characters long.
            if (raw.StartsWith("--- ", StringComparison.Ordinal) ||
                raw.StartsWith("---\n", StringComparison.Ordinal))
            {
                raw = raw.Substring(4);
            }

            // Trim first so the end marker, if present, is the true suffix.
            raw = raw.TrimEnd('\n');
            if (raw.EndsWith(DocEndMarker, StringComparison.Ordinal))
            {
                raw = raw.Substring(0, raw.Length - DocEndMarker.Length);
            }

            return raw.TrimEnd('\n');
        }
    }
}

View File

@@ -0,0 +1,119 @@
using System;
using System.Collections.Generic;
using GitHub.Runner.Worker.Dap;
using Xunit;
using YamlDotNet.Serialization;
namespace GitHub.Runner.Common.Tests.Worker
{
    /// <summary>
    /// L0 tests for <see cref="YamlScalarFormatter"/>. Most assertions are
    /// round-trip based so they do not depend on which quoting style the
    /// emitter picks for a given input.
    /// </summary>
    public sealed class YamlScalarFormatterL0
    {
        private static readonly IDeserializer Deserializer = new DeserializerBuilder().Build();

        // Embed the formatter output inside a minimal YAML mapping and
        // round-trip through YamlDotNet, asserting the parsed value equals
        // the original input. Decouples assertions from the emitter's
        // quoting choices (plain vs single- vs double-quoted).
        private static void AssertRoundTrips(string value)
        {
            string scalar = YamlScalarFormatter.Format(value);
            string yaml = $"k: {scalar}\n";

            Dictionary<string, object> doc;
            try
            {
                doc = Deserializer.Deserialize<Dictionary<string, object>>(yaml);
            }
            catch (Exception ex)
            {
                throw new Xunit.Sdk.XunitException(
                    $"Formatted scalar did not round-trip as valid YAML.\nInput: '{value}'\nFormatted: '{scalar}'\nFull YAML:\n{yaml}\nError: {ex.Message}");
            }

            Assert.NotNull(doc);
            Assert.True(doc.TryGetValue("k", out object parsed), $"missing key in parsed doc. Formatted: '{scalar}'");

            // Assert the runtime type explicitly: an `as string` cast would
            // turn a non-string (or null) result into a bare null mismatch
            // and hide what the parser actually produced.
            Assert.Equal(value, Assert.IsType<string>(parsed));
        }

        [Theory]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        [InlineData("hello")]
        [InlineData("with: colon")]
        [InlineData("with#hash")]
        [InlineData(" leading")]
        [InlineData("trailing ")]
        [InlineData("a\"b")]
        [InlineData("a\\b")]
        [InlineData("@at")]
        [InlineData("*star")]
        [InlineData("&amp")]
        [InlineData("?question")]
        [InlineData("!exclaim")]
        [InlineData("- dash")]
        [InlineData("{brace}")]
        [InlineData("[bracket]")]
        [InlineData("${{ github.sha }}")]
        public void Format_RoundTripsThroughYamlDeserializer(string value)
        {
            // The formatter must produce output that, embedded under a key,
            // parses back to exactly the input. The emitter is free to
            // pick plain, single-quoted, or double-quoted style.
            AssertRoundTrips(value);
        }

        [Fact]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        public void Format_PlainAscii_NoQuotingNeeded()
        {
            // Sanity check that the simple case stays plain.
            Assert.Equal("hello", YamlScalarFormatter.Format("hello"));
        }

        [Fact]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        public void Format_NoTrailingNewline()
        {
            // Ordinal comparison is required here: the culture-sensitive
            // EndsWith(string) overload may report false for text ending
            // in "\r\n" on ICU-backed runtimes, which would let a trailing
            // line break slip through this check.
            Assert.False(YamlScalarFormatter.Format("hello").EndsWith("\n", StringComparison.Ordinal));
            Assert.False(YamlScalarFormatter.Format("with: colon").EndsWith("\n", StringComparison.Ordinal));
        }

        [Fact]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        public void Format_NoDocumentMarkers()
        {
            // The emitter wraps the scalar in a document; the formatter
            // must strip both `--- ` (with space) and `---\n` (on its
            // own line) prefixes plus the `\n...` suffix.
            Assert.DoesNotContain("---", YamlScalarFormatter.Format("hello"));
            Assert.DoesNotContain("...", YamlScalarFormatter.Format("hello"));

            // Empty string is one of the cases where the emitter does
            // produce a document marker by default.
            Assert.DoesNotContain("---", YamlScalarFormatter.Format(""));
            Assert.DoesNotContain("...", YamlScalarFormatter.Format(""));
        }

        [Fact]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        public void Format_AlwaysUsesLfLineBreaks()
        {
            // Regression: YamlDotNet's Emitter calls WriteLine, which on
            // Windows produces CRLF (the host's Environment.NewLine).
            // Format must force LF so the output round-trips regardless
            // of platform.
            Assert.DoesNotContain('\r', YamlScalarFormatter.Format("hello"));
            Assert.DoesNotContain('\r', YamlScalarFormatter.Format("with: colon"));
            Assert.DoesNotContain('\r', YamlScalarFormatter.Format(""));
        }

        [Fact]
        [Trait("Level", "L0")]
        [Trait("Category", "Worker")]
        public void Format_NullValue_Throws()
        {
            Assert.Throws<ArgumentNullException>(() => YamlScalarFormatter.Format(null));
        }
    }
}