From c75b7de4394a2889494f8ac067ff70fcd5b11e20 Mon Sep 17 00:00:00 2001 From: 0xZunia Date: Thu, 30 Oct 2025 15:15:04 +0100 Subject: [PATCH] 1.0.0 --- .gitignore | 5 + LICENSE | 21 + README.md | 275 ++++++++++ SPEC.md | 691 +++++++++++++++++++++++++ ToonSharp.Tests/ToonSerializerTests.cs | 421 +++++++++++++++ ToonSharp.Tests/ToonSharp.Tests.csproj | 25 + ToonSharp.sln | 45 ++ ToonSharp/ToonDelimiter.cs | 22 + ToonSharp/ToonException.cs | 74 +++ ToonSharp/ToonHelpers.cs | 287 ++++++++++ ToonSharp/ToonReader.cs | 645 +++++++++++++++++++++++ ToonSharp/ToonSerializer.cs | 125 +++++ ToonSharp/ToonSerializerOptions.cs | 67 +++ ToonSharp/ToonSharp.csproj | 39 ++ ToonSharp/ToonWriter.cs | 640 +++++++++++++++++++++++ 15 files changed, 3382 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 SPEC.md create mode 100644 ToonSharp.Tests/ToonSerializerTests.cs create mode 100644 ToonSharp.Tests/ToonSharp.Tests.csproj create mode 100644 ToonSharp.sln create mode 100644 ToonSharp/ToonDelimiter.cs create mode 100644 ToonSharp/ToonException.cs create mode 100644 ToonSharp/ToonHelpers.cs create mode 100644 ToonSharp/ToonReader.cs create mode 100644 ToonSharp/ToonSerializer.cs create mode 100644 ToonSharp/ToonSerializerOptions.cs create mode 100644 ToonSharp/ToonSharp.csproj create mode 100644 ToonSharp/ToonWriter.cs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..add57be --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +bin/ +obj/ +/packages/ +riderModule.iml +/_ReSharper.Caches/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ab6d3ca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025-PRESENT Reyan CARLIER + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d2ea83 --- /dev/null +++ b/README.md @@ -0,0 +1,275 @@ +# ToonSharp + +A high-performance, .NET 9 library for serializing and deserializing data in the TOON format - a human-readable, line-oriented data serialization format optimized for LLM contexts. 
+ +[![.NET 9.0](https://img.shields.io/badge/.NET-9.0-blue.svg)](https://dotnet.microsoft.com/download) +[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) + +## Features + +- **Full TOON v1.2 Specification Support** - Complete implementation of the TOON specification +- **Performance-Driven** - Built with .NET 9 modern performance features +- **Type-Safe** - Leverages C# 12 features and nullable reference types +- **Strict Mode** - Optional strict validation for production environments +- **Tabular Data** - First-class support for tabular arrays +- **Multiple Delimiters** - Comma, tab, and pipe delimiter support +- **Fully Documented** - Comprehensive XML documentation for IntelliSense + +## Installation + +```bash +dotnet add package ToonSharp +``` + +## Quick Start + +### Serialization + +```csharp +using ToonSharp; + +// Simple object +var user = new +{ + id = 123, + name = "Ada Lovelace", + active = true +}; + +var toon = ToonSerializer.Serialize(user); +// Output: +// id: 123 +// name: Ada Lovelace +// active: true +``` + +### Deserialization + +```csharp +using ToonSharp; +using System.Text.Json.Nodes; + +var toon = """ +id: 123 +name: Ada Lovelace +active: true +"""; + +var result = ToonSerializer.Deserialize(toon); +var obj = result.AsObject(); + +Console.WriteLine(obj["name"]); // "Ada Lovelace" +``` + +### Strongly-Typed Deserialization + +```csharp +public class User +{ + public int Id { get; set; } + public string Name { get; set; } + public bool Active { get; set; } +} + +var user = ToonSerializer.Deserialize<User>(toon); +Console.WriteLine(user.Name); // "Ada Lovelace" +``` + +## Examples + +### Nested Objects + +```csharp +var data = new +{ + user = new + { + id = 123, + name = "Ada" + } +}; + +var toon = ToonSerializer.Serialize(data); +// Output: +// user: +// id: 123 +// name: Ada +``` + +### Primitive Arrays + +```csharp +var data = new +{ + tags = new[] { "admin", "developer", "ops" } +}; + +var toon = 
ToonSerializer.Serialize(data); +// Output: +// tags[3]: admin,developer,ops +``` + +### Tabular Data + +```csharp +var data = new +{ + products = new[] + { + new { sku = "A1", qty = 2, price = 9.99 }, + new { sku = "B2", qty = 1, price = 14.50 } + } +}; + +var toon = ToonSerializer.Serialize(data); +// Output: +// products[2]{sku,qty,price}: +// A1,2,9.99 +// B2,1,14.5 +``` + +### Custom Delimiters + +```csharp +var options = new ToonSerializerOptions +{ + Delimiter = ToonDelimiter.Tab +}; + +var data = new { tags = new[] { "reading", "gaming", "coding" } }; +var toon = ToonSerializer.Serialize(data, options); +// Output: +// tags[3 ]: reading gaming coding +``` + +### Async Operations + +```csharp +// Serialize to stream +await using var stream = File.Create("data.toon"); +await ToonSerializer.SerializeAsync(stream, data); + +// Deserialize from stream +await using var readStream = File.OpenRead("data.toon"); +var result = await ToonSerializer.DeserializeAsync(readStream); +``` + +## Configuration + +### ToonSerializerOptions + +```csharp +var options = new ToonSerializerOptions +{ + IndentSize = 2, // Spaces per indentation level (default: 2) + Delimiter = ToonDelimiter.Comma, // Document delimiter (default: Comma) + UseLengthMarker = false, // Include # in array headers (default: false) + Strict = true // Enable strict mode (default: true) +}; +``` + +### Strict Mode + +When `Strict = true` (default), the parser enforces: + +- Array counts must match declared lengths +- Indentation must be exact multiples of `IndentSize` +- Tabs cannot be used for indentation +- Invalid escape sequences cause errors +- Missing colons after keys cause errors +- Blank lines inside arrays/tabular rows cause errors + +```csharp +// Strict mode (default) +var strictOptions = new ToonSerializerOptions { Strict = true }; + +// Non-strict mode (more lenient) +var lenientOptions = new ToonSerializerOptions { Strict = false }; +``` + +## Supported Types + +### Primitives +- `string` +- 
`int`, `long`, `short`, `byte` (and unsigned variants) +- `float`, `double`, `decimal` +- `bool` +- `null` + +### Collections +- Arrays (`T[]`) +- `List<T>` +- `IEnumerable<T>` + +### Objects +- POCOs (Plain Old CLR Objects) +- Anonymous types +- `Dictionary<string, TValue>` +- `JsonObject` / `JsonArray` / `JsonNode` + +## API Reference + +### ToonSerializer + +| Method | Description | +|--------|-------------| +| `Serialize<T>(T value, options?)` | Converts value to TOON string | +| `SerializeAsync<T>(Stream, T value, options?, token?)` | Async serialize to stream | +| `Deserialize(string toon, options?)` | Parses TOON to JsonNode | +| `Deserialize<T>(string toon, options?)` | Parses TOON to type T | +| `DeserializeAsync(Stream, options?, token?)` | Async deserialize from stream | +| `TryDeserialize<T>(string, out T?, options?)` | Safe deserialization | + +### ToonDelimiter + +- `Comma` - Default comma delimiter (`,`) +- `Tab` - Tab delimiter (`\t`) +- `Pipe` - Pipe delimiter (`|`) + +## Error Handling + +```csharp +try +{ + var result = ToonSerializer.Deserialize(toon); +} +catch (ToonException ex) +{ + Console.WriteLine($"Error at line {ex.LineNumber}: {ex.Message}"); +} +``` + +## Performance + +ToonSharp is built with performance in mind: + +- Uses `Span<T>` and `ReadOnlySpan<T>` for zero-allocation string operations +- Minimal allocations during parsing +- Efficient `StringBuilder` usage for serialization +- Optimized for .NET 9 runtime improvements + +## Specification + +This library implements the [TOON Specification v1.2](SPEC.md). + +## Contributing + +Contributions are welcome! Please ensure: + +1. All tests pass (`dotnet test`) +2. Code follows .NET conventions +3. XML documentation is complete +4. Tests cover changes + +## License + +MIT License - see [LICENSE](LICENSE) for details. + +## Acknowledgments + +Built with .NET 9 following modern C# best practices and the System.Text.Json design patterns. 
+This is a port of https://github.com/johannschopplich/toon to .NET9 + +--- + +Made with ❤️ for the .NET community diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..f70d89b --- /dev/null +++ b/SPEC.md @@ -0,0 +1,691 @@ +Original link: https://github.com/johannschopplich/toon/blob/main/SPEC.md + +# TOON Specification (v1.2) + +Status: Draft, normative where indicated. This version specifies both encoding (producer behavior) and decoding (parser behavior). + +- Normative statements use RFC 2119/8174 keywords: MUST, MUST NOT, SHOULD, SHOULD NOT, MAY. +- Audience: implementers of encoders/decoders/validators; tool authors; practitioners embedding TOON in LLM prompts. + +Changelog: +- v1.2: + - Centralized decoding rules (primitives, keys) and strict-mode checklist. + - Made header grammar normative and clarified delimiter scoping. + - Tightened strict-mode indentation (exact multiples; tabs error). + - Defined blank-line and trailing-newline decoding behavior with explicit skipping rules outside arrays. + - Clarified hyphen-based quoting: "-" or any string starting with "-" MUST be quoted. + - Clarified BigInt normalization (quoted string when out of safe range). + - Unified root-form detection and row/key disambiguation language; disambiguation uses first unquoted delimiter vs colon. + - Introduced "document delimiter" vs "active delimiter" terminology. +- v1.1: Made decoding behavior normative; added strict-mode rules, delimiter-aware parsing, and reference algorithms; decoder options (indent, strict). +- v1: Initial encoding, normalization, and conformance rules. + +Scope: +- Defines the data model, encoding normalization (reference JS/TS), concrete syntax, decoding semantics, and conformance requirements for producing and consuming TOON. + +## 1. Terminology and Conventions + +- TOON document: A sequence of UTF-8 text lines formatted according to this spec. +- Line: A sequence of non-newline characters terminated by LF (U+000A) in serialized form. 
Encoders MUST use LF. +- Indentation level (depth): Leading indentation measured in fixed-size space units (indentSize). Depth 0 has no indentation. +- Indentation unit (indentSize): A fixed number of spaces per level (default 2). Tabs MUST NOT be used for indentation. +- Header: The bracketed declaration for arrays, optionally followed by a field list, and terminating with a colon; e.g., key[3]: or items[2]{a,b}:. +- Field list: Brace-enclosed, delimiter-separated list of field names for tabular arrays: {f1<delim>f2}. +- List item: A line beginning with "- " at a given depth representing an element in an expanded array. +- Delimiter: The character used to separate array/tabular values: comma (default), tab (HTAB, U+0009), or pipe ("|"). +- Document delimiter: The encoder-selected delimiter used for quoting decisions outside any array scope (default comma). +- Active delimiter: The delimiter declared by the closest array header in scope, used to split inline primitive arrays and tabular rows under that header; it also governs quoting decisions for values within that array’s scope. +- Length marker: Optional "#" prefix for array lengths in headers, e.g., [#3]. Decoders MUST accept and ignore it semantically. +- Primitive: string, number, boolean, or null. +- Object: Mapping from string keys to JsonValue. +- Array: Ordered sequence of JsonValue. +- JsonValue: Primitive | Object | Array. +- Strict mode: Decoder mode that enforces counts, indentation, and delimiter consistency; also rejects invalid escapes and missing colons (default: true). + +Notation: +- Regular expressions appear in slash-delimited form. +- ABNF snippets follow RFC 5234; HTAB means the U+0009 character. +- Examples are informative unless stated otherwise. + +## 2. Data Model + +- TOON models data as: + - JsonPrimitive: string | number | boolean | null + - JsonObject: { [string]: JsonValue } + - JsonArray: JsonValue[] +- Ordering: + - Array order MUST be preserved. 
+ - Object key order MUST be preserved as encountered by the encoder. +- Numbers (encoding): + - -0 MUST be normalized to 0. + - Finite numbers MUST be rendered without scientific notation (e.g., 1e6 → 1000000; 1e-6 → 0.000001). +- Null: Represented as the literal null. + +## 3. Encoding Normalization (Reference Encoder) + +The reference encoder normalizes non-JSON values to the data model: + +- Number: + - Finite → number (non-exponential). -0 → 0. + - NaN, +Infinity, -Infinity → null. + - Implementations MUST ensure decimal rendering does not use exponent notation. +- BigInt (JavaScript): + - If within Number.MIN_SAFE_INTEGER..Number.MAX_SAFE_INTEGER → converted to number. + - Otherwise → converted to a decimal string (e.g., "9007199254740993") and encoded as a string (quoted because it is numeric-like). +- Date → ISO string (e.g., "2025-01-01T00:00:00.000Z"). +- Set → array by iterating entries and normalizing each element. +- Map → object using String(key) for keys and normalizing values. +- Plain object → own enumerable string keys in encounter order; values normalized recursively. +- Function, symbol, undefined, or unrecognized types → null. + +Note: Other language ports SHOULD apply analogous normalization consistent with this spec’s data model and encoding rules. + +## 4. Decoding Interpretation (Reference Decoder) + +Decoders map text tokens to host values: + +- Quoted tokens (strings and keys): + - MUST be unescaped per Section 7.1 (only \\, \", \n, \r, \t are valid). Any other escape or an unterminated string MUST error. + - Quoted primitives remain strings even if they look like numbers/booleans/null. +- Unquoted value tokens: + - true, false, null → booleans/null. + - Numeric parsing: + - MUST accept standard decimal and exponent forms (e.g., 42, -3.14, 1e-6, -1E+9). + - MUST treat tokens with forbidden leading zeros (e.g., "05", "0001") as strings (not numbers). + - Only finite numbers are expected from conforming encoders. + - Otherwise → string. 
+- Keys: + - Decoded as strings (quoted keys MUST be unescaped per Section 7.1). + - A colon MUST follow a key; missing colon MUST error. + +## 5. Concrete Syntax and Root Form + +TOON is a deterministic, line-oriented, indentation-based notation. + +- Objects: + - key: value for primitives. + - key: alone for nested or empty objects; nested fields appear at depth +1. +- Arrays: + - Primitive arrays are inline: key[N<delim?>]: v1<delim>v2…. + - Arrays of arrays (primitives): expanded list items under a header: key[N<delim?>]: then "- [M<delim?>]: …". + - Arrays of objects: + - Tabular form when uniform and primitive-only: key[N<delim?>]{f1<delim>f2}: then one row per line. + - Otherwise: expanded list items: key[N<delim?>]: with "- …" items (see Sections 9.4 and 10). +- Root form discovery: + - If the first non-empty depth-0 line is a valid root array header per Section 6 (must include a colon), decode a root array. + - Else if the document has exactly one non-empty line and it is neither a valid array header nor a key-value line (quoted or unquoted key), decode a single primitive. + - Otherwise, decode an object. + - In strict mode, multiple non-key/value non-header lines at depth 0 is invalid. + +## 6. Header Syntax (Normative) + +Array headers declare length and active delimiter, and optionally field names. + +General forms: +- Root header (no key): [<marker?>N<delim?>]: +- With key: key[<marker?>N<delim?>]: +- Tabular fields: key[<marker?>N<delim?>]{field1<delim>field2<delim>…}: + +Where: +- N is the non-negative integer length. +- <marker?> is optional "#"; decoders MUST accept and ignore it semantically. +- <delim?> is: + - absent for comma (","), + - HTAB (U+0009) for tab, + - "|" for pipe. +- Field names in braces are separated by the same active delimiter and encoded as keys (Section 7.3). + +Spacing and delimiters: +- Every header line MUST end with a colon. +- When inline values follow a header on the same line (non-empty primitive arrays), there MUST be exactly one space after the colon before the first value. 
+- The active delimiter declared by the bracket segment applies to: + - splitting inline primitive arrays on that header line, + - splitting tabular field names in "{…}", + - splitting all rows/items within the header’s scope, + - unless a nested header changes it. +- The same delimiter symbol declared in the bracket MUST be used in the fields segment and in all row/value splits in that scope. +- Absence of a delimiter symbol in a bracket segment ALWAYS means comma, regardless of any parent header. + +Normative header grammar (ABNF): +``` +bracket-seg = "[" [ "#" ] 1*DIGIT [ delimsym ] "]" +delimsym = HTAB / "|" +; Field names are keys (quoted/unquoted) separated by the active delimiter +fields-seg = "{" fieldname *( delim fieldname ) "}" +delim = delimsym / "," +fieldname = key + +header = [ key ] bracket-seg [ fields-seg ] ":" +key = unquoted-key / quoted-key + +; Unquoted keys must match identifier pattern +unquoted-key = ( ALPHA / "_" ) *( ALPHA / DIGIT / "_" / "." ) + +; Quoted keys use only escapes from Section 7.1 +; (Exact escaped-char repertoire is defined in Section 7.1) +; quoted-key = DQUOTE *(escaped-char / safe-char) DQUOTE +``` + +Decoding requirements: +- The bracket segment MUST parse as a non-negative integer length N. +- If a trailing tab or pipe appears inside the brackets, it selects the active delimiter; otherwise comma is active. +- If a fields segment occurs between the bracket and the colon, parse field names using the active delimiter; quoted names MUST be unescaped per Section 7.1. +- A colon MUST follow the bracket and optional fields; missing colon MUST error. + +## 7. Strings and Keys + +### 7.1 Escaping (Encoding and Decoding) + +In quoted strings and keys, the following characters MUST be escaped: +- "\\" → "\\\\" +- "\"" → "\\\"" +- U+000A newline → "\\n" +- U+000D carriage return → "\\r" +- U+0009 tab → "\\t" + +Decoders MUST reject any other escape sequence and unterminated strings. 
+ +Tabs are allowed inside quoted strings and as a declared delimiter; they MUST NOT be used for indentation (Section 12). + +### 7.2 Quoting Rules for String Values (Encoding) + +A string value MUST be quoted if any of the following is true: +- It is empty (""). +- It has leading or trailing whitespace. +- It equals true, false, or null (case-sensitive). +- It is numeric-like: + - Matches /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i (e.g., "42", "-3.14", "1e-6"). + - Or matches /^0\d+$/ (leading-zero decimals such as "05"). +- It contains a colon (:), double quote ("), or backslash (\). +- It contains brackets or braces ([, ], {, }). +- It contains control characters: newline, carriage return, or tab. +- It contains the relevant delimiter: + - Inside array scope: the active delimiter (Section 1). + - Outside array scope: the document delimiter (Section 1). +- It equals "-" or starts with "-" (any hyphen at position 0). + +Otherwise, the string MAY be emitted without quotes. Unicode, emoji, and strings with internal (non-leading/trailing) spaces are safe unquoted provided they do not violate the conditions. + +### 7.3 Key Encoding (Encoding) + +Object keys and tabular field names: +- MAY be unquoted only if they match: ^[A-Za-z_][\w.]*$. +- Otherwise, they MUST be quoted and escaped per Section 7.1. + +### 7.4 Decoding Rules for Strings and Keys (Decoding) + +- Quoted strings and keys MUST be unescaped per Section 7.1; any other escape MUST error. Quoted primitives remain strings. +- Unquoted values: + - true/false/null → boolean/null + - Numeric tokens → numbers (with the leading-zero rule in Section 4) + - Otherwise → strings +- Keys (quoted or unquoted) MUST be followed by ":"; missing colon MUST error. + +## 8. Objects + +- Encoding: + - Primitive fields: key: value (single space after colon). + - Nested or empty objects: key: on its own line. If non-empty, nested fields appear at depth +1. 
+ - Key order: Implementations MUST preserve encounter order when emitting fields. + - An empty object at the root yields an empty document (no lines). +- Decoding: + - A line "key:" with nothing after the colon at depth d opens an object; subsequent lines at depth > d belong to that object until the depth decreases to ≤ d. + - Lines "key: value" at the same depth are sibling fields. + - Missing colon after a key MUST error. + +## 9. Arrays + +### 9.1 Primitive Arrays (Inline) + +- Encoding: + - Non-empty arrays: key[N<delim?>]: v1<delim>v2… where each vi is encoded as a primitive (Section 7) with delimiter-aware quoting. + - Empty arrays: key[0]: (no values following). + - Root arrays: [N]: v1… +- Decoding: + - Split using the active delimiter declared by the header; non-active delimiters MUST NOT split values. + - In strict mode, the number of decoded values MUST equal N; otherwise MUST error. + +### 9.2 Arrays of Arrays (Primitives Only) — Expanded List + +- Encoding: + - Parent header: key[N]: on its own line. + - Each inner primitive array is a list item: + - - [M<delim?>]: v1<delim>v2… + - Empty inner arrays: - [0]: +- Decoding: + - Items appear at depth +1, each starting with "- " and an inner array header "[M]: …". + - Inner arrays are split using their own active delimiter; in strict mode, counts MUST match M. + - In strict mode, the number of list items MUST equal outer N. + +### 9.3 Arrays of Objects — Tabular Form + +Tabular detection (encoding; MUST hold for all elements): +- Every element is an object. +- All objects have the same set of keys (order per object MAY vary). +- All values across these keys are primitives (no nested arrays/objects). + +When satisfied (encoding): +- Header: key[N<delim?>]{f1<delim>f2<delim>…}: where field order is the first object’s key encounter order. +- Field names encoded per Section 7.3. +- Rows: one line per object at depth +1 under the header; values are encoded primitives (Section 7) and joined by the active delimiter. 
+- Root tabular arrays omit the key: [N]{…}: followed by rows. + +Decoding: +- A tabular header declares the active delimiter and ordered field list. +- Rows appear at depth +1 as delimiter-separated value lines. +- Strict mode MUST enforce: + - Each row’s value count equals the field count. + - The number of rows equals N. +- Disambiguation at row depth (unquoted tokens): + - Compute the first unquoted occurrence of the active delimiter and the first unquoted colon. + - If a same-depth line has no unquoted colon → row. + - If both appear, compare first-unquoted positions: + - Delimiter before colon → row. + - Colon before delimiter → key-value line (end of rows). + - If a line has an unquoted colon but no unquoted active delimiter → key-value line (end of rows). + +### 9.4 Mixed / Non-Uniform Arrays — Expanded List + +When tabular requirements are not met (encoding): +- Header: key[N]: +- Each element is rendered as a list item at depth +1 under the header: + - Primitive: - <value> + - Primitive array: - [M]: v1… + - Object: formatted per Section 10 (objects as list items). + - Complex arrays: - key'[M]: followed by nested items as appropriate. + +Decoding: +- Header declares list length N and the active delimiter for any nested inline arrays. +- Each list item starts with "- " at depth +1 and is parsed as: + - Primitive (no colon and no array header), + - Inline primitive array (- [M]: …), + - Object with first field on the hyphen line (- key: … or - key[N…]{…}: …), + - Or nested arrays via nested headers. +- In strict mode, the number of list items MUST equal N. + +## 10. Objects as List Items + +For an object appearing as a list item: + +- Empty object list item: a single "-" at the list-item indentation level. +- First field on the hyphen line: + - Primitive: - key: value + - Primitive array: - key[M]: v1… + - Tabular array: - key[N]{fields}: + - Followed by tabular rows at depth +1 (relative to the hyphen line). 
+ - Non-uniform array: - key[N]: + - Followed by list items at depth +1. + - Object: - key: + - Nested object fields appear at depth +2 (i.e., one deeper than subsequent sibling fields of the same list item). +- Remaining fields of the same object appear at depth +1 under the hyphen line in encounter order, using normal object field rules. + +Decoding: +- The first field is parsed from the hyphen line. If it is a nested object (- key:), nested fields are at +2 relative to the hyphen line; subsequent fields of the same list item are at +1. +- If the first field is a tabular header on the hyphen line, its rows are at +1; subsequent sibling fields continue at +1 after the rows. + +## 11. Delimiters + +- Supported delimiters: + - Comma (default): header omits the delimiter symbol. + - Tab: header includes HTAB inside brackets and braces (e.g., [N<TAB>], {a<TAB>b}); rows/inline arrays use tabs. + - Pipe: header includes "|" inside brackets and braces; rows/inline arrays use "|". +- Document vs Active delimiter: + - Encoders select a document delimiter (option) that influences quoting in contexts not governed by an array header (e.g., object values). + - Inside an array header’s scope, the active delimiter governs splitting and quoting of inline arrays and tabular rows for that array. + - Absence of a delimiter symbol in a header ALWAYS means comma for that array’s scope; it does not inherit from any parent. +- Delimiter-aware quoting (encoding): + - Within an array’s scope, strings containing the active delimiter MUST be quoted to avoid splitting. + - Outside any array scope, encoders SHOULD use the document delimiter to decide delimiter-aware quoting for values. + - Strings containing non-active delimiters do not require quoting unless another quoting condition applies (Section 7.2). +- Delimiter-aware parsing (decoding): + - Inline arrays and tabular rows MUST be split only on the active delimiter declared by the nearest array header. 
+ - Strings containing the active delimiter MUST be quoted to avoid splitting; non-active delimiters MUST NOT cause splits. + - Nested headers may change the active delimiter; decoding MUST use the delimiter declared by the nearest header. + - If the bracket declares tab or pipe, the same symbol MUST be used in the fields segment and for splitting all rows/values in that scope. + +## 12. Indentation and Whitespace + +- Encoding: + - Encoders MUST use a consistent number of spaces per level (default 2; configurable). + - Tabs MUST NOT be used for indentation. + - Exactly one space after ": " in key: value lines. + - Exactly one space after array headers when followed by inline values. + - No trailing spaces at the end of any line. + - No trailing newline at the end of the document. +- Decoding: + - Strict mode: + - The number of leading spaces on a line MUST be an exact multiple of indentSize; otherwise MUST error. + - Tabs used as indentation MUST error. Tabs are allowed in quoted strings and as the HTAB delimiter. + - Non-strict mode: + - Depth MAY be computed as floor(indentSpaces / indentSize). + - Tabs in indentation are non-conforming and MAY be accepted or rejected. + - Surrounding whitespace around tokens SHOULD be tolerated; internal semantics follow quoting rules. + - Blank lines: + - Outside arrays/tabular rows: decoders SHOULD ignore completely blank lines (do not create/close structures). + - Inside arrays/tabular rows: in strict mode, MUST error; in non-strict mode, MAY be ignored and not counted as a row/item. + - Trailing newline at end-of-file: decoders SHOULD accept; validators MAY warn. + +Recommended blank-line handling (normative where stated): +- Before decoding, or during scanning: + - Track blank lines with depth. + - For strict mode: if a blank line occurs between the first and last row/item line in an array/tabular block, this MUST error. 
+ - Otherwise (outside arrays/tabular rows), blank lines SHOULD be skipped and not contribute to root-form detection. +- Empty input means: after ignoring trailing newlines and ignorable blank lines outside arrays/tabular rows, there are no non-empty lines. + +## 13. Conformance and Options + +Conformance classes: + +- Encoder: + - MUST produce output adhering to all normative rules in Sections 2–12 and 15. + - MUST be deterministic regarding: + - Object field order (encounter order). + - Tabular detection (uniform vs non-uniform). + - Quoting decisions given values and delimiter context (document delimiter or active delimiter in array scope). + +- Decoder: + - MUST implement tokenization, escaping, and type interpretation per Sections 4 and 7.4. + - MUST parse array headers per Section 6 and apply the declared active delimiter to inline arrays and tabular rows. + - MUST implement structure and depth rules per Sections 8–11, including objects-as-list-items placement. + - MUST enforce strict-mode rules in Section 14 when strict = true. + +- Validator: + - SHOULD verify structural conformance (headers, indentation, list markers). + - SHOULD verify whitespace invariants. + - SHOULD verify delimiter consistency between headers and rows. + - SHOULD verify length counts vs declared [N]. + +Options: +- Encoder options: + - indent (default: 2 spaces) + - delimiter (document delimiter; default: comma; alternatives: tab, pipe) + - lengthMarker (default: disabled) +- Decoder options: + - indent (default: 2 spaces) + - strict (default: true) + +Note: Section 14 is authoritative for strict-mode errors; validators MAY add informative diagnostics for style and encoding invariants. + +## 14. Strict Mode Errors and Diagnostics (Authoritative Checklist) + +When strict mode is enabled (default), decoders MUST error on: + +- Array count mismatches: + - Inline primitive arrays: decoded value count ≠ declared N. + - List arrays: number of list items ≠ declared N. 
+ - Tabular arrays: number of rows ≠ declared N. +- Tabular row width mismatches: + - Any row’s value count ≠ field count. +- Missing colon in key context. +- Invalid escape sequences or unterminated strings in quoted tokens. +- Indentation errors: + - Leading spaces not a multiple of indentSize. + - Any tab used in indentation. +- Delimiter mismatch (e.g., rows joined by a different delimiter than declared), detected via width/count checks and header scope. +- Blank lines inside arrays/tabular rows. +- Empty input (document with no non-empty lines after ignoring trailing newline(s) and ignorable blank lines outside arrays/tabular rows). + +Validators SHOULD additionally report: +- Trailing spaces, trailing newlines (encoding invariants). +- Headers missing delimiter marks when non-comma delimiter is in use. +- Values violating delimiter-aware quoting rules. + +Recommended error messages (informative): +- Missing colon after key +- Unterminated string: missing closing quote +- Invalid escape sequence: \x +- Indentation must be an exact multiple of N spaces +- Tabs are not allowed in indentation +- Expected N tabular rows, but got M +- Expected N list array items, but got M +- Expected K values in row, but got L + +## 15. Security Considerations + +- Injection and ambiguity are mitigated by quoting rules: + - Strings with colon, the relevant delimiter (document or active), hyphen marker cases ("-" or strings starting with "-"), control characters, or brackets/braces MUST be quoted. +- Strict-mode checks (Section 14) detect malformed strings, truncation, or injected rows/items via length and width mismatches. +- Encoders SHOULD avoid excessive memory on large inputs; implement streaming/tabular row emission where feasible. +- Unicode: + - Encoders SHOULD avoid altering Unicode beyond required escaping; decoders SHOULD accept valid UTF-8 in quoted strings/keys (with only the five escapes). + +## 16. 
Internationalization + +- Full Unicode is supported in keys and values, subject to quoting and escaping rules. +- Encoders MUST NOT apply locale-dependent formatting for numbers or booleans (e.g., no thousands separators). +- ISO 8601 strings SHOULD be used for Date normalization. + +## 17. Interoperability and Mappings (Informative) + +- JSON: + - TOON deterministically encodes JSON-compatible data (after normalization). + - Arrays of uniform objects map to CSV-like rows; other structures map to YAML-like nested forms. +- CSV: + - TOON tabular sections generalize CSV with explicit lengths, field lists, and flexible delimiter choice. +- YAML: + - TOON borrows indentation and list-item patterns but uses fewer quotes and explicit array headers. + +## 18. Media Type and File Extensions (Provisional) + +- Suggested media type: text/toon +- Suggested file extension: .toon +- Encoding: UTF-8 +- Line endings: LF (U+000A) + +## 19. Examples (Informative) + +Objects: +``` +id: 123 +name: Ada +active: true +``` + +Nested objects: +``` +user: + id: 123 + name: Ada +``` + +Primitive arrays: +``` +tags[3]: admin,ops,dev +``` + +Arrays of arrays (primitives): +``` +pairs[2]: + - [2]: 1,2 + - [2]: 3,4 +``` + +Tabular arrays: +``` +items[2]{sku,qty,price}: + A1,2,9.99 + B2,1,14.5 +``` + +Mixed arrays: +``` +items[3]: + - 1 + - a: 1 + - text +``` + +Objects as list items (first field on hyphen line): +``` +items[2]: + - id: 1 + name: First + - id: 2 + name: Second + extra: true +``` + +Nested tabular inside a list item: +``` +items[1]: + - users[2]{id,name}: + 1,Ada + 2,Bob + status: active +``` + +Delimiter variations: +``` +# Tab delimiter +items[2 ]{sku name qty price}: + A1 Widget 2 9.99 + B2 Gadget 1 14.5 + +# Pipe delimiter +tags[3|]: reading|gaming|coding +``` + +Length marker: +``` +tags[#3]: reading,gaming,coding +pairs[#2]: + - [#2]: a,b + - [#2]: c,d +``` + +Quoted colons and disambiguation (rows continue; colon is inside quotes): +``` +links[2]{id,url}: + 1,"http://a:b" 
+ 2,"https://example.com?q=a:b" +``` + +## 20. Parsing Helpers (Informative) + +These sketches illustrate structure and common decoding helpers. They are informative; normative behavior is defined in Sections 4–12 and 14. + +### 20.1 Decoding Overview + +- Split input into lines; compute depth from leading spaces and indent size (Section 12). +- Skip ignorable blank lines outside arrays/tabular rows (Section 12). +- Decide root form per Section 5. +- For objects at depth d: process lines at depth d; for arrays at depth d: read rows/list items at depth d+1. + +### 20.2 Array Header Parsing + +- Locate the first "[ … ]" segment on the line; parse: + - Optional leading "#" marker (ignored semantically). + - Length N as decimal integer. + - Optional delimiter symbol at the end: HTAB or pipe (comma otherwise). +- If a "{ … }" fields segment occurs between the "]" and the ":", parse field names using the active delimiter; unescape quoted names. +- Require a colon ":" after the bracket/fields segment. +- Return the header (key?, length, delimiter, fields?, hasLengthMarker) and any inline values after the colon. +- Absence of a delimiter symbol in the bracket segment ALWAYS means comma for that header (no inheritance). + +### 20.3 parseDelimitedValues + +- Iterate characters left-to-right while maintaining a current token and an inQuotes flag. +- On a double quote, toggle inQuotes. +- While inQuotes, treat backslash + next char as a literal pair (string parser validates later). +- Only split on the active delimiter when not in quotes (unquoted occurrences). +- Trim surrounding spaces around each token. Empty tokens decode to empty string. + +### 20.4 Primitive Token Parsing + +- If token starts with a quote, it MUST be a properly quoted string (no trailing characters after the closing quote). Unescape using only the five escapes; otherwise MUST error. +- Else if token is true/false/null → boolean/null. 
+- Else if token is numeric without forbidden leading zeros and finite → number. +- Else → string. + +### 20.5 Object and List Item Parsing + +- Key-value line: parse a key up to the first colon; missing colon → MUST error. The remainder of the line is the primitive value (if present). +- Nested object: "key:" with nothing after colon opens a nested object. If this is: + - A field inside a regular object: nested fields are at depth +1 relative to that line. + - The first field on a list-item hyphen line: nested fields at depth +2 relative to the hyphen line; subsequent fields at +1. +- List items: + - Lines start with "- " at one deeper depth than the parent array header. + - After "- ": + - If "[ … ]:" appears → inline array item; decode with its own header and active delimiter. + - Else if a colon appears → object with first field on hyphen line. + - Else → primitive token. + +### 20.6 Blank-Line Handling + +- Track blank lines during scanning with line numbers and depth. +- For arrays/tabular rows: + - In strict mode, any blank line between the first and last item/row line MUST error. + - In non-strict mode, blank lines MAY be ignored and not counted as items/rows. +- Outside arrays/tabular rows: + - Blank lines SHOULD be ignored (do not affect root-form detection or object boundaries). + +## 21. Test Suite and Compliance (Informative) + +Implementations are encouraged to validate against a comprehensive test suite covering: +- Primitive encoding/decoding, quoting, control-character escaping. +- Object key encoding/decoding and order preservation. +- Primitive arrays (inline), empty arrays. +- Arrays of arrays (expanded), mixed-length and empty inner arrays. +- Tabular detection and formatting, including delimiter variations. +- Mixed arrays and objects-as-list-items behavior, including nested arrays and objects. +- Whitespace invariants (no trailing spaces/newline). +- Normalization (BigInt, Date, undefined, NaN/Infinity, functions, symbols). 
+- Decoder strict-mode errors: count mismatches, invalid escapes, missing colon, delimiter mismatches, indentation errors, blank-line handling. + +## 22. TOON Core Profile (Normative Subset) + +This profile captures the most common, memory-friendly rules. + +- Character set: UTF-8; LF line endings. +- Indentation: 2 spaces per level (configurable indentSize). + - Strict mode: leading spaces MUST be a multiple of indentSize; tabs in indentation MUST error. +- Keys: + - Unquoted if they match ^[A-Za-z_][\w.]*$; otherwise quoted. + - A colon MUST follow a key. +- Strings: + - Only these escapes allowed in quotes: \\, \", \n, \r, \t. + - Quote if empty; leading/trailing whitespace; equals true/false/null; numeric-like; contains colon/backslash/quote/brackets/braces/control char; contains the relevant delimiter (active inside arrays, document otherwise); equals "-" or starts with "-". +- Numbers: + - Encoder emits non-exponential decimal; -0 → 0. + - Decoder accepts decimal and exponent forms; tokens with forbidden leading zeros decode as strings. +- Arrays and headers: + - Header: [#?N[delim?]] where delim is absent (comma), HTAB (tab), or "|" (pipe). + - Keyed header: key[#?N[delim?]]:. Optional fields: {f1<delim>f2}. + - Primitive arrays inline: key[N]: v1<delim>v2. Empty arrays: key[0]: (no values). + - Tabular arrays: key[N]{fields}: then N rows at depth +1. + - Otherwise list form: key[N]: then N items, each starting with "- ". +- Delimiters: + - Only split on the active delimiter from the nearest header. Non-active delimiters never split. +- Objects as list items: + - "- value" (primitive), "- [M]: …" (inline array), or "- key: …" (object). + - If first field is "- key:" with nested object: nested fields at +2; subsequent sibling fields at +1. +- Root form: + - Root array if the first depth-0 line is a header (per Section 6). + - Root primitive if exactly one non-empty line and it is not a header or key-value. + - Otherwise object.
+- Strict mode checks: + - All count/width checks; missing colon; invalid escapes; indentation multiple-of-indentSize; delimiter mismatches via count checks; blank lines inside arrays/tabular rows; empty input. + +## 23. Versioning and Extensibility + +- Backward-compatible evolutions SHOULD preserve current headers, quoting rules, and indentation semantics. +- Reserved/structural characters (colon, brackets, braces, hyphen) MUST retain current meanings. +- Future work (non-normative): schemas, comments/annotations, additional delimiter profiles, optional \uXXXX escapes (if added, must be precisely defined). + +## 24. Acknowledgments and License + +- Credits: Author and contributors; ports in other languages (Elixir, PHP, Python, Ruby, Java, .NET, Swift, Go). +- License: MIT (see repository for details). + +--- + +Appendix: Cross-check With Reference Behavior (Informative) + +- The reference encoder/decoder test suites implement: + - Safe-unquoted string rules and delimiter-aware quoting (document vs active delimiter). + - Header formation and delimiter-aware parsing with active delimiter scoping. + - Length marker propagation (encoding) and acceptance (decoding). + - Tabular detection requiring uniform keys and primitive-only values. + - Objects-as-list-items parsing (+2 nested object rule; +1 siblings). + - Whitespace invariants for encoding and strict-mode indentation enforcement for decoding. + - Blank-line handling and trailing-newline acceptance. 
diff --git a/ToonSharp.Tests/ToonSerializerTests.cs b/ToonSharp.Tests/ToonSerializerTests.cs new file mode 100644 index 0000000..75f541f --- /dev/null +++ b/ToonSharp.Tests/ToonSerializerTests.cs @@ -0,0 +1,421 @@ +using System.Text.Json.Nodes; +using Xunit; + +namespace ToonSharp.Tests; + +public class ToonSerializerTests +{ + [Fact] + public void Serialize_SimpleObject_ReturnsCorrectToon() + { + // Arrange + var obj = new + { + id = 123, + name = "Ada", + active = true + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "id: 123\nname: Ada\nactive: true"; + Assert.Equal(expected, toon); + } + + [Fact] + public void Serialize_NestedObject_ReturnsCorrectToon() + { + // Arrange + var obj = new + { + user = new + { + id = 123, + name = "Ada" + } + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "user:\n  id: 123\n  name: Ada"; + Assert.Equal(expected, toon); + } + + [Fact] + public void Serialize_PrimitiveArray_ReturnsCorrectToon() + { + // Arrange + var obj = new + { + tags = new[] { "admin", "ops", "dev" } + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "tags[3]: admin,ops,dev"; + Assert.Equal(expected, toon); + } + + [Fact] + public void Serialize_ArrayOfArrays_ReturnsCorrectToon() + { + // Arrange + var obj = new + { + pairs = new[] + { + new[] { 1, 2 }, + new[] { 3, 4 } + } + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "pairs[2]:\n  - [2]: 1,2\n  - [2]: 3,4"; + Assert.Equal(expected, toon); + } + + [Fact] + public void Serialize_TabularArray_ReturnsCorrectToon() + { + // Arrange + var obj = new + { + items = new[] + { + new { sku = "A1", qty = 2, price = 9.99 }, + new { sku = "B2", qty = 1, price = 14.5 } + } + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "items[2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5"; + Assert.Equal(expected, toon); + } + + [Fact] +
public void Serialize_MixedArray_ReturnsCorrectToon() + { + // Arrange + var items = new JsonArray + { + JsonValue.Create(1), + new JsonObject { ["a"] = 1 }, + JsonValue.Create("text") + }; + var obj = new JsonObject { ["items"] = items }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + var expected = "items[3]:\n  - 1\n  - a: 1\n  - text"; + Assert.Equal(expected, toon); + } + + [Fact] + public void Serialize_ObjectsAsListItems_ReturnsCorrectToon() + { + // Arrange + var items = new JsonArray + { + new JsonObject { ["id"] = 1, ["name"] = "First" }, + new JsonObject { ["id"] = 2, ["name"] = "Second", ["extra"] = true } + }; + var obj = new JsonObject { ["items"] = items }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + Assert.Contains("items[2]", toon); + } + + [Fact] + public void Deserialize_SimpleObject_ReturnsCorrectObject() + { + // Arrange + var toon = "id: 123\nname: Ada\nactive: true"; + + // Act + var result = ToonSerializer.Deserialize(toon); + + // Assert + Assert.NotNull(result); + var obj = result.AsObject(); + Assert.Equal(123, obj["id"]!.GetValue<int>()); + Assert.Equal("Ada", obj["name"]!.GetValue<string>()); + Assert.True(obj["active"]!.GetValue<bool>()); + } + + [Fact] + public void Deserialize_PrimitiveArray_ReturnsCorrectArray() + { + // Arrange + var toon = "tags[3]: admin,ops,dev"; + + // Act + var result = ToonSerializer.Deserialize(toon); + + // Assert + Assert.NotNull(result); + var obj = result.AsObject(); + var tags = obj["tags"] as JsonArray; + Assert.NotNull(tags); + Assert.Equal(3, tags.Count); + Assert.Equal("admin", tags[0]!.GetValue<string>()); + Assert.Equal("ops", tags[1]!.GetValue<string>()); + Assert.Equal("dev", tags[2]!.GetValue<string>()); + } + + [Fact] + public void Deserialize_TabularArray_ReturnsCorrectObjects() + { + // Arrange + var toon = "items[2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5"; + + // Act + var result = ToonSerializer.Deserialize(toon); + + // Assert + Assert.NotNull(result); + var obj = result.AsObject();
+ var items = obj["items"] as JsonArray; + Assert.NotNull(items); + Assert.Equal(2, items.Count); + + var first = items[0] as JsonObject; + Assert.NotNull(first); + Assert.Equal("A1", first["sku"]!.GetValue<string>()); + Assert.Equal(2, first["qty"]!.GetValue<int>()); + Assert.Equal(9.99, first["price"]!.GetValue<double>()); + } + + [Fact] + public void Serialize_EmptyObject_ReturnsEmptyString() + { + // Arrange + var obj = new { }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + Assert.Equal("", toon); + } + + [Fact] + public void Serialize_NullValue_ReturnsNull() + { + // Arrange + var obj = new + { + value = (string?)null + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + Assert.Equal("value: null", toon); + } + + [Fact] + public void Serialize_QuotedStrings_HandlesSpecialCharacters() + { + // Arrange + var obj = new + { + colon = "a:b", + comma = "a,b", + quote = "a\"b", + newline = "a\nb", + empty = "" + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + Assert.Contains("colon: \"a:b\"", toon); + Assert.Contains("comma: \"a,b\"", toon); + Assert.Contains("quote: \"a\\\"b\"", toon); + Assert.Contains("newline: \"a\\nb\"", toon); + Assert.Contains("empty: \"\"", toon); + } + + [Fact] + public void Serialize_WithTabDelimiter_UsesTabsInHeader() + { + // Arrange + var obj = new + { + tags = new[] { "reading", "gaming", "coding" } + }; + + var options = new ToonSerializerOptions + { + Delimiter = ToonDelimiter.Tab + }; + + // Act + var toon = ToonSerializer.Serialize(obj, options); + + // Assert + Assert.Contains("tags[3\t]:", toon); + Assert.Contains("\t", toon.Split(':')[1]); + } + + [Fact] + public void Serialize_WithPipeDelimiter_UsesPipesInHeader() + { + // Arrange + var obj = new + { + tags = new[] { "reading", "gaming", "coding" } + }; + + var options = new ToonSerializerOptions + { + Delimiter = ToonDelimiter.Pipe + }; + + // Act + var toon = ToonSerializer.Serialize(obj, options); + + // Assert +
Assert.Contains("tags[3|]:", toon); + Assert.Contains("|", toon.Split(':')[1]); + } + + [Fact] + public void Serialize_WithLengthMarker_IncludesHashInHeader() + { + // Arrange + var obj = new + { + tags = new[] { "reading", "gaming", "coding" } + }; + + var options = new ToonSerializerOptions + { + UseLengthMarker = true + }; + + // Act + var toon = ToonSerializer.Serialize(obj, options); + + // Assert + Assert.Contains("tags[#3]:", toon); + } + + [Fact] + public void RoundTrip_ComplexObject_PreservesData() + { + // Arrange + var original = new + { + id = 123, + name = "Test User", + scores = new[] { 95, 87, 92 }, + settings = new + { + theme = "dark", + notifications = true + }, + tags = new[] { "admin", "developer" } + }; + + // Act + var toon = ToonSerializer.Serialize(original); + var result = ToonSerializer.Deserialize(toon); + + // Assert + Assert.NotNull(result); + var deserialized = result.AsObject(); + Assert.Equal(123, deserialized["id"]!.GetValue<int>()); + Assert.Equal("Test User", deserialized["name"]!.GetValue<string>()); + + var scores = deserialized["scores"] as JsonArray; + Assert.NotNull(scores); + Assert.Equal(3, scores.Count); + + var settings = deserialized["settings"] as JsonObject; + Assert.NotNull(settings); + Assert.Equal("dark", settings["theme"]!.GetValue<string>()); + Assert.True(settings["notifications"]!.GetValue<bool>()); + } + + [Fact] + public void Deserialize_StrictMode_ThrowsOnCountMismatch() + { + // Arrange + var toon = "tags[3]: admin,ops"; // Only 2 values, not 3 + + var options = new ToonSerializerOptions + { + Strict = true + }; + + // Act & Assert + Assert.Throws<ToonException>(() => ToonSerializer.Deserialize(toon, options)); + } + + [Fact] + public void Deserialize_NonStrictMode_AllowsCountMismatch() + { + // Arrange + var toon = "tags[3]: admin,ops"; // Only 2 values, not 3 + + var options = new ToonSerializerOptions + { + Strict = false + }; + + // Act + var result = ToonSerializer.Deserialize(toon, options); + + // Assert + Assert.NotNull(result); + var obj =
result.AsObject(); + var tags = obj["tags"] as JsonArray; + Assert.NotNull(tags); + Assert.Equal(2, tags.Count); // Should have 2 items despite header saying 3 + } + + [Fact] + public void Serialize_NumbersWithoutExponent_UsesDecimalNotation() + { + // Arrange + var obj = new + { + large = 1000000, + small = 0.000001, + value = 42 + }; + + // Act + var toon = ToonSerializer.Serialize(obj); + + // Assert + Assert.Contains("large: 1000000", toon); + Assert.Contains("small: 0.000001", toon); + Assert.Contains("value: 42", toon); + // Check that scientific notation (e+ or e-) is not used + Assert.DoesNotContain("e+", toon); + Assert.DoesNotContain("e-", toon); + Assert.DoesNotContain("E+", toon); + Assert.DoesNotContain("E-", toon); + } +} diff --git a/ToonSharp.Tests/ToonSharp.Tests.csproj b/ToonSharp.Tests/ToonSharp.Tests.csproj new file mode 100644 index 0000000..c0535be --- /dev/null +++ b/ToonSharp.Tests/ToonSharp.Tests.csproj @@ -0,0 +1,25 @@ + + + + net9.0 + enable + enable + false + + + + + + + + + + + + + + + + + + diff --git a/ToonSharp.sln b/ToonSharp.sln new file mode 100644 index 0000000..d94b99d --- /dev/null +++ b/ToonSharp.sln @@ -0,0 +1,45 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToonSharp", "ToonSharp\ToonSharp.csproj", "{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToonSharp.Tests", "ToonSharp.Tests\ToonSharp.Tests.csproj", "{CC73B156-3947-43E3-8521-B9D20AF943CD}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x64.ActiveCfg = Debug|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x64.Build.0 = Debug|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x86.ActiveCfg = Debug|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x86.Build.0 = Debug|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|Any CPU.Build.0 = Release|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x64.ActiveCfg = Release|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x64.Build.0 = Release|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x86.ActiveCfg = Release|Any CPU + {9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x86.Build.0 = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x64.ActiveCfg = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x64.Build.0 = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x86.ActiveCfg = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x86.Build.0 = Debug|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|Any CPU.Build.0 = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x64.ActiveCfg = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x64.Build.0 = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x86.ActiveCfg = Release|Any CPU + {CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/ToonSharp/ToonDelimiter.cs 
b/ToonSharp/ToonDelimiter.cs new file mode 100644 index 0000000..1b0142d --- /dev/null +++ b/ToonSharp/ToonDelimiter.cs @@ -0,0 +1,22 @@ +namespace ToonSharp; + +/// <summary> +/// Specifies the delimiter character used for separating array values and tabular fields in TOON format. +/// </summary> +public enum ToonDelimiter : byte +{ + /// <summary> + /// Comma delimiter (,) - the default delimiter. + /// </summary> + Comma = 0, + + /// <summary> + /// Tab delimiter (HTAB, U+0009) for tab-separated values. + /// </summary> + Tab = 1, + + /// <summary> + /// Pipe delimiter (|) for pipe-separated values. + /// </summary> + Pipe = 2 +} diff --git a/ToonSharp/ToonException.cs b/ToonSharp/ToonException.cs new file mode 100644 index 0000000..ade5e38 --- /dev/null +++ b/ToonSharp/ToonException.cs @@ -0,0 +1,74 @@ +namespace ToonSharp; + +/// <summary> +/// The exception that is thrown when an error occurs during TOON serialization or deserialization. +/// </summary> +public sealed class ToonException : Exception +{ + /// <summary> + /// Gets the line number where the error occurred, or null if not applicable. + /// </summary> + public int? LineNumber { get; init; } + + /// <summary> + /// Gets the column number where the error occurred, or null if not applicable. + /// </summary> + public int? ColumnNumber { get; init; } + + /// <summary> + /// Initializes a new instance of the <see cref="ToonException"/> class. + /// </summary> + public ToonException() + { + } + + /// <summary> + /// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message. + /// </summary> + /// <param name="message">The message that describes the error.</param> + public ToonException(string message) : base(message) + { + } + + /// <summary> + /// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message + /// and line number. + /// </summary> + /// <param name="message">The message that describes the error.</param> + /// <param name="lineNumber">The line number where the error occurred.</param> + public ToonException(string message, int lineNumber) : base(FormatMessage(message, lineNumber, null)) + { + LineNumber = lineNumber; + } + + /// <summary> + /// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message, + /// line number, and column number.
+ /// </summary> + /// <param name="message">The message that describes the error.</param> + /// <param name="lineNumber">The line number where the error occurred.</param> + /// <param name="columnNumber">The column number where the error occurred.</param> + public ToonException(string message, int lineNumber, int columnNumber) + : base(FormatMessage(message, lineNumber, columnNumber)) + { + LineNumber = lineNumber; + ColumnNumber = columnNumber; + } + + /// <summary> + /// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message + /// and a reference to the inner exception that is the cause of this exception. + /// </summary> + /// <param name="message">The message that describes the error.</param> + /// <param name="innerException">The exception that is the cause of the current exception.</param> + public ToonException(string message, Exception innerException) : base(message, innerException) + { + } + + private static string FormatMessage(string message, int lineNumber, int? columnNumber) + { + return columnNumber.HasValue + ? $"Line {lineNumber}, Column {columnNumber}: {message}" + : $"Line {lineNumber}: {message}"; + } +} diff --git a/ToonSharp/ToonHelpers.cs b/ToonSharp/ToonHelpers.cs new file mode 100644 index 0000000..2f9b71b --- /dev/null +++ b/ToonSharp/ToonHelpers.cs @@ -0,0 +1,287 @@ +using System.Runtime.CompilerServices; +using System.Text; + +namespace ToonSharp; + +/// <summary> +/// Internal helper methods for TOON serialization and deserialization. +/// </summary> +internal static class ToonHelpers +{ + /// <summary> + /// Gets the character representation of a delimiter. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static char GetDelimiterChar(ToonDelimiter delimiter) => delimiter switch + { + ToonDelimiter.Comma => ',', + ToonDelimiter.Tab => '\t', + ToonDelimiter.Pipe => '|', + _ => ',' + }; + + /// + /// Gets the string representation of a delimiter for headers.
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static string GetDelimiterString(ToonDelimiter delimiter) => delimiter switch + { + ToonDelimiter.Comma => "", + ToonDelimiter.Tab => "\t", + ToonDelimiter.Pipe => "|", + _ => "" + }; + + /// <summary> + /// Tries to parse a delimiter from a character. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool TryParseDelimiter(char c, out ToonDelimiter delimiter) + { + delimiter = c switch + { + ',' => ToonDelimiter.Comma, + '\t' => ToonDelimiter.Tab, + '|' => ToonDelimiter.Pipe, + _ => ToonDelimiter.Comma + }; + return c is ',' or '\t' or '|'; + } + + /// <summary> + /// Determines if a string needs to be quoted according to TOON quoting rules. + /// </summary> + public static bool RequiresQuoting(ReadOnlySpan<char> value, ToonDelimiter delimiter) + { + if (value.IsEmpty) + return true; + + // Check for leading/trailing whitespace + if (char.IsWhiteSpace(value[0]) || char.IsWhiteSpace(value[^1])) + return true; + + // Check for reserved literals + if (value is "true" || value is "false" || value is "null") + return true; + + // Check for hyphen at the start + if (value[0] == '-') + return true; + + // Check for numeric patterns + if (IsNumericLike(value)) + return true; + + var delimiterChar = GetDelimiterChar(delimiter); + + // Check each character + foreach (var c in value) + { + if (c == ':' || c == '"' || c == '\\' || + c == '[' || c == ']' || c == '{' || c == '}' || + c == '\n' || c == '\r' || c == '\t' || + c == delimiterChar) + { + return true; + } + } + + return false; + } + + /// <summary> + /// Checks if a string looks like a number.
+ /// </summary> + private static bool IsNumericLike(ReadOnlySpan<char> value) + { + if (value.IsEmpty) + return false; + + var span = value; + var index = 0; + + // Optional negative sign + if (span[0] == '-') + { + if (span.Length == 1) + return false; + index = 1; + } + + // Check for leading zeros (e.g., "05", "0001") + if (span.Length > index + 1 && span[index] == '0' && char.IsDigit(span[index + 1])) + return true; // Forbidden leading zeros + + var hasDigit = false; + var hasDot = false; + var hasE = false; + + for (; index < span.Length; index++) + { + var c = span[index]; + + if (char.IsDigit(c)) + { + hasDigit = true; + } + else if (c == '.' && !hasDot && !hasE) + { + hasDot = true; + } + else if ((c == 'e' || c == 'E') && !hasE && hasDigit) + { + hasE = true; + hasDigit = false; // Reset for exponent part + + // Check for optional sign after 'e' + if (index + 1 < span.Length && (span[index + 1] == '+' || span[index + 1] == '-')) + index++; + } + else + { + return false; + } + } + + return hasDigit; + } + + /// <summary> + /// Determines if a key can be unquoted (matches the ASCII identifier pattern ^[A-Za-z_][A-Za-z0-9_.]*$). + /// </summary> + public static bool IsValidUnquotedKey(ReadOnlySpan<char> key) + { + if (key.IsEmpty) + return false; + + var first = key[0]; + if (!char.IsAsciiLetter(first) && first != '_') // ASCII only: non-ASCII letters must be quoted + return false; + + for (int i = 1; i < key.Length; i++) + { + var c = key[i]; + if (!char.IsAsciiLetterOrDigit(c) && c != '_' && c != '.') + return false; + } + + return true; + } + + /// <summary> + /// Escapes a string for use in TOON format.
+ /// </summary> + public static string Escape(ReadOnlySpan<char> value) + { + // Fast path: no escaping needed + var needsEscaping = false; + foreach (var c in value) + { + if (c is '\\' or '"' or '\n' or '\r' or '\t') + { + needsEscaping = true; + break; + } + } + + if (!needsEscaping) + return new string(value); + + var sb = new StringBuilder(value.Length + 4); + + foreach (var c in value) + { + switch (c) + { + case '\\': + sb.Append("\\\\"); + break; + case '"': + sb.Append("\\\""); + break; + case '\n': + sb.Append("\\n"); + break; + case '\r': + sb.Append("\\r"); + break; + case '\t': + sb.Append("\\t"); + break; + default: + sb.Append(c); + break; + } + } + + return sb.ToString(); + } + + /// <summary> + /// Unescapes a TOON-escaped string. + /// </summary> + public static string Unescape(ReadOnlySpan<char> value) + { + var index = value.IndexOf('\\'); + if (index < 0) + return new string(value); + + var sb = new StringBuilder(value.Length); + var pos = 0; + + while (index >= 0) + { + // Append characters before the escape + sb.Append(value.Slice(pos, index - pos)); + + if (index + 1 >= value.Length) + throw new ToonException("Unterminated escape sequence"); + + var escapeChar = value[index + 1]; + switch (escapeChar) + { + case '\\': + sb.Append('\\'); + break; + case '"': + sb.Append('"'); + break; + case 'n': + sb.Append('\n'); + break; + case 'r': + sb.Append('\r'); + break; + case 't': + sb.Append('\t'); + break; + default: + throw new ToonException($"Invalid escape sequence: \\{escapeChar}"); + } + + pos = index + 2; + index = value.Slice(pos).IndexOf('\\'); + if (index >= 0) + index += pos; + } + + // Append remaining characters + if (pos < value.Length) + sb.Append(value.Slice(pos)); + + return sb.ToString(); + } + + /// + /// Creates an indentation string with the specified depth and size.
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static string GetIndentation(int depth, int indentSize) + { + if (depth == 0) + return string.Empty; + + var totalSpaces = depth * indentSize; + return new string(' ', totalSpaces); + } +} diff --git a/ToonSharp/ToonReader.cs b/ToonSharp/ToonReader.cs new file mode 100644 index 0000000..7fb0cb2 --- /dev/null +++ b/ToonSharp/ToonReader.cs @@ -0,0 +1,645 @@ +using System.Globalization; +using System.Text.Json.Nodes; + +namespace ToonSharp; + +/// <summary> +/// Reads TOON-formatted text and produces JsonNode structures. +/// </summary> +internal sealed class ToonReader +{ + private readonly ToonSerializerOptions _options; + private string[] _lines = []; + private int _currentLine; + + public ToonReader(ToonSerializerOptions options) + { + _options = options; + } + + public JsonNode? Read(string toon) + { + _lines = toon.Split('\n'); + _currentLine = 0; + + // Skip blank lines and handle empty input + var nonEmptyLines = _lines + .Select((line, index) => (line, index)) + .Where(x => !IsBlankLine(x.line)) + .ToList(); + + if (nonEmptyLines.Count == 0) + { + if (_options.Strict) + throw new ToonException("Empty input"); + return null; + } + + // Determine root form + var firstLine = nonEmptyLines[0].line; + var firstDepth = GetDepth(firstLine, nonEmptyLines[0].index + 1); + + if (firstDepth != 0) + throw new ToonException("First non-empty line must be at depth 0", nonEmptyLines[0].index + 1); + + // Check if it's a root array header + if (IsRootArrayHeader(firstLine)) + { + return ParseRootArray(); + } + + // Check if it's a single primitive (exactly one line, not a header, not key:value; a colon inside quotes does not make it key:value) + if (nonEmptyLines.Count == 1 && FindUnquotedChar(firstLine, ':') < 0) + { + return ParsePrimitive(firstLine.Trim(), ToonDelimiter.Comma); + } + + // Otherwise, it's an object + return ParseRootObject(); + } + + private bool IsBlankLine(string line) + { + return string.IsNullOrWhiteSpace(line); + } + + private bool IsRootArrayHeader(string line) + {
+ var trimmed = line.TrimStart(); + return trimmed.StartsWith('[') && trimmed.Contains(']') && trimmed.Contains(':'); + } + + private JsonNode ParseRootObject() + { + var obj = new JsonObject(); + ParseObjectFields(obj, 0); + return obj; + } + + private JsonNode ParseRootArray() + { + var line = GetCurrentLine(); + var header = ParseArrayHeader(line, _currentLine + 1, null); + _currentLine++; + + var array = new JsonArray(); + + if (header.Count == 0) + return array; + + if (header.IsInline) + { + // Inline primitive array + ParseInlineArrayValues(array, header); + } + else if (header.Fields is not null) + { + // Tabular array + ParseTabularRows(array, header, 1); + } + else + { + // Expanded array + ParseExpandedArrayItems(array, header, 1); + } + + return array; + } + + private void ParseObjectFields(JsonObject obj, int expectedDepth) + { + while (_currentLine < _lines.Length) + { + var line = _lines[_currentLine]; + + if (IsBlankLine(line)) + { + _currentLine++; + continue; + } + + var depth = GetDepth(line, _currentLine + 1); + + if (depth < expectedDepth) + break; + + if (depth > expectedDepth) + throw new ToonException($"Unexpected indentation depth {depth}, expected {expectedDepth}", _currentLine + 1); + + var trimmed = line.TrimStart(); + + // Parse key-value or nested structure + var colonIndex = FindUnquotedChar(trimmed, ':'); + if (colonIndex < 0) + { + if (_options.Strict) + throw new ToonException("Missing colon after key", _currentLine + 1); + _currentLine++; + continue; + } + + var keyPart = trimmed.Substring(0, colonIndex); + var valuePart = trimmed.Substring(colonIndex + 1).TrimStart(); + + // Check if it's an array header + if (keyPart.Contains('[') && keyPart.Contains(']')) + { + // Extract key before the bracket + var bracketIndex = keyPart.IndexOf('['); + var keyBeforeBracket = keyPart.Substring(0, bracketIndex); + var key = ParseKey(keyBeforeBracket); + + var header = ParseArrayHeader(trimmed, _currentLine + 1, key); + _currentLine++; + + var 
array = new JsonArray(); + + if (header.IsInline) + { + ParseInlineArrayValues(array, header); + } + else if (header.Fields is not null) + { + ParseTabularRows(array, header, expectedDepth + 1); + } + else + { + ParseExpandedArrayItems(array, header, expectedDepth + 1); + } + + obj[key] = array; + } + else + { + var key = ParseKey(keyPart); + + if (string.IsNullOrWhiteSpace(valuePart)) + { + // Nested object + _currentLine++; + var nestedObj = new JsonObject(); + ParseObjectFields(nestedObj, expectedDepth + 1); + obj[key] = nestedObj; + } + else + { + // Primitive value + var value = ParsePrimitive(valuePart, _options.Delimiter); + obj[key] = value; + _currentLine++; + } + } + } + } + + private void ParseInlineArrayValues(JsonArray array, ArrayHeader header) + { + if (string.IsNullOrWhiteSpace(header.InlineValues)) + return; + + var values = SplitByDelimiter(header.InlineValues, header.Delimiter); + + if (_options.Strict && values.Count != header.Count) + { + throw new ToonException( + $"Array count mismatch: expected {header.Count}, got {values.Count}", + _currentLine + 1); + } + + foreach (var val in values) + { + array.Add(ParsePrimitive(val.Trim(), header.Delimiter)); + } + } + + private void ParseTabularRows(JsonArray array, ArrayHeader header, int expectedDepth) + { + var rowCount = 0; + + while (_currentLine < _lines.Length) + { + var line = _lines[_currentLine]; + + if (IsBlankLine(line)) + { + if (_options.Strict) + throw new ToonException("Blank lines not allowed inside tabular arrays", _currentLine + 1); + _currentLine++; + continue; + } + + var depth = GetDepth(line, _currentLine + 1); + + if (depth < expectedDepth) + break; + + if (depth > expectedDepth) + throw new ToonException($"Unexpected indentation in tabular row", _currentLine + 1); + + var trimmed = line.TrimStart(); + + // Check if this is a row or a new key-value line + if (IsTabularRow(trimmed, header.Delimiter)) + { + var values = SplitByDelimiter(trimmed, header.Delimiter); + + if 
(_options.Strict && header.Fields != null && values.Count != header.Fields.Count) + { + throw new ToonException( + $"Row width mismatch: expected {header.Fields.Count} values, got {values.Count}", + _currentLine + 1); + } + + var obj = new JsonObject(); + if (header.Fields != null) + for (int i = 0; i < Math.Min(values.Count, header.Fields.Count); i++) + { + obj[header.Fields[i]] = ParsePrimitive(values[i].Trim(), header.Delimiter); + } + + array.Add(obj); + rowCount++; + _currentLine++; + } + else + { + // End of rows + break; + } + } + + if (_options.Strict && rowCount != header.Count) + { + throw new ToonException( + $"Tabular array count mismatch: expected {header.Count} rows, got {rowCount}", + _currentLine + 1); + } + } + + private bool IsTabularRow(string line, ToonDelimiter delimiter) + { + var delimiterIndex = FindUnquotedChar(line, ToonHelpers.GetDelimiterChar(delimiter)); + var colonIndex = FindUnquotedChar(line, ':'); + + if (colonIndex < 0) + return true; // No colon = row + + if (delimiterIndex < 0) + return false; // Colon but no delimiter = key-value + + return delimiterIndex < colonIndex; // Delimiter before colon = row + } + + private void ParseExpandedArrayItems(JsonArray array, ArrayHeader header, int expectedDepth) + { + var itemCount = 0; + + while (_currentLine < _lines.Length) + { + var line = _lines[_currentLine]; + + if (IsBlankLine(line)) + { + if (_options.Strict) + throw new ToonException("Blank lines not allowed inside arrays", _currentLine + 1); + _currentLine++; + continue; + } + + var depth = GetDepth(line, _currentLine + 1); + + if (depth < expectedDepth) + break; + + if (depth > expectedDepth) + throw new ToonException($"Unexpected indentation in array item", _currentLine + 1); + + var trimmed = line.TrimStart(); + + if (!trimmed.StartsWith("- ")) + break; + + var itemContent = trimmed.Substring(2); + + // Determine item type + if (itemContent.StartsWith('[')) + { + // Inline array item + var itemHeader = 
ParseArrayHeader(itemContent, _currentLine + 1, null); + var innerArray = new JsonArray(); + ParseInlineArrayValues(innerArray, itemHeader); + array.Add(innerArray); + _currentLine++; + } + else if (itemContent.Contains(':')) + { + // Object item + var obj = ParseObjectListItem(itemContent, expectedDepth); + array.Add(obj); + } + else + { + // Primitive item + array.Add(ParsePrimitive(itemContent, header.Delimiter)); + _currentLine++; + } + + itemCount++; + } + + if (_options.Strict && itemCount != header.Count) + { + throw new ToonException( + $"Array count mismatch: expected {header.Count} items, got {itemCount}", + _currentLine + 1); + } + } + + private JsonObject ParseObjectListItem(string firstFieldLine, int itemDepth) + { + var obj = new JsonObject(); + + // Parse first field from the hyphen line + var colonIndex = FindUnquotedChar(firstFieldLine, ':'); + if (colonIndex < 0) + { + if (_options.Strict) + throw new ToonException("Missing colon in object field", _currentLine + 1); + _currentLine++; + return obj; + } + + var keyPart = firstFieldLine.Substring(0, colonIndex); + var valuePart = firstFieldLine.Substring(colonIndex + 1).TrimStart(); + + var key = ParseKey(keyPart); + + // Check if it's an array + if (keyPart.Contains('[') && keyPart.Contains(']')) + { + var header = ParseArrayHeader(firstFieldLine, _currentLine + 1, key); + _currentLine++; + + var array = new JsonArray(); + + if (header.IsInline) + { + ParseInlineArrayValues(array, header); + } + else if (header.Fields is not null) + { + ParseTabularRows(array, header, itemDepth + 1); + } + else + { + ParseExpandedArrayItems(array, header, itemDepth + 1); + } + + obj[key] = array; + } + else if (string.IsNullOrWhiteSpace(valuePart)) + { + // Nested object - fields at depth + 2 + _currentLine++; + var nestedObj = new JsonObject(); + ParseObjectFields(nestedObj, itemDepth + 2); + obj[key] = nestedObj; + } + else + { + obj[key] = ParsePrimitive(valuePart, _options.Delimiter); + _currentLine++; + } + + 
// Parse remaining fields at itemDepth + ParseObjectFields(obj, itemDepth); + + return obj; + } + + private ArrayHeader ParseArrayHeader(string line, int lineNumber, string? key) + { + var bracketStart = line.IndexOf('['); + var bracketEnd = line.IndexOf(']'); + + if (bracketStart < 0 || bracketEnd < 0 || bracketEnd <= bracketStart) + throw new ToonException("Invalid array header", lineNumber); + + var bracketContent = line.Substring(bracketStart + 1, bracketEnd - bracketStart - 1); + + // Parse length marker and delimiter + var hasLengthMarker = bracketContent.StartsWith('#'); + if (hasLengthMarker) + bracketContent = bracketContent.Substring(1); + + // Detect delimiter + var delimiter = ToonDelimiter.Comma; + if (bracketContent.EndsWith('\t')) + { + delimiter = ToonDelimiter.Tab; + bracketContent = bracketContent.Substring(0, bracketContent.Length - 1); + } + else if (bracketContent.EndsWith('|')) + { + delimiter = ToonDelimiter.Pipe; + bracketContent = bracketContent.Substring(0, bracketContent.Length - 1); + } + + if (!int.TryParse(bracketContent, out var count)) + throw new ToonException($"Invalid array length: {bracketContent}", lineNumber); + + // Parse fields if present + List? 
fields = null; + var fieldsStart = line.IndexOf('{', bracketEnd); + var fieldsEnd = line.IndexOf('}', bracketEnd); + + if (fieldsStart >= 0 && fieldsEnd > fieldsStart) + { + var fieldsContent = line.Substring(fieldsStart + 1, fieldsEnd - fieldsStart - 1); + var fieldNames = SplitByDelimiter(fieldsContent, delimiter); + fields = fieldNames.Select(f => ParseKey(f.Trim())).ToList(); + } + + // Check for inline values + var colonIndex = line.IndexOf(':', bracketEnd); + if (colonIndex < 0) + throw new ToonException("Missing colon after array header", lineNumber); + + var afterColon = line.Substring(colonIndex + 1).TrimStart(); + var isInline = !string.IsNullOrWhiteSpace(afterColon); + + return new ArrayHeader + { + Key = key, + Count = count, + Delimiter = delimiter, + Fields = fields, + IsInline = isInline, + InlineValues = isInline ? afterColon : null, + HasLengthMarker = hasLengthMarker + }; + } + + private string ParseKey(string keyText) + { + var trimmed = keyText.Trim(); + + if (trimmed.StartsWith('"') && trimmed.EndsWith('"')) + { + var content = trimmed.Substring(1, trimmed.Length - 2); + return ToonHelpers.Unescape(content); + } + + return trimmed; + } + + private JsonValue? ParsePrimitive(string text, ToonDelimiter delimiter) + { + if (string.IsNullOrWhiteSpace(text)) + return JsonValue.Create(""); + + var trimmed = text.Trim(); + + // Quoted string + if (trimmed.StartsWith('"') && trimmed.EndsWith('"') && trimmed.Length >= 2) + { + var content = trimmed.Substring(1, trimmed.Length - 2); + return JsonValue.Create(ToonHelpers.Unescape(content)); + } + + switch (trimmed) + { + // Literals + case "null": + return JsonValue.Create((string?)null); + case "true": + return JsonValue.Create(true); + case "false": + return JsonValue.Create(false); + } + + // Try parsing as number + return TryParseNumber(trimmed, out var number) ? 
JsonValue.Create(number) : + // Otherwise it's an unquoted string + JsonValue.Create(trimmed); + } + + private bool TryParseNumber(string text, out double number) + { + // Check for forbidden leading zeros + if (text.Length <= 1 || text[0] != '0' || !char.IsDigit(text[1])) + return double.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out number); + number = 0; + return false; // Treat as string + + } + + private List SplitByDelimiter(string text, ToonDelimiter delimiter) + { + var result = new List(); + var current = new System.Text.StringBuilder(); + var inQuotes = false; + var delimiterChar = ToonHelpers.GetDelimiterChar(delimiter); + + for (int i = 0; i < text.Length; i++) + { + var c = text[i]; + + if (c == '"') + { + inQuotes = !inQuotes; + current.Append(c); + } + else if (c == '\\' && inQuotes && i + 1 < text.Length) + { + current.Append(c); + current.Append(text[++i]); + } + else if (c == delimiterChar && !inQuotes) + { + result.Add(current.ToString()); + current.Clear(); + } + else + { + current.Append(c); + } + } + + result.Add(current.ToString()); + return result; + } + + private int FindUnquotedChar(string text, char target) + { + var inQuotes = false; + + for (int i = 0; i < text.Length; i++) + { + var c = text[i]; + + if (c == '"') + { + inQuotes = !inQuotes; + } + else if (c == '\\' && inQuotes && i + 1 < text.Length) + { + i++; // Skip escaped character + } + else if (c == target && !inQuotes) + { + return i; + } + } + + return -1; + } + + private int GetDepth(string line, int lineNumber) + { + var spaces = 0; + + foreach (var c in line) + { + if (c == ' ') + { + spaces++; + } + else if (c == '\t') + { + if (_options.Strict) + throw new ToonException("Tabs are not allowed in indentation", lineNumber); + spaces += _options.IndentSize; + } + else + { + break; + } + } + + if (_options.Strict && spaces % _options.IndentSize != 0) + { + throw new ToonException( + $"Indentation must be an exact multiple of {_options.IndentSize} spaces", 
+ lineNumber); + } + + return spaces / _options.IndentSize; + } + + private string GetCurrentLine() + { + return _currentLine < _lines.Length ? _lines[_currentLine] : string.Empty; + } + + private record ArrayHeader + { + public required string? Key { get; init; } + public required int Count { get; init; } + public required ToonDelimiter Delimiter { get; init; } + public required List? Fields { get; init; } + public required bool IsInline { get; init; } + public required string? InlineValues { get; init; } + public required bool HasLengthMarker { get; init; } + } +} diff --git a/ToonSharp/ToonSerializer.cs b/ToonSharp/ToonSerializer.cs new file mode 100644 index 0000000..2ddebac --- /dev/null +++ b/ToonSharp/ToonSerializer.cs @@ -0,0 +1,125 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Nodes; + +namespace ToonSharp; + +/// +/// Provides functionality to serialize objects to TOON format and deserialize TOON data to objects. +/// +public static class ToonSerializer +{ + /// + /// Converts the provided value to a TOON string. + /// + /// The type of the value to serialize. + /// The value to convert. + /// Options to control serialization behavior. + /// A TOON string representation of the value. + public static string Serialize(TValue value, ToonSerializerOptions? options = null) + { + options ??= ToonSerializerOptions.Default; + + // Convert to JsonNode first for normalization + var jsonNode = JsonSerializer.SerializeToNode(value); + + var writer = new ToonWriter(options); + return writer.Write(jsonNode); + } + + /// + /// Converts the provided value to a TOON string asynchronously. + /// + /// The type of the value to serialize. + /// The UTF-8 stream to write the TOON data to. + /// The value to convert. + /// Options to control serialization behavior. + /// A cancellation token to observe. + /// A task representing the asynchronous operation. 
+ public static async Task SerializeAsync( + Stream stream, + TValue value, + ToonSerializerOptions? options = null, + CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(stream); + + var toonString = Serialize(value, options); + var writer = new StreamWriter(stream, leaveOpen: true); + await writer.WriteAsync(toonString.AsMemory(), cancellationToken).ConfigureAwait(false); + await writer.FlushAsync(cancellationToken).ConfigureAwait(false); + } + + /// + /// Parses the TOON string and returns the result as a . + /// + /// The TOON string to parse. + /// Options to control deserialization behavior. + /// A representation of the TOON data. + public static JsonNode? Deserialize(string toon, ToonSerializerOptions? options = null) + { + ArgumentNullException.ThrowIfNull(toon); + + options ??= ToonSerializerOptions.Default; + var reader = new ToonReader(options); + return reader.Read(toon); + } + + /// + /// Parses the TOON string and returns a value of the type specified by a generic type parameter. + /// + /// The target type to deserialize to. + /// The TOON string to parse. + /// Options to control deserialization behavior. + /// A representation of the TOON data. + public static TValue? Deserialize(string toon, ToonSerializerOptions? options = null) + { + var jsonNode = Deserialize(toon, options); + return jsonNode is null ? default : jsonNode.Deserialize(); + } + + /// + /// Reads the UTF-8 encoded stream and returns a value of the type specified by a generic type parameter. + /// + /// The target type to deserialize to. + /// The UTF-8 stream to read the TOON data from. + /// Options to control deserialization behavior. + /// A cancellation token to observe. + /// A task representing the asynchronous operation with a representation of the TOON data. + public static async Task DeserializeAsync( + Stream stream, + ToonSerializerOptions? 
options = null, + CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(stream); + + using var reader = new StreamReader(stream, leaveOpen: true); + var toonString = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false); + return Deserialize(toonString, options); + } + + /// + /// Attempts to parse the TOON string and returns a value that indicates whether the operation succeeded. + /// + /// The target type to deserialize to. + /// The TOON string to parse. + /// When this method returns, contains the parsed value. + /// Options to control deserialization behavior. + /// if the TOON string was converted successfully; otherwise, . + public static bool TryDeserialize( + string toon, + [NotNullWhen(true)] out TValue? result, + ToonSerializerOptions? options = null) + { + try + { + result = Deserialize(toon, options); + return result is not null; + } + catch + { + result = default; + return false; + } + } +} diff --git a/ToonSharp/ToonSerializerOptions.cs b/ToonSharp/ToonSerializerOptions.cs new file mode 100644 index 0000000..9c3ac9a --- /dev/null +++ b/ToonSharp/ToonSerializerOptions.cs @@ -0,0 +1,67 @@ +namespace ToonSharp; + +/// +/// Provides options for controlling TOON serialization and deserialization behavior. +/// +public sealed class ToonSerializerOptions +{ + private int _indentSize = 2; + + /// + /// Gets or sets the number of spaces per indentation level. + /// Default is 2 spaces per level. + /// + /// + /// Thrown when the value is less than 1 or greater than 8. + /// + public int IndentSize + { + get => _indentSize; + set + { + if (value < 1 || value > 8) + { + throw new ArgumentOutOfRangeException(nameof(value), "IndentSize must be between 1 and 8."); + } + _indentSize = value; + } + } + + /// + /// Gets or sets the delimiter character used for array values and tabular fields. + /// Default is . + /// + /// + /// This is the document delimiter used outside of any array scope. 
+ /// Individual arrays can override this with their own header declarations. + /// + public ToonDelimiter Delimiter { get; set; } = ToonDelimiter.Comma; + + /// + /// Gets or sets whether to include the length marker ("#") in array headers. + /// Default is false (length marker omitted). + /// + public bool UseLengthMarker { get; set; } + + /// + /// Gets or sets whether to enable strict mode during deserialization. + /// Default is true. + /// + /// + /// When enabled, the parser enforces: + /// + /// Array count and tabular row width must match declared lengths + /// Indentation must be exact multiples of + /// Tabs cannot be used for indentation + /// Invalid escape sequences cause errors + /// Missing colons after keys cause errors + /// Blank lines inside arrays/tabular rows cause errors + /// + /// + public bool Strict { get; set; } = true; + + /// + /// Gets the default options with standard settings. + /// + public static ToonSerializerOptions Default { get; } = new(); +} diff --git a/ToonSharp/ToonSharp.csproj b/ToonSharp/ToonSharp.csproj new file mode 100644 index 0000000..f6a69c2 --- /dev/null +++ b/ToonSharp/ToonSharp.csproj @@ -0,0 +1,39 @@ + + + + net9.0 + enable + enable + 12 + + + true + 1.0.0 + 0xZunia + 1.0.0 + 1.0.0 + en + ToonSharp + A high-performance .NET 9 library for the TOON data serialization format - human-readable, line-oriented data format optimized for LLM contexts. 
+ toon;serialization;data-format;json;parser;encoder;decoder;tabular;llm + MIT + README.md + + + true + true + snupkg + + + true + true + https://github.com/0xZunia/ToonSharp + https://github.com/0xZunia/ToonSharp + git + + + + + + + diff --git a/ToonSharp/ToonWriter.cs b/ToonSharp/ToonWriter.cs new file mode 100644 index 0000000..e66df07 --- /dev/null +++ b/ToonSharp/ToonWriter.cs @@ -0,0 +1,640 @@ +using System.Globalization; +using System.Text; +using System.Text.Json; +using System.Text.Json.Nodes; + +namespace ToonSharp; + +/// +/// Writes TOON-formatted output from JsonNode structures. +/// +internal sealed class ToonWriter +{ + private readonly ToonSerializerOptions _options; + private readonly StringBuilder _sb; + + public ToonWriter(ToonSerializerOptions options) + { + _options = options; + _sb = new StringBuilder(); + } + + public string Write(JsonNode? node) + { + _sb.Clear(); + + if (node is null) + { + _sb.Append("null"); + return _sb.ToString(); + } + + WriteValue(node, 0, _options.Delimiter, isRoot: true); + + // Spec: No trailing newline at end of document + return _sb.ToString(); + } + + private void WriteValue(JsonNode node, int depth, ToonDelimiter activeDelimiter, bool isRoot = false, string? 
forcedKey = null) + { + switch (node) + { + case JsonObject obj: + if (isRoot) + { + WriteObject(obj, depth, activeDelimiter); + } + else if (forcedKey is not null) + { + WriteKey(forcedKey, depth); + if (obj.Count == 0) + { + // Empty object + _sb.Append(':'); + } + else + { + _sb.AppendLine(":"); + WriteObject(obj, depth + 1, activeDelimiter); + } + } + else + { + WriteObject(obj, depth, activeDelimiter); + } + break; + + case JsonArray arr: + if (isRoot) + { + WriteRootArray(arr, depth, activeDelimiter); + } + else if (forcedKey is not null) + { + WriteArrayWithKey(forcedKey, arr, depth, activeDelimiter); + } + else + { + throw new ToonException("Array without key in non-root context"); + } + break; + + case JsonValue val: + if (forcedKey is not null) + { + WriteKey(forcedKey, depth); + _sb.Append(": "); + WritePrimitive(val, activeDelimiter); + } + else if (isRoot) + { + WritePrimitive(val, activeDelimiter); + } + else + { + WritePrimitive(val, activeDelimiter); + } + break; + } + } + + private void WriteObject(JsonObject obj, int depth, ToonDelimiter activeDelimiter) + { + if (obj.Count == 0 && depth == 0) + { + // Empty root object = empty document + return; + } + + var first = true; + foreach (var kvp in obj) + { + if (!first) + _sb.AppendLine(); + first = false; + + var key = kvp.Key; + var value = kvp.Value; + + if (value is null) + { + WriteKey(key, depth); + _sb.Append(": null"); + } + else + { + WriteValue(value, depth, activeDelimiter, isRoot: false, forcedKey: key); + } + } + } + + private void WriteRootArray(JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + WriteArrayInternal(null, array, depth, activeDelimiter); + } + + private void WriteArrayWithKey(string key, JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + WriteArrayInternal(key, array, depth, activeDelimiter); + } + + private void WriteArrayInternal(string? 
key, JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + // Determine array format: inline primitives, tabular, or expanded list + if (array.Count == 0) + { + WriteEmptyArray(key, depth, activeDelimiter); + return; + } + + if (IsPrimitiveArray(array)) + { + WriteInlineArray(key, array, depth, activeDelimiter); + } + else if (IsTabularArray(array, out var fields)) + { + if (fields != null) WriteTabularArray(key, array, fields, depth, activeDelimiter); + } + else + { + WriteExpandedArray(key, array, depth, activeDelimiter); + } + } + + private void WriteEmptyArray(string? key, int depth, ToonDelimiter activeDelimiter) + { + if (key is not null) + WriteKey(key, depth); + + _sb.Append('['); + if (_options.UseLengthMarker) + _sb.Append('#'); + _sb.Append('0'); + _sb.Append(ToonHelpers.GetDelimiterString(activeDelimiter)); + _sb.Append("]:"); + } + + private bool IsPrimitiveArray(JsonArray array) + { + foreach (var item in array) + { + if (item is not JsonValue) + return false; + } + return true; + } + + private bool IsTabularArray(JsonArray array, out List? fields) + { + fields = null; + + // All elements must be objects + if (array.Count == 0) + return false; + + var firstKeys = new List(); + JsonObject? firstObj = null; + + foreach (var item in array) + { + if (item is not JsonObject obj) + return false; + + if (firstObj is null) + { + firstObj = obj; + firstKeys.AddRange(obj.Select(kvp => kvp.Key)); + } + else + { + // All objects must have the same keys + if (obj.Count != firstKeys.Count) + return false; + + foreach (var key in firstKeys) + { + if (!obj.ContainsKey(key)) + return false; + } + } + + // All values must be primitives + foreach (var kvp in obj) + { + if (kvp.Value is not JsonValue) + return false; + } + } + + fields = firstKeys; + return firstKeys.Count > 0; + } + + private void WriteInlineArray(string? 
key, JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + if (key is not null) + WriteKey(key, depth); + + WriteArrayHeader(array.Count, activeDelimiter, fields: null); + _sb.Append(": "); + + var delimiterChar = ToonHelpers.GetDelimiterChar(activeDelimiter); + + for (int i = 0; i < array.Count; i++) + { + if (i > 0) + _sb.Append(delimiterChar); + + var item = array[i]; + if (item is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + } + } + + private void WriteTabularArray(string? key, JsonArray array, List fields, int depth, ToonDelimiter activeDelimiter) + { + if (key is not null) + WriteKey(key, depth); + + WriteArrayHeader(array.Count, activeDelimiter, fields); + _sb.AppendLine(":"); + + var delimiterChar = ToonHelpers.GetDelimiterChar(activeDelimiter); + + foreach (var item in array) + { + if (item is not JsonObject obj) + continue; + + WriteIndent(depth + 1); + + for (int i = 0; i < fields.Count; i++) + { + if (i > 0) + _sb.Append(delimiterChar); + + var fieldValue = obj[fields[i]]; + if (fieldValue is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + else + { + _sb.Append("null"); + } + } + + if (item != array[^1]) + _sb.AppendLine(); + } + } + + private void WriteExpandedArray(string? 
key, JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + if (key is not null) + WriteKey(key, depth); + + WriteArrayHeader(array.Count, activeDelimiter, fields: null); + _sb.AppendLine(":"); + + for (int i = 0; i < array.Count; i++) + { + var item = array[i]; + WriteIndent(depth + 1); + _sb.Append("- "); + + if (item is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + else if (item is JsonArray innerArray) + { + WriteInlineArray(null, innerArray, 0, activeDelimiter); + } + else if (item is JsonObject obj) + { + WriteObjectAsListItem(obj, depth + 1, activeDelimiter); + } + + if (i < array.Count - 1) + _sb.AppendLine(); + } + } + + private void WriteObjectAsListItem(JsonObject obj, int depth, ToonDelimiter activeDelimiter) + { + if (obj.Count == 0) + { + // Empty object is just "-" + return; + } + + // First field on the hyphen line (no indentation, already on "- " line) + var first = true; + foreach (var kvp in obj) + { + if (first) + { + first = false; + var key = kvp.Key; + var value = kvp.Value; + + // Write key without indentation + WriteKeyUnquoted(key); + + if (value is null) + { + _sb.Append(": null"); + } + else if (value is JsonValue val) + { + _sb.Append(": "); + WritePrimitive(val, activeDelimiter); + } + else if (value is JsonArray arr) + { + WriteArrayHeader(arr.Count, activeDelimiter, fields: null); + if (IsPrimitiveArray(arr)) + { + _sb.Append(": "); + WriteInlineArrayValues(arr, activeDelimiter); + } + else if (IsTabularArray(arr, out var fields)) + { + WriteArrayHeader(arr.Count, activeDelimiter, fields); + _sb.AppendLine(":"); + if (fields != null) WriteTabularArrayRows(arr, fields, depth, activeDelimiter); + } + else + { + _sb.AppendLine(":"); + WriteExpandedArrayValues(arr, depth, activeDelimiter); + } + } + else if (value is JsonObject nestedObj) + { + _sb.AppendLine(":"); + WriteObject(nestedObj, depth + 2, activeDelimiter); // +2 for nested object in list item + } + } + else + { + _sb.AppendLine(); + var key = 
kvp.Key; + var value = kvp.Value; + + if (value is null) + { + WriteKey(key, depth); + _sb.Append(": null"); + } + else + { + WriteValue(value, depth, activeDelimiter, isRoot: false, forcedKey: key); + } + } + } + } + + private void WriteInlineArrayValues(JsonArray array, ToonDelimiter activeDelimiter) + { + var delimiterChar = ToonHelpers.GetDelimiterChar(activeDelimiter); + for (int i = 0; i < array.Count; i++) + { + if (i > 0) + _sb.Append(delimiterChar); + + if (array[i] is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + } + } + + private void WriteTabularArrayRows(JsonArray array, List fields, int depth, ToonDelimiter activeDelimiter) + { + var delimiterChar = ToonHelpers.GetDelimiterChar(activeDelimiter); + + foreach (var item in array) + { + if (item is not JsonObject obj) + continue; + + WriteIndent(depth + 1); + + for (int i = 0; i < fields.Count; i++) + { + if (i > 0) + _sb.Append(delimiterChar); + + var fieldValue = obj[fields[i]]; + if (fieldValue is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + else + { + _sb.Append("null"); + } + } + + if (item != array[^1]) + _sb.AppendLine(); + } + } + + private void WriteExpandedArrayValues(JsonArray array, int depth, ToonDelimiter activeDelimiter) + { + for (int i = 0; i < array.Count; i++) + { + var item = array[i]; + WriteIndent(depth + 1); + _sb.Append("- "); + + if (item is JsonValue val) + { + WritePrimitive(val, activeDelimiter); + } + else if (item is JsonArray innerArray) + { + WriteInlineArray(null, innerArray, 0, activeDelimiter); + } + else if (item is JsonObject obj) + { + WriteObjectAsListItem(obj, depth + 1, activeDelimiter); + } + + if (i < array.Count - 1) + _sb.AppendLine(); + } + } + + private void WriteArrayHeader(int count, ToonDelimiter delimiter, List? 
fields) + { + _sb.Append('['); + + if (_options.UseLengthMarker) + _sb.Append('#'); + + _sb.Append(count); + _sb.Append(ToonHelpers.GetDelimiterString(delimiter)); + _sb.Append(']'); + + if (fields is not null && fields.Count > 0) + { + _sb.Append('{'); + var delimiterChar = ToonHelpers.GetDelimiterChar(delimiter); + + for (int i = 0; i < fields.Count; i++) + { + if (i > 0) + _sb.Append(delimiterChar); + + WriteKeyUnquoted(fields[i]); + } + + _sb.Append('}'); + } + } + + private void WritePrimitive(JsonValue value, ToonDelimiter activeDelimiter) + { + var obj = value.GetValue(); + + switch (obj) + { + case null: + _sb.Append("null"); + break; + + case bool b: + _sb.Append(b ? "true" : "false"); + break; + + case string s: + WriteString(s, activeDelimiter); + break; + + case JsonElement elem: + WritePrimitiveFromElement(elem, activeDelimiter); + break; + + default: + // Numbers + if (obj is byte or sbyte or short or ushort or int or uint or long or ulong or float or double or decimal) + { + WriteNumber(obj); + } + else + { + WriteString(obj.ToString() ?? "null", activeDelimiter); + } + break; + } + } + + private void WritePrimitiveFromElement(JsonElement elem, ToonDelimiter activeDelimiter) + { + switch (elem.ValueKind) + { + case JsonValueKind.String: + WriteString(elem.GetString() ?? "", activeDelimiter); + break; + case JsonValueKind.Number: + WriteNumber(elem.GetDouble()); + break; + case JsonValueKind.True: + _sb.Append("true"); + break; + case JsonValueKind.False: + _sb.Append("false"); + break; + case JsonValueKind.Null: + case JsonValueKind.Undefined: + _sb.Append("null"); + break; + } + } + + private void WriteNumber(object num) + { + // Spec: Numbers must be rendered without scientific notation, -0 → 0 + var str = num switch + { + float f => NormalizeNumber(f), + double d => NormalizeNumber(d), + decimal m => m.ToString("0.##################", CultureInfo.InvariantCulture), + _ => Convert.ToString(num, CultureInfo.InvariantCulture) ?? 
"0" + }; + + _sb.Append(str); + } + + private string NormalizeNumber(double d) + { + // Handle -0 + if (d == 0) + return "0"; + + // Handle NaN and Infinity (should be null per spec) + if (double.IsNaN(d) || double.IsInfinity(d)) + return "null"; + + // Format without scientific notation + return d.ToString("0.##################", CultureInfo.InvariantCulture); + } + + private void WriteString(string str, ToonDelimiter activeDelimiter) + { + if (ToonHelpers.RequiresQuoting(str, activeDelimiter)) + { + _sb.Append('"'); + _sb.Append(ToonHelpers.Escape(str)); + _sb.Append('"'); + } + else + { + _sb.Append(str); + } + } + + private void WriteKey(string key, int depth) + { + WriteIndent(depth); + + if (ToonHelpers.IsValidUnquotedKey(key)) + { + _sb.Append(key); + } + else + { + _sb.Append('"'); + _sb.Append(ToonHelpers.Escape(key)); + _sb.Append('"'); + } + } + + private void WriteKeyUnquoted(string key) + { + if (ToonHelpers.IsValidUnquotedKey(key)) + { + _sb.Append(key); + } + else + { + _sb.Append('"'); + _sb.Append(ToonHelpers.Escape(key)); + _sb.Append('"'); + } + } + + private void WriteIndent(int depth) + { + if (depth > 0) + { + _sb.Append(ToonHelpers.GetIndentation(depth, _options.IndentSize)); + } + } +}