1.0.0
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
bin/
|
||||
obj/
|
||||
/packages/
|
||||
riderModule.iml
|
||||
/_ReSharper.Caches/
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025-PRESENT Reyan CARLIER
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,275 @@
|
||||
# ToonSharp
|
||||
|
||||
A high-performance .NET 9 library for serializing and deserializing data in the TOON format — a human-readable, line-oriented data serialization format optimized for LLM contexts.
|
||||
|
||||
[.NET 9](https://dotnet.microsoft.com/download)
|
||||
[MIT License](LICENSE)
|
||||
|
||||
## Features
|
||||
|
||||
- **Full TOON v1.2 Specification Support** - Complete implementation of the TOON specification
|
||||
- **Performance-Driven** - Built with modern .NET 9 performance features
|
||||
- **Type-Safe** - Leverages C# 12 features and nullable reference types
|
||||
- **Strict Mode** - Optional strict validation for production environments
|
||||
- **Tabular Data** - First-class support for tabular arrays
|
||||
- **Multiple Delimiters** - Comma, tab, and pipe delimiter support
|
||||
- **Fully Documented** - Comprehensive XML documentation for IntelliSense
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
dotnet add package ToonSharp
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Serialization
|
||||
|
||||
```csharp
|
||||
using ToonSharp;
|
||||
|
||||
// Simple object
|
||||
var user = new
|
||||
{
|
||||
id = 123,
|
||||
name = "Ada Lovelace",
|
||||
active = true
|
||||
};
|
||||
|
||||
var toon = ToonSerializer.Serialize(user);
|
||||
// Output:
|
||||
// id: 123
|
||||
// name: Ada Lovelace
|
||||
// active: true
|
||||
```
|
||||
|
||||
### Deserialization
|
||||
|
||||
```csharp
|
||||
using ToonSharp;
|
||||
using System.Text.Json.Nodes;
|
||||
|
||||
var toon = """
|
||||
id: 123
|
||||
name: Ada Lovelace
|
||||
active: true
|
||||
""";
|
||||
|
||||
var result = ToonSerializer.Deserialize(toon);
|
||||
var obj = result.AsObject();
|
||||
|
||||
Console.WriteLine(obj["name"]); // "Ada Lovelace"
|
||||
```
|
||||
|
||||
### Strongly-Typed Deserialization
|
||||
|
||||
```csharp
|
||||
public class User
|
||||
{
|
||||
public int Id { get; set; }
|
||||
public string Name { get; set; }
|
||||
public bool Active { get; set; }
|
||||
}
|
||||
|
||||
var user = ToonSerializer.Deserialize<User>(toon);
|
||||
Console.WriteLine(user.Name); // "Ada Lovelace"
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Nested Objects
|
||||
|
||||
```csharp
|
||||
var data = new
|
||||
{
|
||||
user = new
|
||||
{
|
||||
id = 123,
|
||||
name = "Ada"
|
||||
}
|
||||
};
|
||||
|
||||
var toon = ToonSerializer.Serialize(data);
|
||||
// Output:
|
||||
// user:
|
||||
// id: 123
|
||||
// name: Ada
|
||||
```
|
||||
|
||||
### Primitive Arrays
|
||||
|
||||
```csharp
|
||||
var data = new
|
||||
{
|
||||
tags = new[] { "admin", "developer", "ops" }
|
||||
};
|
||||
|
||||
var toon = ToonSerializer.Serialize(data);
|
||||
// Output:
|
||||
// tags[3]: admin,developer,ops
|
||||
```
|
||||
|
||||
### Tabular Data
|
||||
|
||||
```csharp
|
||||
var data = new
|
||||
{
|
||||
products = new[]
|
||||
{
|
||||
new { sku = "A1", qty = 2, price = 9.99 },
|
||||
new { sku = "B2", qty = 1, price = 14.50 }
|
||||
}
|
||||
};
|
||||
|
||||
var toon = ToonSerializer.Serialize(data);
|
||||
// Output:
|
||||
// products[2]{sku,qty,price}:
|
||||
// A1,2,9.99
|
||||
// B2,1,14.5
|
||||
```
|
||||
|
||||
### Custom Delimiters
|
||||
|
||||
```csharp
|
||||
var options = new ToonSerializerOptions
|
||||
{
|
||||
Delimiter = ToonDelimiter.Tab
|
||||
};
|
||||
|
||||
var data = new { tags = new[] { "reading", "gaming", "coding" } };
|
||||
var toon = ToonSerializer.Serialize(data, options);
|
||||
// Output:
|
||||
// tags[3<TAB>]: reading<TAB>gaming<TAB>coding   (<TAB> stands for a literal tab character, U+0009)
|
||||
```
|
||||
|
||||
### Async Operations
|
||||
|
||||
```csharp
|
||||
// Serialize to stream
|
||||
await using var stream = File.Create("data.toon");
|
||||
await ToonSerializer.SerializeAsync(stream, data);
|
||||
|
||||
// Deserialize from stream
|
||||
await using var readStream = File.OpenRead("data.toon");
|
||||
var result = await ToonSerializer.DeserializeAsync<MyType>(readStream);
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### ToonSerializerOptions
|
||||
|
||||
```csharp
|
||||
var options = new ToonSerializerOptions
|
||||
{
|
||||
IndentSize = 2, // Spaces per indentation level (default: 2)
|
||||
Delimiter = ToonDelimiter.Comma, // Document delimiter (default: Comma)
|
||||
UseLengthMarker = false, // Include # in array headers (default: false)
|
||||
Strict = true // Enable strict mode (default: true)
|
||||
};
|
||||
```
|
||||
|
||||
### Strict Mode
|
||||
|
||||
When `Strict = true` (default), the parser enforces:
|
||||
|
||||
- Array counts must match declared lengths
|
||||
- Indentation must be exact multiples of `IndentSize`
|
||||
- Tabs cannot be used for indentation
|
||||
- Invalid escape sequences cause errors
|
||||
- Missing colons after keys cause errors
|
||||
- Blank lines inside arrays/tabular rows cause errors
|
||||
|
||||
```csharp
|
||||
// Strict mode (default)
|
||||
var strictOptions = new ToonSerializerOptions { Strict = true };
|
||||
|
||||
// Non-strict mode (more lenient)
|
||||
var lenientOptions = new ToonSerializerOptions { Strict = false };
|
||||
```
|
||||
|
||||
## Supported Types
|
||||
|
||||
### Primitives
|
||||
- `string`
|
||||
- `int`, `long`, `short`, `byte` (and unsigned variants)
|
||||
- `float`, `double`, `decimal`
|
||||
- `bool`
|
||||
- `null`
|
||||
|
||||
### Collections
|
||||
- Arrays (`T[]`)
|
||||
- `List<T>`
|
||||
- `IEnumerable<T>`
|
||||
|
||||
### Objects
|
||||
- POCOs (Plain Old CLR Objects)
|
||||
- Anonymous types
|
||||
- `Dictionary<string, T>`
|
||||
- `JsonObject` / `JsonArray` / `JsonNode`
|
||||
|
||||
## API Reference
|
||||
|
||||
### ToonSerializer
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `Serialize<T>(T value, options?)` | Converts value to TOON string |
|
||||
| `SerializeAsync<T>(Stream, T value, options?, token?)` | Async serialize to stream |
|
||||
| `Deserialize(string toon, options?)` | Parses TOON to JsonNode |
|
||||
| `Deserialize<T>(string toon, options?)` | Parses TOON to type T |
|
||||
| `DeserializeAsync<T>(Stream, options?, token?)` | Async deserialize from stream |
|
||||
| `TryDeserialize<T>(string, out T?, options?)` | Safe deserialization |
|
||||
|
||||
### ToonDelimiter
|
||||
|
||||
- `Comma` - Default comma delimiter (`,`)
|
||||
- `Tab` - Tab delimiter (`\t`)
|
||||
- `Pipe` - Pipe delimiter (`|`)
|
||||
|
||||
## Error Handling
|
||||
|
||||
```csharp
|
||||
try
|
||||
{
|
||||
var result = ToonSerializer.Deserialize(toon);
|
||||
}
|
||||
catch (ToonException ex)
|
||||
{
|
||||
Console.WriteLine($"Error at line {ex.LineNumber}: {ex.Message}");
|
||||
}
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
ToonSharp is built with performance in mind:
|
||||
|
||||
- Uses `Span<T>` and `ReadOnlySpan<T>` for zero-allocation string operations
|
||||
- Minimal allocations during parsing
|
||||
- Efficient `StringBuilder` usage for serialization
|
||||
- Optimized for .NET 9 runtime improvements
|
||||
|
||||
## Specification
|
||||
|
||||
This library implements the [TOON Specification v1.2](SPEC.md).
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please ensure:
|
||||
|
||||
1. All tests pass (`dotnet test`)
|
||||
2. Code follows .NET conventions
|
||||
3. XML documentation is complete
|
||||
4. Tests cover changes
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see [LICENSE](LICENSE) for details.
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
Built with .NET 9 following modern C# best practices and the System.Text.Json design patterns.
|
||||
This is a port of https://github.com/johannschopplich/toon to .NET 9.
|
||||
|
||||
---
|
||||
|
||||
Made with ❤️ for the .NET community
|
||||
@@ -0,0 +1,691 @@
|
||||
Original link: https://github.com/johannschopplich/toon/blob/main/SPEC.md
|
||||
|
||||
# TOON Specification (v1.2)
|
||||
|
||||
Status: Draft, normative where indicated. This version specifies both encoding (producer behavior) and decoding (parser behavior).
|
||||
|
||||
- Normative statements use RFC 2119/8174 keywords: MUST, MUST NOT, SHOULD, SHOULD NOT, MAY.
|
||||
- Audience: implementers of encoders/decoders/validators; tool authors; practitioners embedding TOON in LLM prompts.
|
||||
|
||||
Changelog:
|
||||
- v1.2:
|
||||
- Centralized decoding rules (primitives, keys) and strict-mode checklist.
|
||||
- Made header grammar normative and clarified delimiter scoping.
|
||||
- Tightened strict-mode indentation (exact multiples; tabs error).
|
||||
- Defined blank-line and trailing-newline decoding behavior with explicit skipping rules outside arrays.
|
||||
- Clarified hyphen-based quoting: "-" or any string starting with "-" MUST be quoted.
|
||||
- Clarified BigInt normalization (quoted string when out of safe range).
|
||||
- Unified root-form detection and row/key disambiguation language; disambiguation uses first unquoted delimiter vs colon.
|
||||
- Introduced "document delimiter" vs "active delimiter" terminology.
|
||||
- v1.1: Made decoding behavior normative; added strict-mode rules, delimiter-aware parsing, and reference algorithms; decoder options (indent, strict).
|
||||
- v1: Initial encoding, normalization, and conformance rules.
|
||||
|
||||
Scope:
|
||||
- Defines the data model, encoding normalization (reference JS/TS), concrete syntax, decoding semantics, and conformance requirements for producing and consuming TOON.
|
||||
|
||||
## 1. Terminology and Conventions
|
||||
|
||||
- TOON document: A sequence of UTF-8 text lines formatted according to this spec.
|
||||
- Line: A sequence of non-newline characters terminated by LF (U+000A) in serialized form. Encoders MUST use LF.
|
||||
- Indentation level (depth): Leading indentation measured in fixed-size space units (indentSize). Depth 0 has no indentation.
|
||||
- Indentation unit (indentSize): A fixed number of spaces per level (default 2). Tabs MUST NOT be used for indentation.
|
||||
- Header: The bracketed declaration for arrays, optionally followed by a field list, and terminating with a colon; e.g., key[3]: or items[2]{a,b}:.
|
||||
- Field list: Brace-enclosed, delimiter-separated list of field names for tabular arrays: {f1<delim>f2}.
|
||||
- List item: A line beginning with "- " at a given depth representing an element in an expanded array.
|
||||
- Delimiter: The character used to separate array/tabular values: comma (default), tab (HTAB, U+0009), or pipe ("|").
|
||||
- Document delimiter: The encoder-selected delimiter used for quoting decisions outside any array scope (default comma).
|
||||
- Active delimiter: The delimiter declared by the closest array header in scope, used to split inline primitive arrays and tabular rows under that header; it also governs quoting decisions for values within that array’s scope.
|
||||
- Length marker: Optional "#" prefix for array lengths in headers, e.g., [#3]. Decoders MUST accept and ignore it semantically.
|
||||
- Primitive: string, number, boolean, or null.
|
||||
- Object: Mapping from string keys to JsonValue.
|
||||
- Array: Ordered sequence of JsonValue.
|
||||
- JsonValue: Primitive | Object | Array.
|
||||
- Strict mode: Decoder mode that enforces counts, indentation, and delimiter consistency; also rejects invalid escapes and missing colons (default: true).
|
||||
|
||||
Notation:
|
||||
- Regular expressions appear in slash-delimited form.
|
||||
- ABNF snippets follow RFC 5234; HTAB means the U+0009 character.
|
||||
- Examples are informative unless stated otherwise.
|
||||
|
||||
## 2. Data Model
|
||||
|
||||
- TOON models data as:
|
||||
- JsonPrimitive: string | number | boolean | null
|
||||
- JsonObject: { [string]: JsonValue }
|
||||
- JsonArray: JsonValue[]
|
||||
- Ordering:
|
||||
- Array order MUST be preserved.
|
||||
- Object key order MUST be preserved as encountered by the encoder.
|
||||
- Numbers (encoding):
|
||||
- -0 MUST be normalized to 0.
|
||||
- Finite numbers MUST be rendered without scientific notation (e.g., 1e6 → 1000000; 1e-6 → 0.000001).
|
||||
- Null: Represented as the literal null.
|
||||
|
||||
## 3. Encoding Normalization (Reference Encoder)
|
||||
|
||||
The reference encoder normalizes non-JSON values to the data model:
|
||||
|
||||
- Number:
|
||||
- Finite → number (non-exponential). -0 → 0.
|
||||
- NaN, +Infinity, -Infinity → null.
|
||||
- Implementations MUST ensure decimal rendering does not use exponent notation.
|
||||
- BigInt (JavaScript):
|
||||
- If within Number.MIN_SAFE_INTEGER..Number.MAX_SAFE_INTEGER → converted to number.
|
||||
- Otherwise → converted to a decimal string (e.g., "9007199254740993") and encoded as a string (quoted because it is numeric-like).
|
||||
- Date → ISO string (e.g., "2025-01-01T00:00:00.000Z").
|
||||
- Set → array by iterating entries and normalizing each element.
|
||||
- Map → object using String(key) for keys and normalizing values.
|
||||
- Plain object → own enumerable string keys in encounter order; values normalized recursively.
|
||||
- Function, symbol, undefined, or unrecognized types → null.
|
||||
|
||||
Note: Other language ports SHOULD apply analogous normalization consistent with this spec’s data model and encoding rules.
|
||||
|
||||
## 4. Decoding Interpretation (Reference Decoder)
|
||||
|
||||
Decoders map text tokens to host values:
|
||||
|
||||
- Quoted tokens (strings and keys):
|
||||
- MUST be unescaped per Section 7.1 (only \\, \", \n, \r, \t are valid). Any other escape or an unterminated string MUST error.
|
||||
- Quoted primitives remain strings even if they look like numbers/booleans/null.
|
||||
- Unquoted value tokens:
|
||||
- true, false, null → booleans/null.
|
||||
- Numeric parsing:
|
||||
- MUST accept standard decimal and exponent forms (e.g., 42, -3.14, 1e-6, -1E+9).
|
||||
- MUST treat tokens with forbidden leading zeros (e.g., "05", "0001") as strings (not numbers).
|
||||
- Only finite numbers are expected from conforming encoders.
|
||||
- Otherwise → string.
|
||||
- Keys:
|
||||
- Decoded as strings (quoted keys MUST be unescaped per Section 7.1).
|
||||
- A colon MUST follow a key; missing colon MUST error.
|
||||
|
||||
## 5. Concrete Syntax and Root Form
|
||||
|
||||
TOON is a deterministic, line-oriented, indentation-based notation.
|
||||
|
||||
- Objects:
|
||||
- key: value for primitives.
|
||||
- key: alone for nested or empty objects; nested fields appear at depth +1.
|
||||
- Arrays:
|
||||
- Primitive arrays are inline: key[N<delim?>]: v1<delim>v2….
|
||||
- Arrays of arrays (primitives): expanded list items under a header: key[N<delim?>]: then "- [M<delim?>]: …".
|
||||
- Arrays of objects:
|
||||
- Tabular form when uniform and primitive-only: key[N<delim?>]{f1<delim>f2}: then one row per line.
|
||||
- Otherwise: expanded list items: key[N<delim?>]: with "- …" items (see Sections 9.4 and 10).
|
||||
- Root form discovery:
|
||||
- If the first non-empty depth-0 line is a valid root array header per Section 6 (must include a colon), decode a root array.
|
||||
- Else if the document has exactly one non-empty line and it is neither a valid array header nor a key-value line (quoted or unquoted key), decode a single primitive.
|
||||
- Otherwise, decode an object.
|
||||
- In strict mode, multiple non-key/value, non-header lines at depth 0 are invalid.
|
||||
|
||||
## 6. Header Syntax (Normative)
|
||||
|
||||
Array headers declare length and active delimiter, and optionally field names.
|
||||
|
||||
General forms:
|
||||
- Root header (no key): [<marker?>N<delim?>]:
|
||||
- With key: key[<marker?>N<delim?>]:
|
||||
- Tabular fields: key[<marker?>N<delim?>]{field1<delim>field2<delim>…}:
|
||||
|
||||
Where:
|
||||
- N is the non-negative integer length.
|
||||
- <marker?> is optional "#"; decoders MUST accept and ignore it semantically.
|
||||
- <delim?> is:
|
||||
- absent for comma (","),
|
||||
- HTAB (U+0009) for tab,
|
||||
- "|" for pipe.
|
||||
- Field names in braces are separated by the same active delimiter and encoded as keys (Section 7.3).
|
||||
|
||||
Spacing and delimiters:
|
||||
- Every header line MUST end with a colon.
|
||||
- When inline values follow a header on the same line (non-empty primitive arrays), there MUST be exactly one space after the colon before the first value.
|
||||
- The active delimiter declared by the bracket segment applies to:
|
||||
- splitting inline primitive arrays on that header line,
|
||||
- splitting tabular field names in "{…}",
|
||||
- splitting all rows/items within the header’s scope,
|
||||
- unless a nested header changes it.
|
||||
- The same delimiter symbol declared in the bracket MUST be used in the fields segment and in all row/value splits in that scope.
|
||||
- Absence of a delimiter symbol in a bracket segment ALWAYS means comma, regardless of any parent header.
|
||||
|
||||
Normative header grammar (ABNF):
|
||||
```
|
||||
bracket-seg = "[" [ "#" ] 1*DIGIT [ delimsym ] "]"
|
||||
delimsym = HTAB / "|"
|
||||
; Field names are keys (quoted/unquoted) separated by the active delimiter
|
||||
fields-seg = "{" fieldname *( delim fieldname ) "}"
|
||||
delim = delimsym / ","
|
||||
fieldname = key
|
||||
|
||||
header = [ key ] bracket-seg [ fields-seg ] ":"
|
||||
key = unquoted-key / quoted-key
|
||||
|
||||
; Unquoted keys must match identifier pattern
|
||||
unquoted-key = ( ALPHA / "_" ) *( ALPHA / DIGIT / "_" / "." )
|
||||
|
||||
; Quoted keys use only escapes from Section 7.1
|
||||
; (Exact escaped-char repertoire is defined in Section 7.1)
|
||||
; quoted-key = DQUOTE *(escaped-char / safe-char) DQUOTE
|
||||
```
|
||||
|
||||
Decoding requirements:
|
||||
- The bracket segment MUST parse as a non-negative integer length N.
|
||||
- If a trailing tab or pipe appears inside the brackets, it selects the active delimiter; otherwise comma is active.
|
||||
- If a fields segment occurs between the bracket and the colon, parse field names using the active delimiter; quoted names MUST be unescaped per Section 7.1.
|
||||
- A colon MUST follow the bracket and optional fields; missing colon MUST error.
|
||||
|
||||
## 7. Strings and Keys
|
||||
|
||||
### 7.1 Escaping (Encoding and Decoding)
|
||||
|
||||
In quoted strings and keys, the following characters MUST be escaped:
|
||||
- "\\" → "\\\\"
|
||||
- "\"" → "\\\""
|
||||
- U+000A newline → "\\n"
|
||||
- U+000D carriage return → "\\r"
|
||||
- U+0009 tab → "\\t"
|
||||
|
||||
Decoders MUST reject any other escape sequence and unterminated strings.
|
||||
|
||||
Tabs are allowed inside quoted strings and as a declared delimiter; they MUST NOT be used for indentation (Section 12).
|
||||
|
||||
### 7.2 Quoting Rules for String Values (Encoding)
|
||||
|
||||
A string value MUST be quoted if any of the following is true:
|
||||
- It is empty ("").
|
||||
- It has leading or trailing whitespace.
|
||||
- It equals true, false, or null (case-sensitive).
|
||||
- It is numeric-like:
|
||||
- Matches /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i (e.g., "42", "-3.14", "1e-6").
|
||||
- Or matches /^0\d+$/ (leading-zero decimals such as "05").
|
||||
- It contains a colon (:), double quote ("), or backslash (\).
|
||||
- It contains brackets or braces ([, ], {, }).
|
||||
- It contains control characters: newline, carriage return, or tab.
|
||||
- It contains the relevant delimiter:
|
||||
- Inside array scope: the active delimiter (Section 1).
|
||||
- Outside array scope: the document delimiter (Section 1).
|
||||
- It equals "-" or starts with "-" (any hyphen at position 0).
|
||||
|
||||
Otherwise, the string MAY be emitted without quotes. Unicode, emoji, and strings with internal (non-leading/trailing) spaces are safe unquoted provided they do not violate the conditions.
|
||||
|
||||
### 7.3 Key Encoding (Encoding)
|
||||
|
||||
Object keys and tabular field names:
|
||||
- MAY be unquoted only if they match: ^[A-Za-z_][\w.]*$.
|
||||
- Otherwise, they MUST be quoted and escaped per Section 7.1.
|
||||
|
||||
### 7.4 Decoding Rules for Strings and Keys (Decoding)
|
||||
|
||||
- Quoted strings and keys MUST be unescaped per Section 7.1; any other escape MUST error. Quoted primitives remain strings.
|
||||
- Unquoted values:
|
||||
- true/false/null → boolean/null
|
||||
- Numeric tokens → numbers (with the leading-zero rule in Section 4)
|
||||
- Otherwise → strings
|
||||
- Keys (quoted or unquoted) MUST be followed by ":"; missing colon MUST error.
|
||||
|
||||
## 8. Objects
|
||||
|
||||
- Encoding:
|
||||
- Primitive fields: key: value (single space after colon).
|
||||
- Nested or empty objects: key: on its own line. If non-empty, nested fields appear at depth +1.
|
||||
- Key order: Implementations MUST preserve encounter order when emitting fields.
|
||||
- An empty object at the root yields an empty document (no lines).
|
||||
- Decoding:
|
||||
- A line "key:" with nothing after the colon at depth d opens an object; subsequent lines at depth > d belong to that object until the depth decreases to ≤ d.
|
||||
- Lines "key: value" at the same depth are sibling fields.
|
||||
- Missing colon after a key MUST error.
|
||||
|
||||
## 9. Arrays
|
||||
|
||||
### 9.1 Primitive Arrays (Inline)
|
||||
|
||||
- Encoding:
|
||||
- Non-empty arrays: key[N<delim?>]: v1<delim>v2<delim>… where each vi is encoded as a primitive (Section 7) with delimiter-aware quoting.
|
||||
- Empty arrays: key[0<delim?>]: (no values following).
|
||||
- Root arrays: [N<delim?>]: v1<delim>…
|
||||
- Decoding:
|
||||
- Split using the active delimiter declared by the header; non-active delimiters MUST NOT split values.
|
||||
- In strict mode, the number of decoded values MUST equal N; otherwise MUST error.
|
||||
|
||||
### 9.2 Arrays of Arrays (Primitives Only) — Expanded List
|
||||
|
||||
- Encoding:
|
||||
- Parent header: key[N<delim?>]: on its own line.
|
||||
- Each inner primitive array is a list item:
|
||||
- - [M<delim?>]: v1<delim>v2<delim>…
|
||||
- Empty inner arrays: - [0<delim?>]:
|
||||
- Decoding:
|
||||
- Items appear at depth +1, each starting with "- " and an inner array header "[M<delim?>]: …".
|
||||
- Inner arrays are split using their own active delimiter; in strict mode, counts MUST match M.
|
||||
- In strict mode, the number of list items MUST equal outer N.
|
||||
|
||||
### 9.3 Arrays of Objects — Tabular Form
|
||||
|
||||
Tabular detection (encoding; MUST hold for all elements):
|
||||
- Every element is an object.
|
||||
- All objects have the same set of keys (order per object MAY vary).
|
||||
- All values across these keys are primitives (no nested arrays/objects).
|
||||
|
||||
When satisfied (encoding):
|
||||
- Header: key[N<delim?>]{f1<delim>f2<delim>…}: where field order is the first object’s key encounter order.
|
||||
- Field names encoded per Section 7.3.
|
||||
- Rows: one line per object at depth +1 under the header; values are encoded primitives (Section 7) and joined by the active delimiter.
|
||||
- Root tabular arrays omit the key: [N<delim?>]{…}: followed by rows.
|
||||
|
||||
Decoding:
|
||||
- A tabular header declares the active delimiter and ordered field list.
|
||||
- Rows appear at depth +1 as delimiter-separated value lines.
|
||||
- Strict mode MUST enforce:
|
||||
- Each row’s value count equals the field count.
|
||||
- The number of rows equals N.
|
||||
- Disambiguation at row depth (unquoted tokens):
|
||||
- Compute the first unquoted occurrence of the active delimiter and the first unquoted colon.
|
||||
- If a same-depth line has no unquoted colon → row.
|
||||
- If both appear, compare first-unquoted positions:
|
||||
- Delimiter before colon → row.
|
||||
- Colon before delimiter → key-value line (end of rows).
|
||||
- If a line has an unquoted colon but no unquoted active delimiter → key-value line (end of rows).
|
||||
|
||||
### 9.4 Mixed / Non-Uniform Arrays — Expanded List
|
||||
|
||||
When tabular requirements are not met (encoding):
|
||||
- Header: key[N<delim?>]:
|
||||
- Each element is rendered as a list item at depth +1 under the header:
|
||||
- Primitive: - <primitive>
|
||||
- Primitive array: - [M<delim?>]: v1<delim>…
|
||||
- Object: formatted per Section 10 (objects as list items).
|
||||
- Complex arrays: - key'[M<delim?>]: followed by nested items as appropriate.
|
||||
|
||||
Decoding:
|
||||
- Header declares list length N and the active delimiter for any nested inline arrays.
|
||||
- Each list item starts with "- " at depth +1 and is parsed as:
|
||||
- Primitive (no colon and no array header),
|
||||
- Inline primitive array (- [M<delim?>]: …),
|
||||
- Object with first field on the hyphen line (- key: … or - key[N…]{…}: …),
|
||||
- Or nested arrays via nested headers.
|
||||
- In strict mode, the number of list items MUST equal N.
|
||||
|
||||
## 10. Objects as List Items
|
||||
|
||||
For an object appearing as a list item:
|
||||
|
||||
- Empty object list item: a single "-" at the list-item indentation level.
|
||||
- First field on the hyphen line:
|
||||
- Primitive: - key: value
|
||||
- Primitive array: - key[M<delim?>]: v1<delim>…
|
||||
- Tabular array: - key[N<delim?>]{fields}:
|
||||
- Followed by tabular rows at depth +1 (relative to the hyphen line).
|
||||
- Non-uniform array: - key[N<delim?>]:
|
||||
- Followed by list items at depth +1.
|
||||
- Object: - key:
|
||||
- Nested object fields appear at depth +2 (i.e., one deeper than subsequent sibling fields of the same list item).
|
||||
- Remaining fields of the same object appear at depth +1 under the hyphen line in encounter order, using normal object field rules.
|
||||
|
||||
Decoding:
|
||||
- The first field is parsed from the hyphen line. If it is a nested object (- key:), nested fields are at +2 relative to the hyphen line; subsequent fields of the same list item are at +1.
|
||||
- If the first field is a tabular header on the hyphen line, its rows are at +1; subsequent sibling fields continue at +1 after the rows.
|
||||
|
||||
## 11. Delimiters
|
||||
|
||||
- Supported delimiters:
|
||||
- Comma (default): header omits the delimiter symbol.
|
||||
- Tab: header includes HTAB inside brackets and braces (e.g., [N<TAB>], {a<TAB>b}); rows/inline arrays use tabs.
|
||||
- Pipe: header includes "|" inside brackets and braces; rows/inline arrays use "|".
|
||||
- Document vs Active delimiter:
|
||||
- Encoders select a document delimiter (option) that influences quoting in contexts not governed by an array header (e.g., object values).
|
||||
- Inside an array header’s scope, the active delimiter governs splitting and quoting of inline arrays and tabular rows for that array.
|
||||
- Absence of a delimiter symbol in a header ALWAYS means comma for that array’s scope; it does not inherit from any parent.
|
||||
- Delimiter-aware quoting (encoding):
|
||||
- Within an array’s scope, strings containing the active delimiter MUST be quoted to avoid splitting.
|
||||
- Outside any array scope, encoders SHOULD use the document delimiter to decide delimiter-aware quoting for values.
|
||||
- Strings containing non-active delimiters do not require quoting unless another quoting condition applies (Section 7.2).
|
||||
- Delimiter-aware parsing (decoding):
|
||||
- Inline arrays and tabular rows MUST be split only on the active delimiter declared by the nearest array header.
|
||||
- Strings containing the active delimiter MUST be quoted to avoid splitting; non-active delimiters MUST NOT cause splits.
|
||||
- Nested headers may change the active delimiter; decoding MUST use the delimiter declared by the nearest header.
|
||||
- If the bracket declares tab or pipe, the same symbol MUST be used in the fields segment and for splitting all rows/values in that scope.
|
||||
|
||||
## 12. Indentation and Whitespace
|
||||
|
||||
- Encoding:
|
||||
- Encoders MUST use a consistent number of spaces per level (default 2; configurable).
|
||||
- Tabs MUST NOT be used for indentation.
|
||||
- Exactly one space after ":" in key: value lines.
|
||||
- Exactly one space after array headers when followed by inline values.
|
||||
- No trailing spaces at the end of any line.
|
||||
- No trailing newline at the end of the document.
|
||||
- Decoding:
|
||||
- Strict mode:
|
||||
- The number of leading spaces on a line MUST be an exact multiple of indentSize; otherwise MUST error.
|
||||
- Tabs used as indentation MUST error. Tabs are allowed in quoted strings and as the HTAB delimiter.
|
||||
- Non-strict mode:
|
||||
- Depth MAY be computed as floor(indentSpaces / indentSize).
|
||||
- Tabs in indentation are non-conforming and MAY be accepted or rejected.
|
||||
- Surrounding whitespace around tokens SHOULD be tolerated; internal semantics follow quoting rules.
|
||||
- Blank lines:
|
||||
- Outside arrays/tabular rows: decoders SHOULD ignore completely blank lines (do not create/close structures).
|
||||
- Inside arrays/tabular rows: in strict mode, MUST error; in non-strict mode, MAY be ignored and not counted as a row/item.
|
||||
- Trailing newline at end-of-file: decoders SHOULD accept; validators MAY warn.
|
||||
|
||||
Recommended blank-line handling (normative where stated):
|
||||
- Before decoding, or during scanning:
|
||||
- Track blank lines with depth.
|
||||
- For strict mode: if a blank line occurs between the first and last row/item line in an array/tabular block, this MUST error.
|
||||
- Otherwise (outside arrays/tabular rows), blank lines SHOULD be skipped and not contribute to root-form detection.
|
||||
- Empty input means: after ignoring trailing newlines and ignorable blank lines outside arrays/tabular rows, there are no non-empty lines.
|
||||
|
||||
## 13. Conformance and Options
|
||||
|
||||
Conformance classes:
|
||||
|
||||
- Encoder:
|
||||
- MUST produce output adhering to all normative rules in Sections 2–12 and 15.
|
||||
- MUST be deterministic regarding:
|
||||
- Object field order (encounter order).
|
||||
- Tabular detection (uniform vs non-uniform).
|
||||
- Quoting decisions given values and delimiter context (document delimiter or active delimiter in array scope).
|
||||
|
||||
- Decoder:
|
||||
- MUST implement tokenization, escaping, and type interpretation per Sections 4 and 7.4.
|
||||
- MUST parse array headers per Section 6 and apply the declared active delimiter to inline arrays and tabular rows.
|
||||
- MUST implement structure and depth rules per Sections 8–11, including objects-as-list-items placement.
|
||||
- MUST enforce strict-mode rules in Section 14 when strict = true.
|
||||
|
||||
- Validator:
|
||||
- SHOULD verify structural conformance (headers, indentation, list markers).
|
||||
- SHOULD verify whitespace invariants.
|
||||
- SHOULD verify delimiter consistency between headers and rows.
|
||||
- SHOULD verify length counts vs declared [N].
|
||||
|
||||
Options:
|
||||
- Encoder options:
|
||||
- indent (default: 2 spaces)
|
||||
- delimiter (document delimiter; default: comma; alternatives: tab, pipe)
|
||||
- lengthMarker (default: disabled)
|
||||
- Decoder options:
|
||||
- indent (default: 2 spaces)
|
||||
- strict (default: true)
|
||||
|
||||
Note: Section 14 is authoritative for strict-mode errors; validators MAY add informative diagnostics for style and encoding invariants.
|
||||
|
||||
## 14. Strict Mode Errors and Diagnostics (Authoritative Checklist)
|
||||
|
||||
When strict mode is enabled (default), decoders MUST error on:
|
||||
|
||||
- Array count mismatches:
|
||||
- Inline primitive arrays: decoded value count ≠ declared N.
|
||||
- List arrays: number of list items ≠ declared N.
|
||||
- Tabular arrays: number of rows ≠ declared N.
|
||||
- Tabular row width mismatches:
|
||||
- Any row’s value count ≠ field count.
|
||||
- Missing colon in key context.
|
||||
- Invalid escape sequences or unterminated strings in quoted tokens.
|
||||
- Indentation errors:
|
||||
- Leading spaces not a multiple of indentSize.
|
||||
- Any tab used in indentation.
|
||||
- Delimiter mismatch (e.g., rows joined by a different delimiter than declared), detected via width/count checks and header scope.
|
||||
- Blank lines inside arrays/tabular rows.
|
||||
- Empty input (document with no non-empty lines after ignoring trailing newline(s) and ignorable blank lines outside arrays/tabular rows).
|
||||
|
||||
Validators SHOULD additionally report:
|
||||
- Trailing spaces, trailing newlines (encoding invariants).
|
||||
- Headers missing delimiter marks when non-comma delimiter is in use.
|
||||
- Values violating delimiter-aware quoting rules.
|
||||
|
||||
Recommended error messages (informative):
|
||||
- Missing colon after key
|
||||
- Unterminated string: missing closing quote
|
||||
- Invalid escape sequence: \x
|
||||
- Indentation must be an exact multiple of N spaces
|
||||
- Tabs are not allowed in indentation
|
||||
- Expected N tabular rows, but got M
|
||||
- Expected N list array items, but got M
|
||||
- Expected K values in row, but got L
|
||||
|
||||
## 15. Security Considerations
|
||||
|
||||
- Injection and ambiguity are mitigated by quoting rules:
|
||||
- Strings with colon, the relevant delimiter (document or active), hyphen marker cases ("-" or strings starting with "-"), control characters, or brackets/braces MUST be quoted.
|
||||
- Strict-mode checks (Section 14) detect malformed strings, truncation, or injected rows/items via length and width mismatches.
|
||||
- Encoders SHOULD avoid excessive memory on large inputs; implement streaming/tabular row emission where feasible.
|
||||
- Unicode:
|
||||
- Encoders SHOULD avoid altering Unicode beyond required escaping; decoders SHOULD accept valid UTF-8 in quoted strings/keys (with only the five escapes).
|
||||
|
||||
## 16. Internationalization
|
||||
|
||||
- Full Unicode is supported in keys and values, subject to quoting and escaping rules.
|
||||
- Encoders MUST NOT apply locale-dependent formatting for numbers or booleans (e.g., no thousands separators).
|
||||
- ISO 8601 strings SHOULD be used for Date normalization.
|
||||
|
||||
## 17. Interoperability and Mappings (Informative)
|
||||
|
||||
- JSON:
|
||||
- TOON deterministically encodes JSON-compatible data (after normalization).
|
||||
- Arrays of uniform objects map to CSV-like rows; other structures map to YAML-like nested forms.
|
||||
- CSV:
|
||||
- TOON tabular sections generalize CSV with explicit lengths, field lists, and flexible delimiter choice.
|
||||
- YAML:
|
||||
- TOON borrows indentation and list-item patterns but uses fewer quotes and explicit array headers.
|
||||
|
||||
## 18. Media Type and File Extensions (Provisional)
|
||||
|
||||
- Suggested media type: text/toon
|
||||
- Suggested file extension: .toon
|
||||
- Encoding: UTF-8
|
||||
- Line endings: LF (U+000A)
|
||||
|
||||
## 19. Examples (Informative)
|
||||
|
||||
Objects:
|
||||
```
|
||||
id: 123
|
||||
name: Ada
|
||||
active: true
|
||||
```
|
||||
|
||||
Nested objects:
|
||||
```
|
||||
user:
  id: 123
  name: Ada
|
||||
```
|
||||
|
||||
Primitive arrays:
|
||||
```
|
||||
tags[3]: admin,ops,dev
|
||||
```
|
||||
|
||||
Arrays of arrays (primitives):
|
||||
```
|
||||
pairs[2]:
  - [2]: 1,2
  - [2]: 3,4
|
||||
```
|
||||
|
||||
Tabular arrays:
|
||||
```
|
||||
items[2]{sku,qty,price}:
  A1,2,9.99
  B2,1,14.5
|
||||
```
|
||||
|
||||
Mixed arrays:
|
||||
```
|
||||
items[3]:
  - 1
  - a: 1
  - text
|
||||
```
|
||||
|
||||
Objects as list items (first field on hyphen line):
|
||||
```
|
||||
items[2]:
  - id: 1
    name: First
  - id: 2
    name: Second
    extra: true
|
||||
```
|
||||
|
||||
Nested tabular inside a list item:
|
||||
```
|
||||
items[1]:
  - users[2]{id,name}:
    1,Ada
    2,Bob
    status: active
|
||||
```
|
||||
|
||||
Delimiter variations:
|
||||
```
|
||||
# Tab delimiter
|
||||
items[2	]{sku	name	qty	price}:
  A1	Widget	2	9.99
  B2	Gadget	1	14.5
|
||||
|
||||
# Pipe delimiter
|
||||
tags[3|]: reading|gaming|coding
|
||||
```
|
||||
|
||||
Length marker:
|
||||
```
|
||||
tags[#3]: reading,gaming,coding
|
||||
pairs[#2]:
  - [#2]: a,b
  - [#2]: c,d
|
||||
```
|
||||
|
||||
Quoted colons and disambiguation (rows continue; colon is inside quotes):
|
||||
```
|
||||
links[2]{id,url}:
  1,"http://a:b"
  2,"https://example.com?q=a:b"
|
||||
```
|
||||
|
||||
## 20. Parsing Helpers (Informative)
|
||||
|
||||
These sketches illustrate structure and common decoding helpers. They are informative; normative behavior is defined in Sections 4–12 and 14.
|
||||
|
||||
### 20.1 Decoding Overview
|
||||
|
||||
- Split input into lines; compute depth from leading spaces and indent size (Section 12).
|
||||
- Skip ignorable blank lines outside arrays/tabular rows (Section 12).
|
||||
- Decide root form per Section 5.
|
||||
- For objects at depth d: process lines at depth d; for arrays at depth d: read rows/list items at depth d+1.
|
||||
|
||||
### 20.2 Array Header Parsing
|
||||
|
||||
- Locate the first "[ … ]" segment on the line; parse:
|
||||
- Optional leading "#" marker (ignored semantically).
|
||||
- Length N as decimal integer.
|
||||
- Optional delimiter symbol at the end: HTAB or pipe (comma otherwise).
|
||||
- If a "{ … }" fields segment occurs between the "]" and the ":", parse field names using the active delimiter; unescape quoted names.
|
||||
- Require a colon ":" after the bracket/fields segment.
|
||||
- Return the header (key?, length, delimiter, fields?, hasLengthMarker) and any inline values after the colon.
|
||||
- Absence of a delimiter symbol in the bracket segment ALWAYS means comma for that header (no inheritance).
|
||||
|
||||
### 20.3 parseDelimitedValues
|
||||
|
||||
- Iterate characters left-to-right while maintaining a current token and an inQuotes flag.
|
||||
- On a double quote, toggle inQuotes.
|
||||
- While inQuotes, treat backslash + next char as a literal pair (string parser validates later).
|
||||
- Only split on the active delimiter when not in quotes (unquoted occurrences).
|
||||
- Trim surrounding spaces around each token. Empty tokens decode to empty string.
|
||||
|
||||
### 20.4 Primitive Token Parsing
|
||||
|
||||
- If token starts with a quote, it MUST be a properly quoted string (no trailing characters after the closing quote). Unescape using only the five escapes; otherwise MUST error.
|
||||
- Else if token is true/false/null → boolean/null.
|
||||
- Else if token is numeric without forbidden leading zeros and finite → number.
|
||||
- Else → string.
|
||||
|
||||
### 20.5 Object and List Item Parsing
|
||||
|
||||
- Key-value line: parse a key up to the first colon; missing colon → MUST error. The remainder of the line is the primitive value (if present).
|
||||
- Nested object: "key:" with nothing after colon opens a nested object. If this is:
|
||||
- A field inside a regular object: nested fields are at depth +1 relative to that line.
|
||||
- The first field on a list-item hyphen line: nested fields at depth +2 relative to the hyphen line; subsequent fields at +1.
|
||||
- List items:
|
||||
- Lines start with "- " at one deeper depth than the parent array header.
|
||||
- After "- ":
|
||||
- If "[ … ]:" appears → inline array item; decode with its own header and active delimiter.
|
||||
- Else if a colon appears → object with first field on hyphen line.
|
||||
- Else → primitive token.
|
||||
|
||||
### 20.6 Blank-Line Handling
|
||||
|
||||
- Track blank lines during scanning with line numbers and depth.
|
||||
- For arrays/tabular rows:
|
||||
- In strict mode, any blank line between the first and last item/row line MUST error.
|
||||
- In non-strict mode, blank lines MAY be ignored and not counted as items/rows.
|
||||
- Outside arrays/tabular rows:
|
||||
- Blank lines SHOULD be ignored (do not affect root-form detection or object boundaries).
|
||||
|
||||
## 21. Test Suite and Compliance (Informative)
|
||||
|
||||
Implementations are encouraged to validate against a comprehensive test suite covering:
|
||||
- Primitive encoding/decoding, quoting, control-character escaping.
|
||||
- Object key encoding/decoding and order preservation.
|
||||
- Primitive arrays (inline), empty arrays.
|
||||
- Arrays of arrays (expanded), mixed-length and empty inner arrays.
|
||||
- Tabular detection and formatting, including delimiter variations.
|
||||
- Mixed arrays and objects-as-list-items behavior, including nested arrays and objects.
|
||||
- Whitespace invariants (no trailing spaces/newline).
|
||||
- Normalization (BigInt, Date, undefined, NaN/Infinity, functions, symbols).
|
||||
- Decoder strict-mode errors: count mismatches, invalid escapes, missing colon, delimiter mismatches, indentation errors, blank-line handling.
|
||||
|
||||
## 22. TOON Core Profile (Normative Subset)
|
||||
|
||||
This profile captures the most common, memory-friendly rules.
|
||||
|
||||
- Character set: UTF-8; LF line endings.
|
||||
- Indentation: 2 spaces per level (configurable indentSize).
|
||||
- Strict mode: leading spaces MUST be a multiple of indentSize; tabs in indentation MUST error.
|
||||
- Keys:
|
||||
- Unquoted if they match ^[A-Za-z_][\w.]*$; otherwise quoted.
|
||||
- A colon MUST follow a key.
|
||||
- Strings:
|
||||
- Only these escapes allowed in quotes: \\, \", \n, \r, \t.
|
||||
- Quote if empty; leading/trailing whitespace; equals true/false/null; numeric-like; contains colon/backslash/quote/brackets/braces/control char; contains the relevant delimiter (active inside arrays, document otherwise); equals "-" or starts with "-".
|
||||
- Numbers:
|
||||
- Encoder emits non-exponential decimal; -0 → 0.
|
||||
- Decoder accepts decimal and exponent forms; tokens with forbidden leading zeros decode as strings.
|
||||
- Arrays and headers:
|
||||
- Header: [#?N[delim?]] where delim is absent (comma), HTAB (tab), or "|" (pipe).
|
||||
- Keyed header: key[#?N[delim?]]:. Optional fields: {f1<delim>f2}.
|
||||
- Primitive arrays inline: key[N]: v1<delim>v2. Empty arrays: key[0]: (no values).
|
||||
- Tabular arrays: key[N]{fields}: then N rows at depth +1.
|
||||
- Otherwise list form: key[N]: then N items, each starting with "- ".
|
||||
- Delimiters:
|
||||
- Only split on the active delimiter from the nearest header. Non-active delimiters never split.
|
||||
- Objects as list items:
|
||||
- "- value" (primitive), "- [M]: …" (inline array), or "- key: …" (object).
|
||||
- If first field is "- key:" with nested object: nested fields at +2; subsequent sibling fields at +1.
|
||||
- Root form:
|
||||
- Root array if the first depth-0 line is a header (per Section 6).
|
||||
- Root primitive if exactly one non-empty line and it is not a header or key-value.
|
||||
- Otherwise object.
|
||||
- Strict mode checks:
|
||||
- All count/width checks; missing colon; invalid escapes; indentation multiple-of-indentSize; delimiter mismatches via count checks; blank lines inside arrays/tabular rows; empty input.
|
||||
|
||||
## 23. Versioning and Extensibility
|
||||
|
||||
- Backward-compatible evolutions SHOULD preserve current headers, quoting rules, and indentation semantics.
|
||||
- Reserved/structural characters (colon, brackets, braces, hyphen) MUST retain current meanings.
|
||||
- Future work (non-normative): schemas, comments/annotations, additional delimiter profiles, optional \uXXXX escapes (if added, must be precisely defined).
|
||||
|
||||
## 24. Acknowledgments and License
|
||||
|
||||
- Credits: Author and contributors; ports in other languages (Elixir, PHP, Python, Ruby, Java, .NET, Swift, Go).
|
||||
- License: MIT (see repository for details).
|
||||
|
||||
---
|
||||
|
||||
Appendix: Cross-check With Reference Behavior (Informative)
|
||||
|
||||
- The reference encoder/decoder test suites implement:
|
||||
- Safe-unquoted string rules and delimiter-aware quoting (document vs active delimiter).
|
||||
- Header formation and delimiter-aware parsing with active delimiter scoping.
|
||||
- Length marker propagation (encoding) and acceptance (decoding).
|
||||
- Tabular detection requiring uniform keys and primitive-only values.
|
||||
- Objects-as-list-items parsing (+2 nested object rule; +1 siblings).
|
||||
- Whitespace invariants for encoding and strict-mode indentation enforcement for decoding.
|
||||
- Blank-line handling and trailing-newline acceptance.
|
||||
@@ -0,0 +1,421 @@
|
||||
using System.Text.Json.Nodes;
|
||||
using Xunit;
|
||||
|
||||
namespace ToonSharp.Tests;
|
||||
|
||||
/// <summary>
/// Round-trip, encoding and decoding tests for <see cref="ToonSerializer"/>.
/// </summary>
/// <remarks>
/// Expected TOON text uses the encoder's default indentation of two spaces per level.
/// Input text passed to the decoder uses the same indentation, because strict mode
/// rejects leading spaces that are not a multiple of the indent size.
/// </remarks>
public class ToonSerializerTests
{
    [Fact]
    public void Serialize_SimpleObject_ReturnsCorrectToon()
    {
        // Arrange
        var obj = new
        {
            id = 123,
            name = "Ada",
            active = true
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        var expected = "id: 123\nname: Ada\nactive: true";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_NestedObject_ReturnsCorrectToon()
    {
        // Arrange
        var obj = new
        {
            user = new
            {
                id = 123,
                name = "Ada"
            }
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert: nested fields sit one level (two spaces) below their parent key.
        var expected = "user:\n  id: 123\n  name: Ada";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_PrimitiveArray_ReturnsCorrectToon()
    {
        // Arrange
        var obj = new
        {
            tags = new[] { "admin", "ops", "dev" }
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        var expected = "tags[3]: admin,ops,dev";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_ArrayOfArrays_ReturnsCorrectToon()
    {
        // Arrange
        var obj = new
        {
            pairs = new[]
            {
                new[] { 1, 2 },
                new[] { 3, 4 }
            }
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert: list items are one level (two spaces) below the array header.
        var expected = "pairs[2]:\n  - [2]: 1,2\n  - [2]: 3,4";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_TabularArray_ReturnsCorrectToon()
    {
        // Arrange
        var obj = new
        {
            items = new[]
            {
                new { sku = "A1", qty = 2, price = 9.99 },
                new { sku = "B2", qty = 1, price = 14.5 }
            }
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert: tabular rows are one level (two spaces) below the header.
        var expected = "items[2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_MixedArray_ReturnsCorrectToon()
    {
        // Arrange
        var items = new JsonArray
        {
            JsonValue.Create(1),
            new JsonObject { ["a"] = 1 },
            JsonValue.Create("text")
        };
        var obj = new JsonObject { ["items"] = items };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        var expected = "items[3]:\n  - 1\n  - a: 1\n  - text";
        Assert.Equal(expected, toon);
    }

    [Fact]
    public void Serialize_ObjectsAsListItems_ReturnsCorrectToon()
    {
        // Arrange: the two objects have different key sets, so the array is not tabular.
        var items = new JsonArray
        {
            new JsonObject { ["id"] = 1, ["name"] = "First" },
            new JsonObject { ["id"] = 2, ["name"] = "Second", ["extra"] = true }
        };
        var obj = new JsonObject { ["items"] = items };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        Assert.Contains("items[2]", toon);
    }

    [Fact]
    public void Deserialize_SimpleObject_ReturnsCorrectObject()
    {
        // Arrange
        var toon = "id: 123\nname: Ada\nactive: true";

        // Act
        var result = ToonSerializer.Deserialize(toon);

        // Assert
        Assert.NotNull(result);
        var obj = result.AsObject();
        Assert.Equal(123, obj["id"]!.GetValue<double>());
        Assert.Equal("Ada", obj["name"]!.GetValue<string>());
        Assert.True(obj["active"]!.GetValue<bool>());
    }

    [Fact]
    public void Deserialize_PrimitiveArray_ReturnsCorrectArray()
    {
        // Arrange
        var toon = "tags[3]: admin,ops,dev";

        // Act
        var result = ToonSerializer.Deserialize(toon);

        // Assert
        Assert.NotNull(result);
        var obj = result.AsObject();
        var tags = obj["tags"] as JsonArray;
        Assert.NotNull(tags);
        Assert.Equal(3, tags.Count);
        Assert.Equal("admin", tags[0]!.GetValue<string>());
        Assert.Equal("ops", tags[1]!.GetValue<string>());
        Assert.Equal("dev", tags[2]!.GetValue<string>());
    }

    [Fact]
    public void Deserialize_TabularArray_ReturnsCorrectObjects()
    {
        // Arrange: rows use two-space indentation so the input is valid in strict mode.
        var toon = "items[2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5";

        // Act
        var result = ToonSerializer.Deserialize(toon);

        // Assert
        Assert.NotNull(result);
        var obj = result.AsObject();
        var items = obj["items"] as JsonArray;
        Assert.NotNull(items);
        Assert.Equal(2, items.Count);

        var first = items[0] as JsonObject;
        Assert.NotNull(first);
        Assert.Equal("A1", first["sku"]!.GetValue<string>());
        Assert.Equal(2, first["qty"]!.GetValue<double>());
        Assert.Equal(9.99, first["price"]!.GetValue<double>());
    }

    [Fact]
    public void Serialize_EmptyObject_ReturnsEmptyString()
    {
        // Arrange
        var obj = new { };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        Assert.Equal("", toon);
    }

    [Fact]
    public void Serialize_NullValue_ReturnsNull()
    {
        // Arrange
        var obj = new
        {
            value = (string?)null
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        Assert.Equal("value: null", toon);
    }

    [Fact]
    public void Serialize_QuotedStrings_HandlesSpecialCharacters()
    {
        // Arrange
        var obj = new
        {
            colon = "a:b",
            comma = "a,b",
            quote = "a\"b",
            newline = "a\nb",
            empty = ""
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        Assert.Contains("colon: \"a:b\"", toon);
        Assert.Contains("comma: \"a,b\"", toon);
        Assert.Contains("quote: \"a\\\"b\"", toon);
        Assert.Contains("newline: \"a\\nb\"", toon);
        Assert.Contains("empty: \"\"", toon);
    }

    [Fact]
    public void Serialize_WithTabDelimiter_UsesTabsInHeader()
    {
        // Arrange
        var obj = new
        {
            tags = new[] { "reading", "gaming", "coding" }
        };

        var options = new ToonSerializerOptions
        {
            Delimiter = ToonDelimiter.Tab
        };

        // Act
        var toon = ToonSerializer.Serialize(obj, options);

        // Assert: the header carries the delimiter and the values are joined with it.
        Assert.Contains("tags[3\t]:", toon);
        Assert.Contains("\t", toon.Split(':')[1]);
    }

    [Fact]
    public void Serialize_WithPipeDelimiter_UsesPipesInHeader()
    {
        // Arrange
        var obj = new
        {
            tags = new[] { "reading", "gaming", "coding" }
        };

        var options = new ToonSerializerOptions
        {
            Delimiter = ToonDelimiter.Pipe
        };

        // Act
        var toon = ToonSerializer.Serialize(obj, options);

        // Assert: the header carries the delimiter and the values are joined with it.
        Assert.Contains("tags[3|]:", toon);
        Assert.Contains("|", toon.Split(':')[1]);
    }

    [Fact]
    public void Serialize_WithLengthMarker_IncludesHashInHeader()
    {
        // Arrange
        var obj = new
        {
            tags = new[] { "reading", "gaming", "coding" }
        };

        var options = new ToonSerializerOptions
        {
            UseLengthMarker = true
        };

        // Act
        var toon = ToonSerializer.Serialize(obj, options);

        // Assert
        Assert.Contains("tags[#3]:", toon);
    }

    [Fact]
    public void RoundTrip_ComplexObject_PreservesData()
    {
        // Arrange
        var original = new
        {
            id = 123,
            name = "Test User",
            scores = new[] { 95, 87, 92 },
            settings = new
            {
                theme = "dark",
                notifications = true
            },
            tags = new[] { "admin", "developer" }
        };

        // Act
        var toon = ToonSerializer.Serialize(original);
        var result = ToonSerializer.Deserialize(toon);

        // Assert
        Assert.NotNull(result);
        var deserialized = result.AsObject();
        Assert.Equal(123, deserialized["id"]!.GetValue<double>());
        Assert.Equal("Test User", deserialized["name"]!.GetValue<string>());

        var scores = deserialized["scores"] as JsonArray;
        Assert.NotNull(scores);
        Assert.Equal(3, scores.Count);

        var settings = deserialized["settings"] as JsonObject;
        Assert.NotNull(settings);
        Assert.Equal("dark", settings["theme"]!.GetValue<string>());
        Assert.True(settings["notifications"]!.GetValue<bool>());

        // The trailing field after the nested object must survive the round trip too.
        var tags = deserialized["tags"] as JsonArray;
        Assert.NotNull(tags);
        Assert.Equal(2, tags.Count);
        Assert.Equal("admin", tags[0]!.GetValue<string>());
        Assert.Equal("developer", tags[1]!.GetValue<string>());
    }

    [Fact]
    public void Deserialize_StrictMode_ThrowsOnCountMismatch()
    {
        // Arrange
        var toon = "tags[3]: admin,ops"; // Only 2 values, not 3

        var options = new ToonSerializerOptions
        {
            Strict = true
        };

        // Act & Assert
        Assert.Throws<ToonException>(() => ToonSerializer.Deserialize(toon, options));
    }

    [Fact]
    public void Deserialize_NonStrictMode_AllowsCountMismatch()
    {
        // Arrange
        var toon = "tags[3]: admin,ops"; // Only 2 values, not 3

        var options = new ToonSerializerOptions
        {
            Strict = false
        };

        // Act
        var result = ToonSerializer.Deserialize(toon, options);

        // Assert
        Assert.NotNull(result);
        var obj = result.AsObject();
        var tags = obj["tags"] as JsonArray;
        Assert.NotNull(tags);
        Assert.Equal(2, tags.Count); // Should have 2 items despite header saying 3
    }

    [Fact]
    public void Serialize_NumbersWithoutExponent_UsesDecimalNotation()
    {
        // Arrange
        var obj = new
        {
            large = 1000000,
            small = 0.000001,
            value = 42
        };

        // Act
        var toon = ToonSerializer.Serialize(obj);

        // Assert
        Assert.Contains("large: 1000000", toon);
        Assert.Contains("small: 0.000001", toon);
        Assert.Contains("value: 42", toon);
        // Check that scientific notation (e+ or e-) is not used
        Assert.DoesNotContain("e+", toon);
        Assert.DoesNotContain("e-", toon);
        Assert.DoesNotContain("E+", toon);
        Assert.DoesNotContain("E-", toon);
    }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<IsPackable>false</IsPackable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="coverlet.collector" Version="6.0.2" />
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageReference Include="xunit" Version="2.9.2" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Using Include="Xunit" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ToonSharp\ToonSharp.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,45 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToonSharp", "ToonSharp\ToonSharp.csproj", "{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToonSharp.Tests", "ToonSharp.Tests\ToonSharp.Tests.csproj", "{CC73B156-3947-43E3-8521-B9D20AF943CD}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|Any CPU = Release|Any CPU
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x64.Build.0 = Release|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{9E0E4DC3-F067-463E-B684-0B2AFEC06BDD}.Release|x86.Build.0 = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x64.Build.0 = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{CC73B156-3947-43E3-8521-B9D20AF943CD}.Release|x86.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
@@ -0,0 +1,22 @@
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
/// Identifies which character separates inline array values, tabular row cells
/// and tabular field names in TOON text.
/// </summary>
/// <remarks>
/// The enum is stored as a <see cref="byte"/>. Numeric values are assigned explicitly
/// so that they stay stable if members are ever reordered.
/// </remarks>
public enum ToonDelimiter : byte
{
    /// <summary>
    /// Values are separated by a comma (<c>,</c>). This is the default.
    /// </summary>
    Comma = 0,

    /// <summary>
    /// Values are separated by a horizontal tab (HTAB, U+0009).
    /// </summary>
    Tab = 1,

    /// <summary>
    /// Values are separated by a vertical bar (<c>|</c>).
    /// </summary>
    Pipe = 2
}
|
||||
@@ -0,0 +1,74 @@
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
|
||||
/// The exception that is thrown when an error occurs during TOON serialization or deserialization.
|
||||
/// </summary>
|
||||
public sealed class ToonException : Exception
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets the line number where the error occurred, or null if not applicable.
|
||||
/// </summary>
|
||||
public int? LineNumber { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the column number where the error occurred, or null if not applicable.
|
||||
/// </summary>
|
||||
public int? ColumnNumber { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ToonException"/> class.
|
||||
/// </summary>
|
||||
public ToonException()
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message.
|
||||
/// </summary>
|
||||
/// <param name="message">The message that describes the error.</param>
|
||||
public ToonException(string message) : base(message)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message
|
||||
/// and line number.
|
||||
/// </summary>
|
||||
/// <param name="message">The message that describes the error.</param>
|
||||
/// <param name="lineNumber">The line number where the error occurred.</param>
|
||||
public ToonException(string message, int lineNumber) : base(FormatMessage(message, lineNumber, null))
|
||||
{
|
||||
LineNumber = lineNumber;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message,
|
||||
/// line number, and column number.
|
||||
/// </summary>
|
||||
/// <param name="message">The message that describes the error.</param>
|
||||
/// <param name="lineNumber">The line number where the error occurred.</param>
|
||||
/// <param name="columnNumber">The column number where the error occurred.</param>
|
||||
public ToonException(string message, int lineNumber, int columnNumber)
|
||||
: base(FormatMessage(message, lineNumber, columnNumber))
|
||||
{
|
||||
LineNumber = lineNumber;
|
||||
ColumnNumber = columnNumber;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ToonException"/> class with a specified error message
|
||||
/// and a reference to the inner exception that is the cause of this exception.
|
||||
/// </summary>
|
||||
/// <param name="message">The message that describes the error.</param>
|
||||
/// <param name="innerException">The exception that is the cause of the current exception.</param>
|
||||
public ToonException(string message, Exception innerException) : base(message, innerException)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the exception text by prefixing <paramref name="message"/> with its position:
/// <c>Line N: ...</c>, or <c>Line N, Column M: ...</c> when a column is supplied.
/// </summary>
private static string FormatMessage(string message, int lineNumber, int? columnNumber)
{
    if (columnNumber is int column)
        return $"Line {lineNumber}, Column {column}: {message}";

    return $"Line {lineNumber}: {message}";
}
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
|
||||
/// Internal helper methods for TOON serialization and deserialization.
|
||||
/// </summary>
|
||||
internal static class ToonHelpers
|
||||
{
|
||||
/// <summary>
/// Maps a <see cref="ToonDelimiter"/> to the character that separates values.
/// </summary>
/// <param name="delimiter">The delimiter kind.</param>
/// <returns>
/// <c>'\t'</c> for Tab, <c>'|'</c> for Pipe, and <c>','</c> for Comma or any other value.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static char GetDelimiterChar(ToonDelimiter delimiter)
{
    switch (delimiter)
    {
        case ToonDelimiter.Tab:
            return '\t';
        case ToonDelimiter.Pipe:
            return '|';
        default:
            // Comma, plus any value outside the known members.
            return ',';
    }
}
|
||||
|
||||
/// <summary>
/// Maps a <see cref="ToonDelimiter"/> to the marker text used for it in headers.
/// </summary>
/// <param name="delimiter">The delimiter kind.</param>
/// <returns>
/// A tab for Tab, <c>"|"</c> for Pipe, and the empty string for Comma or any other value.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string GetDelimiterString(ToonDelimiter delimiter)
{
    switch (delimiter)
    {
        case ToonDelimiter.Tab:
            return "\t";
        case ToonDelimiter.Pipe:
            return "|";
        default:
            // Comma (and unknown values) carry no marker.
            return "";
    }
}
|
||||
|
||||
/// <summary>
/// Converts a delimiter character into its <see cref="ToonDelimiter"/> value.
/// </summary>
/// <param name="c">Candidate character.</param>
/// <param name="delimiter">
/// The matching delimiter; <see cref="ToonDelimiter.Comma"/> when <paramref name="c"/> is not a delimiter.
/// </param>
/// <returns><see langword="true"/> when <paramref name="c"/> is a comma, tab or pipe.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryParseDelimiter(char c, out ToonDelimiter delimiter)
{
    switch (c)
    {
        case ',':
            delimiter = ToonDelimiter.Comma;
            return true;
        case '\t':
            delimiter = ToonDelimiter.Tab;
            return true;
        case '|':
            delimiter = ToonDelimiter.Pipe;
            return true;
        default:
            delimiter = ToonDelimiter.Comma;
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether a string value has to be written inside double quotes.
/// </summary>
/// <param name="value">The raw string value.</param>
/// <param name="delimiter">The delimiter active where the value is written.</param>
/// <returns>
/// <see langword="true"/> when the value is empty, has whitespace at either end, equals
/// <c>true</c>/<c>false</c>/<c>null</c>, starts with a hyphen, looks like a number, or contains
/// a colon, quote, backslash, bracket, brace, line break, tab, or the active delimiter.
/// </returns>
public static bool RequiresQuoting(ReadOnlySpan<char> value, ToonDelimiter delimiter)
{
    // Empty text and text with whitespace at either edge.
    if (value.IsEmpty || char.IsWhiteSpace(value[0]) || char.IsWhiteSpace(value[^1]))
        return true;

    // Reserved literals.
    if (value is "true" or "false" or "null")
        return true;

    // A leading hyphen, or anything that reads as a number.
    if (value[0] == '-' || IsNumericLike(value))
        return true;

    // Characters that are always significant, then the active delimiter.
    if (value.IndexOfAny(":\"\\[]{}\n\r\t".AsSpan()) >= 0)
        return true;

    return value.IndexOf(GetDelimiterChar(delimiter)) >= 0;
}
|
||||
|
||||
/// <summary>
/// Reports whether the text reads as a number: an optional leading minus, digits with at most
/// one decimal point, and an optional exponent (<c>e</c>/<c>E</c>, optional sign, digits).
/// Text whose digits start with <c>0</c> followed by another digit is also reported as numeric-like.
/// </summary>
private static bool IsNumericLike(ReadOnlySpan<char> value)
{
    if (value.IsEmpty)
        return false;

    var pos = value[0] == '-' ? 1 : 0;

    // A lone "-" is not a number.
    if (pos == value.Length)
        return false;

    // Leading zeros such as "05" or "0001".
    if (value.Length - pos >= 2 && value[pos] == '0' && char.IsDigit(value[pos + 1]))
        return true;

    var sawDigit = false;
    var sawPoint = false;
    var sawExponent = false;

    while (pos < value.Length)
    {
        var ch = value[pos++];

        if (char.IsDigit(ch))
        {
            sawDigit = true;
            continue;
        }

        switch (ch)
        {
            case '.' when !sawPoint && !sawExponent:
                sawPoint = true;
                break;

            case 'e' or 'E' when !sawExponent && sawDigit:
                sawExponent = true;
                sawDigit = false; // the exponent needs digits of its own

                // Step over an optional sign directly after the exponent marker.
                if (pos < value.Length && value[pos] is '+' or '-')
                    pos++;
                break;

            default:
                return false;
        }
    }

    return sawDigit;
}
|
||||
|
||||
/// <summary>
/// Determines whether a key may be written without quotes, i.e. whether it matches the
/// identifier pattern <c>^[A-Za-z_][A-Za-z0-9_.]*$</c>.
/// </summary>
/// <param name="key">The key text to test.</param>
/// <returns>
/// <see langword="true"/> when the key is non-empty, starts with an ASCII letter or underscore,
/// and continues with ASCII letters, ASCII digits, underscores or dots only.
/// </returns>
public static bool IsValidUnquotedKey(ReadOnlySpan<char> key)
{
    if (key.IsEmpty)
        return false;

    // ASCII-only on purpose: char.IsLetter/IsLetterOrDigit would also accept letters and
    // digits from other scripts, which the unquoted-key pattern does not allow.
    var first = key[0];
    if (!char.IsAsciiLetter(first) && first != '_')
        return false;

    foreach (var c in key[1..])
    {
        if (!char.IsAsciiLetterOrDigit(c) && c != '_' && c != '.')
            return false;
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Produces the escaped form of a string: backslash, double quote, line feed, carriage return
/// and tab become <c>\\</c>, <c>\"</c>, <c>\n</c>, <c>\r</c> and <c>\t</c>; every other character
/// is copied as-is.
/// </summary>
/// <param name="value">The text to escape.</param>
/// <returns>The escaped text (a plain copy when nothing needs escaping).</returns>
public static string Escape(ReadOnlySpan<char> value)
{
    // Fast path: nothing to escape.
    var firstSpecial = value.IndexOfAny("\\\"\n\r\t".AsSpan());
    if (firstSpecial < 0)
        return value.ToString();

    var builder = new StringBuilder(value.Length + 4);

    // Everything before the first special character can be copied in one go.
    builder.Append(value[..firstSpecial]);

    for (var i = firstSpecial; i < value.Length; i++)
    {
        var ch = value[i];
        var replacement = ch switch
        {
            '\\' => "\\\\",
            '"' => "\\\"",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            _ => null
        };

        if (replacement is null)
            builder.Append(ch);
        else
            builder.Append(replacement);
    }

    return builder.ToString();
}
|
||||
|
||||
/// <summary>
/// Reverses <c>Escape</c>: turns <c>\\</c>, <c>\"</c>, <c>\n</c>, <c>\r</c> and <c>\t</c> back into the
/// characters they stand for.
/// </summary>
/// <param name="value">Escaped text (without surrounding quotes).</param>
/// <returns>The unescaped text.</returns>
/// <exception cref="ToonException">
/// The text ends with a lone backslash, or a backslash is followed by a character other than
/// <c>\</c>, <c>"</c>, <c>n</c>, <c>r</c> or <c>t</c>.
/// </exception>
public static string Unescape(ReadOnlySpan<char> value)
{
    // Fast path: no backslash means nothing to decode.
    if (value.IndexOf('\\') < 0)
        return value.ToString();

    var builder = new StringBuilder(value.Length);

    for (var i = 0; i < value.Length; i++)
    {
        var ch = value[i];
        if (ch != '\\')
        {
            builder.Append(ch);
            continue;
        }

        // Move onto the character that follows the backslash.
        if (++i >= value.Length)
            throw new ToonException("Unterminated escape sequence");

        builder.Append(value[i] switch
        {
            '\\' => '\\',
            '"' => '"',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            var other => throw new ToonException($"Invalid escape sequence: \\{other}")
        });
    }

    return builder.ToString();
}
|
||||
|
||||
/// <summary>
/// Builds the run of spaces for an indentation level.
/// </summary>
/// <param name="depth">Nesting depth; 0 yields the empty string.</param>
/// <param name="indentSize">Number of spaces per depth level.</param>
/// <returns>A string of <c>depth * indentSize</c> spaces.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string GetIndentation(int depth, int indentSize)
    => depth == 0 ? string.Empty : new string(' ', depth * indentSize);
|
||||
}
|
||||
@@ -0,0 +1,645 @@
|
||||
using System.Globalization;
|
||||
using System.Text.Json.Nodes;
|
||||
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
|
||||
/// Reads TOON-formatted text and produces JsonNode structures.
|
||||
/// </summary>
|
||||
internal sealed class ToonReader
|
||||
{
|
||||
private readonly ToonSerializerOptions _options;
|
||||
private string[] _lines = [];
|
||||
private int _currentLine;
|
||||
|
||||
/// <summary>
/// Creates a reader that parses according to the given options.
/// </summary>
/// <param name="options">Options consulted while parsing (strictness, indent size, delimiter).</param>
public ToonReader(ToonSerializerOptions options) => _options = options;
|
||||
|
||||
/// <summary>
/// Parses a complete TOON document.
/// </summary>
/// <param name="toon">The document text; lines are separated by <c>'\n'</c>.</param>
/// <returns>
/// A <see cref="JsonArray"/> when the first non-blank line is a root array header, a
/// <see cref="JsonValue"/> when the document is a single line without an unquoted colon,
/// otherwise a <see cref="JsonObject"/>. Returns <see langword="null"/> for blank input
/// when strict mode is off.
/// </returns>
/// <exception cref="ToonException">
/// The input is blank in strict mode, the first non-blank line is indented, or any nested
/// parse step rejects the document.
/// </exception>
public JsonNode? Read(string toon)
{
    _lines = toon.Split('\n');
    _currentLine = 0;

    // Locate the first line that carries content.
    var firstIndex = Array.FindIndex(_lines, line => !IsBlankLine(line));
    if (firstIndex < 0)
    {
        if (_options.Strict)
            throw new ToonException("Empty input");
        return null;
    }

    var firstLine = _lines[firstIndex];
    var firstLineNumber = firstIndex + 1;

    if (GetDepth(firstLine, firstLineNumber) != 0)
        throw new ToonException("First non-empty line must be at depth 0", firstLineNumber);

    // Root array header.
    if (IsRootArrayHeader(firstLine))
    {
        // ParseRootArray reads the header from _currentLine, so step over leading blank
        // lines first; otherwise a blank line would be handed to the header parser.
        _currentLine = firstIndex;
        return ParseRootArray();
    }

    // Single primitive: exactly one content line with no key/value colon. The colon test
    // ignores quoted text so that a quoted string such as "a:b" is still a primitive.
    var isOnlyContentLine = Array.FindIndex(_lines, firstIndex + 1, line => !IsBlankLine(line)) < 0;
    if (isOnlyContentLine && FindUnquotedChar(firstLine, ':') < 0)
        return ParsePrimitive(firstLine.Trim(), ToonDelimiter.Comma);

    // Anything else is an object; its field parser skips blank lines itself.
    return ParseRootObject();
}
|
||||
|
||||
/// <summary>Returns whether the line is empty or consists only of whitespace.</summary>
private bool IsBlankLine(string line) => string.IsNullOrWhiteSpace(line);
|
||||
|
||||
/// <summary>
/// Returns whether a line looks like a keyless array header: after leading whitespace it
/// starts with <c>[</c> and also contains <c>]</c> and <c>:</c>.
/// </summary>
private bool IsRootArrayHeader(string line)
{
    var content = line.TrimStart();
    if (!content.StartsWith('['))
        return false;

    return content.IndexOf(']') >= 0 && content.IndexOf(':') >= 0;
}
|
||||
|
||||
/// <summary>
/// Parses the document as an object whose fields sit at depth 0, starting at the current line.
/// </summary>
private JsonNode ParseRootObject()
{
    var root = new JsonObject();
    ParseObjectFields(root, expectedDepth: 0);
    return root;
}
|
||||
|
||||
/// <summary>
/// Parses a document whose root is an array. The header is read from the current line; the
/// body is taken from the header itself (inline form) or from the following lines at depth 1
/// (tabular rows when the header lists fields, list items otherwise).
/// </summary>
/// <returns>The parsed array; empty when the header declares a length of 0.</returns>
private JsonNode ParseRootArray()
{
    var header = ParseArrayHeader(GetCurrentLine(), _currentLine + 1, key: null);
    _currentLine++;

    var items = new JsonArray();

    if (header.Count != 0)
    {
        if (header.IsInline)
            ParseInlineArrayValues(items, header);
        else if (header.Fields is not null)
            ParseTabularRows(items, header, expectedDepth: 1);
        else
            ParseExpandedArrayItems(items, header, expectedDepth: 1);
    }

    return items;
}
|
||||
|
||||
/// <summary>
/// Reads consecutive <c>key: value</c> lines at <paramref name="expectedDepth"/> into
/// <paramref name="obj"/>, starting at the current line. Blank lines are skipped. Parsing stops
/// at the end of input or at the first line with a smaller depth.
/// </summary>
/// <param name="obj">Object that receives the parsed fields.</param>
/// <param name="expectedDepth">Indentation depth every field line must have.</param>
/// <exception cref="ToonException">
/// A line is indented deeper than expected, or (strict mode) a line has no unquoted colon.
/// </exception>
private void ParseObjectFields(JsonObject obj, int expectedDepth)
{
    while (_currentLine < _lines.Length)
    {
        var line = _lines[_currentLine];

        if (IsBlankLine(line))
        {
            _currentLine++;
            continue;
        }

        var lineNumber = _currentLine + 1;
        var depth = GetDepth(line, lineNumber);

        if (depth < expectedDepth)
            break;

        if (depth > expectedDepth)
            throw new ToonException($"Unexpected indentation depth {depth}, expected {expectedDepth}", lineNumber);

        var trimmed = line.TrimStart();

        // Split at the first colon that is not inside a quoted key.
        var colonIndex = FindUnquotedChar(trimmed, ':');
        if (colonIndex < 0)
        {
            if (_options.Strict)
                throw new ToonException("Missing colon after key", lineNumber);
            _currentLine++;
            continue;
        }

        var keyPart = trimmed.Substring(0, colonIndex);
        var valuePart = trimmed.Substring(colonIndex + 1).TrimStart();

        // An array header needs a '[' outside quotes, followed by ']'. Brackets inside a
        // quoted key (e.g. "a[1]": 5) belong to the key and must not start a header.
        var bracketIndex = FindUnquotedChar(keyPart, '[');
        if (bracketIndex >= 0 && keyPart.IndexOf(']', bracketIndex + 1) >= 0)
        {
            var key = ParseKey(keyPart.Substring(0, bracketIndex));

            // Hand over the text from the header's own '[' onwards so that brackets inside
            // a quoted key cannot be mistaken for the length brackets.
            var header = ParseArrayHeader(trimmed.Substring(bracketIndex), lineNumber, key);
            _currentLine++;

            var array = new JsonArray();

            if (header.IsInline)
                ParseInlineArrayValues(array, header);
            else if (header.Fields is not null)
                ParseTabularRows(array, header, expectedDepth + 1);
            else
                ParseExpandedArrayItems(array, header, expectedDepth + 1);

            obj[key] = array;
        }
        else
        {
            var key = ParseKey(keyPart);

            if (string.IsNullOrWhiteSpace(valuePart))
            {
                // Nothing after the colon: the fields of a nested object follow one level deeper.
                _currentLine++;
                var nestedObj = new JsonObject();
                ParseObjectFields(nestedObj, expectedDepth + 1);
                obj[key] = nestedObj;
            }
            else
            {
                obj[key] = ParsePrimitive(valuePart, _options.Delimiter);
                _currentLine++;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Splits the values written after an array header's colon and appends each one to
/// <paramref name="array"/> as a primitive. Does nothing when the header has no inline text.
/// </summary>
/// <param name="array">Array that receives the values.</param>
/// <param name="header">Parsed header supplying the inline text, delimiter and declared count.</param>
/// <exception cref="ToonException">Strict mode: the number of values differs from the declared count.</exception>
private void ParseInlineArrayValues(JsonArray array, ArrayHeader header)
{
    var inlineText = header.InlineValues;
    if (string.IsNullOrWhiteSpace(inlineText))
        return;

    var tokens = SplitByDelimiter(inlineText, header.Delimiter);

    var countMismatch = tokens.Count != header.Count;
    if (countMismatch && _options.Strict)
    {
        throw new ToonException(
            $"Array count mismatch: expected {header.Count}, got {tokens.Count}",
            _currentLine + 1);
    }

    for (var i = 0; i < tokens.Count; i++)
        array.Add(ParsePrimitive(tokens[i].Trim(), header.Delimiter));
}
|
||||
|
||||
/// <summary>
/// Reads the rows of a tabular array. Each row line at <paramref name="expectedDepth"/> is split
/// by the header's delimiter and turned into an object keyed by the header's field names.
/// Reading stops at the end of input, at a shallower line, at a line that is not a row, or at a
/// run of blank lines that is not followed by more content at row depth.
/// </summary>
/// <param name="array">Array that receives one object per row.</param>
/// <param name="header">Parsed header supplying fields, delimiter and declared row count.</param>
/// <param name="expectedDepth">Indentation depth of the row lines.</param>
/// <exception cref="ToonException">
/// A row is indented too deep, or (strict mode) a blank line sits between rows, a row's width
/// differs from the field count, or the row count differs from the declared length.
/// </exception>
private void ParseTabularRows(JsonArray array, ArrayHeader header, int expectedDepth)
{
    var rowCount = 0;

    while (_currentLine < _lines.Length)
    {
        var line = _lines[_currentLine];

        if (IsBlankLine(line))
        {
            // Blank lines that trail the array (including the empty last line produced by a
            // final newline) are not "inside" it: stop here and let the caller deal with them.
            if (!HasContentAtRowDepthAfterBlanks())
                break;

            if (_options.Strict)
                throw new ToonException("Blank lines not allowed inside tabular arrays", _currentLine + 1);
            _currentLine++;
            continue;
        }

        var depth = GetDepth(line, _currentLine + 1);

        if (depth < expectedDepth)
            break;

        if (depth > expectedDepth)
            throw new ToonException("Unexpected indentation in tabular row", _currentLine + 1);

        var trimmed = line.TrimStart();

        // A key/value line at row depth ends the rows.
        if (!IsTabularRow(trimmed, header.Delimiter))
            break;

        var values = SplitByDelimiter(trimmed, header.Delimiter);

        if (_options.Strict && header.Fields is not null && values.Count != header.Fields.Count)
        {
            throw new ToonException(
                $"Row width mismatch: expected {header.Fields.Count} values, got {values.Count}",
                _currentLine + 1);
        }

        var obj = new JsonObject();
        if (header.Fields is not null)
        {
            // Outside strict mode a short or long row is tolerated: pair up what is there.
            var width = Math.Min(values.Count, header.Fields.Count);
            for (var i = 0; i < width; i++)
                obj[header.Fields[i]] = ParsePrimitive(values[i].Trim(), header.Delimiter);
        }

        array.Add(obj);
        rowCount++;
        _currentLine++;
    }

    if (_options.Strict && rowCount != header.Count)
    {
        throw new ToonException(
            $"Tabular array count mismatch: expected {header.Count} rows, got {rowCount}",
            _currentLine + 1);
    }

    // True when, after the blank line(s) at the current position, the next non-blank line is
    // indented at least to row depth, i.e. the blank sits inside the array rather than after it.
    bool HasContentAtRowDepthAfterBlanks()
    {
        for (var next = _currentLine + 1; next < _lines.Length; next++)
        {
            if (IsBlankLine(_lines[next]))
                continue;
            return GetDepth(_lines[next], next + 1) >= expectedDepth;
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Tells a tabular data row apart from a <c>key: value</c> line. A line is a row when it has no
/// unquoted colon, or when an unquoted delimiter appears before the first unquoted colon.
/// </summary>
private bool IsTabularRow(string line, ToonDelimiter delimiter)
{
    var colonAt = FindUnquotedChar(line, ':');
    if (colonAt < 0)
        return true; // no colon at all: must be a row

    var delimiterAt = FindUnquotedChar(line, ToonHelpers.GetDelimiterChar(delimiter));

    // A colon without any delimiter is a key/value line; otherwise the delimiter has to come first.
    return delimiterAt >= 0 && delimiterAt < colonAt;
}
|
||||
|
||||
/// <summary>
/// Reads the <c>- </c> list items of an expanded array at <paramref name="expectedDepth"/>.
/// An item starting with <c>[</c> is an inline array, an item with an unquoted colon is an object,
/// and anything else is a primitive. Reading stops at the end of input, at a shallower line, at
/// a line that does not start with <c>- </c>, or at a run of blank lines that is not followed by
/// more content at item depth.
/// </summary>
/// <param name="array">Array that receives the items.</param>
/// <param name="header">Parsed header supplying delimiter and declared item count.</param>
/// <param name="expectedDepth">Indentation depth of the hyphen lines.</param>
/// <exception cref="ToonException">
/// An item is indented too deep, or (strict mode) a blank line sits between items or the item
/// count differs from the declared length.
/// </exception>
private void ParseExpandedArrayItems(JsonArray array, ArrayHeader header, int expectedDepth)
{
    var itemCount = 0;

    while (_currentLine < _lines.Length)
    {
        var line = _lines[_currentLine];

        if (IsBlankLine(line))
        {
            // Blank lines that trail the array (including the empty last line produced by a
            // final newline) are not "inside" it: stop here and let the caller deal with them.
            if (!HasContentAtItemDepthAfterBlanks())
                break;

            if (_options.Strict)
                throw new ToonException("Blank lines not allowed inside arrays", _currentLine + 1);
            _currentLine++;
            continue;
        }

        var depth = GetDepth(line, _currentLine + 1);

        if (depth < expectedDepth)
            break;

        if (depth > expectedDepth)
            throw new ToonException("Unexpected indentation in array item", _currentLine + 1);

        var trimmed = line.TrimStart();

        // Ordinal: the marker is syntax, not linguistic text.
        if (!trimmed.StartsWith("- ", StringComparison.Ordinal))
            break;

        var itemContent = trimmed.Substring(2);

        if (itemContent.StartsWith('['))
        {
            // Inline array item
            var itemHeader = ParseArrayHeader(itemContent, _currentLine + 1, null);
            var innerArray = new JsonArray();
            ParseInlineArrayValues(innerArray, itemHeader);
            array.Add(innerArray);
            _currentLine++;
        }
        else if (FindUnquotedChar(itemContent, ':') >= 0)
        {
            // Object item. Only a colon outside quotes makes a field; a quoted primitive such
            // as "a:b" falls through to the primitive branch below.
            array.Add(ParseObjectListItem(itemContent, expectedDepth));
        }
        else
        {
            // Primitive item
            array.Add(ParsePrimitive(itemContent, header.Delimiter));
            _currentLine++;
        }

        itemCount++;
    }

    if (_options.Strict && itemCount != header.Count)
    {
        throw new ToonException(
            $"Array count mismatch: expected {header.Count} items, got {itemCount}",
            _currentLine + 1);
    }

    // True when, after the blank line(s) at the current position, the next non-blank line is
    // indented at least to item depth, i.e. the blank sits inside the array rather than after it.
    bool HasContentAtItemDepthAfterBlanks()
    {
        for (var next = _currentLine + 1; next < _lines.Length; next++)
        {
            if (IsBlankLine(_lines[next]))
                continue;
            return GetDepth(_lines[next], next + 1) >= expectedDepth;
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Parses an object that appears as a list item. The first field is on the hyphen line itself
/// (<paramref name="firstFieldLine"/> is that line without the leading <c>- </c>); the remaining
/// fields are read from the following lines one level below the hyphen line.
/// </summary>
/// <param name="firstFieldLine">Text of the hyphen line after the <c>- </c> marker.</param>
/// <param name="itemDepth">Indentation depth of the hyphen line.</param>
/// <returns>The parsed object; empty when the first field has no colon and strict mode is off.</returns>
/// <exception cref="ToonException">Strict mode: the first field has no unquoted colon.</exception>
private JsonObject ParseObjectListItem(string firstFieldLine, int itemDepth)
{
    var obj = new JsonObject();

    // Parse first field from the hyphen line
    var colonIndex = FindUnquotedChar(firstFieldLine, ':');
    if (colonIndex < 0)
    {
        if (_options.Strict)
            throw new ToonException("Missing colon in object field", _currentLine + 1);
        _currentLine++;
        return obj;
    }

    var keyPart = firstFieldLine.Substring(0, colonIndex);
    var valuePart = firstFieldLine.Substring(colonIndex + 1).TrimStart();

    // An array header needs a '[' outside quotes, followed by ']'.
    var bracketIndex = FindUnquotedChar(keyPart, '[');
    if (bracketIndex >= 0 && keyPart.IndexOf(']', bracketIndex + 1) >= 0)
    {
        // The key is only the text before the bracket; "tags[2]" must yield the key "tags".
        var key = ParseKey(keyPart.Substring(0, bracketIndex));

        // Hand over the text from the header's own '[' onwards so that brackets inside a
        // quoted key cannot be mistaken for the length brackets.
        var header = ParseArrayHeader(firstFieldLine.Substring(bracketIndex), _currentLine + 1, key);
        _currentLine++;

        var array = new JsonArray();

        if (header.IsInline)
            ParseInlineArrayValues(array, header);
        else if (header.Fields is not null)
            ParseTabularRows(array, header, itemDepth + 1);
        else
            ParseExpandedArrayItems(array, header, itemDepth + 1);

        obj[key] = array;
    }
    else
    {
        var key = ParseKey(keyPart);

        if (string.IsNullOrWhiteSpace(valuePart))
        {
            // Nested object - fields at depth + 2
            _currentLine++;
            var nestedObj = new JsonObject();
            ParseObjectFields(nestedObj, itemDepth + 2);
            obj[key] = nestedObj;
        }
        else
        {
            obj[key] = ParsePrimitive(valuePart, _options.Delimiter);
            _currentLine++;
        }
    }

    // The item's remaining fields are indented one level below the hyphen line. Parsing them
    // at the hyphen's own depth would treat the next "- ..." sibling as a field of this object
    // and would reject fields that are indented correctly.
    ParseObjectFields(obj, itemDepth + 1);

    return obj;
}
|
||||
|
||||
/// <summary>
/// Parses an array header of the form <c>[#N&lt;delim&gt;]{field,...}: values</c>, where the
/// <c>#</c> marker, the trailing tab/pipe delimiter symbol, the field list and the inline values
/// are all optional. Text before the first unquoted <c>[</c> (typically the key) is ignored.
/// </summary>
/// <param name="line">The header text.</param>
/// <param name="lineNumber">Line number used in error messages.</param>
/// <param name="key">Key to record in the returned header; not parsed from <paramref name="line"/>.</param>
/// <returns>The parsed header.</returns>
/// <exception cref="ToonException">
/// The brackets are missing, the length is not a non-negative integer, or no unquoted colon
/// follows the closing bracket.
/// </exception>
private ArrayHeader ParseArrayHeader(string line, int lineNumber, string? key)
{
    // Quote-aware search: brackets inside a quoted key do not open the length section.
    var bracketStart = FindUnquotedChar(line, '[');
    var bracketEnd = bracketStart < 0 ? -1 : line.IndexOf(']', bracketStart + 1);

    if (bracketStart < 0 || bracketEnd < 0)
        throw new ToonException("Invalid array header", lineNumber);

    var bracketContent = line.Substring(bracketStart + 1, bracketEnd - bracketStart - 1);

    // Parse length marker and delimiter
    var hasLengthMarker = bracketContent.StartsWith('#');
    if (hasLengthMarker)
        bracketContent = bracketContent.Substring(1);

    // A trailing tab or pipe inside the brackets selects that delimiter; otherwise comma.
    var delimiter = ToonDelimiter.Comma;
    if (bracketContent.EndsWith('\t'))
    {
        delimiter = ToonDelimiter.Tab;
        bracketContent = bracketContent.Substring(0, bracketContent.Length - 1);
    }
    else if (bracketContent.EndsWith('|'))
    {
        delimiter = ToonDelimiter.Pipe;
        bracketContent = bracketContent.Substring(0, bracketContent.Length - 1);
    }

    // Culture-independent parse; a negative length is never valid.
    if (!int.TryParse(bracketContent, NumberStyles.Integer, CultureInfo.InvariantCulture, out var count) || count < 0)
        throw new ToonException($"Invalid array length: {bracketContent}", lineNumber);

    // Everything after ']' is "{fields}: values". The colon that ends the header is the first
    // one outside quotes, so quoted field names may themselves contain ':'.
    var tail = line.Substring(bracketEnd + 1);
    var colonInTail = FindUnquotedChar(tail, ':');
    if (colonInTail < 0)
        throw new ToonException("Missing colon after array header", lineNumber);

    // Only braces before the colon form the field list; braces in the inline values do not.
    List<string>? fields = null;
    var beforeColon = tail.Substring(0, colonInTail);
    var fieldsStart = beforeColon.IndexOf('{');
    var fieldsEnd = beforeColon.LastIndexOf('}');

    if (fieldsStart >= 0 && fieldsEnd > fieldsStart)
    {
        var fieldsContent = beforeColon.Substring(fieldsStart + 1, fieldsEnd - fieldsStart - 1);
        fields = SplitByDelimiter(fieldsContent, delimiter)
            .Select(f => ParseKey(f.Trim()))
            .ToList();
    }

    // Check for inline values
    var afterColon = tail.Substring(colonInTail + 1).TrimStart();
    var isInline = !string.IsNullOrWhiteSpace(afterColon);

    return new ArrayHeader
    {
        Key = key,
        Count = count,
        Delimiter = delimiter,
        Fields = fields,
        IsInline = isInline,
        InlineValues = isInline ? afterColon : null,
        HasLengthMarker = hasLengthMarker
    };
}
|
||||
|
||||
/// <summary>
/// Turns the raw key text into the key name: surrounding whitespace is trimmed and, when the
/// text is enclosed in double quotes, the quotes are removed and the content is unescaped.
/// </summary>
/// <param name="keyText">Raw key text as it appears in the document.</param>
/// <returns>The key name.</returns>
/// <exception cref="ToonException">The quoted content contains an invalid escape sequence.</exception>
private string ParseKey(string keyText)
{
    var trimmed = keyText.Trim();

    // Require two characters so that a lone '"' (which both starts and ends with a quote)
    // is returned as-is instead of producing a negative substring length.
    if (trimmed.Length >= 2 && trimmed[0] == '"' && trimmed[^1] == '"')
    {
        var content = trimmed.Substring(1, trimmed.Length - 2);
        return ToonHelpers.Unescape(content);
    }

    return trimmed;
}
|
||||
|
||||
/// <summary>
/// Converts a value token into a JSON value. Blank text becomes the empty string; text enclosed
/// in double quotes becomes its unescaped content; <c>null</c>, <c>true</c> and <c>false</c> become
/// the matching literals; text accepted by <c>TryParseNumber</c> becomes a number; everything else
/// is kept as a string.
/// </summary>
/// <param name="text">The token text; surrounding whitespace is ignored.</param>
/// <param name="delimiter">Delimiter of the surrounding scope (not consulted here).</param>
/// <returns>The parsed value, or <see langword="null"/> for the <c>null</c> literal.</returns>
private JsonValue? ParsePrimitive(string text, ToonDelimiter delimiter)
{
    if (string.IsNullOrWhiteSpace(text))
        return JsonValue.Create("");

    var token = text.Trim();

    // Quoted string
    if (token.Length >= 2 && token[0] == '"' && token[^1] == '"')
        return JsonValue.Create(ToonHelpers.Unescape(token.Substring(1, token.Length - 2)));

    // Literals
    if (token == "null")
        return JsonValue.Create((string?)null);
    if (token == "true")
        return JsonValue.Create(true);
    if (token == "false")
        return JsonValue.Create(false);

    // Number if it parses as one, otherwise an unquoted string
    if (TryParseNumber(token, out var number))
        return JsonValue.Create(number);

    return JsonValue.Create(token);
}
|
||||
|
||||
/// <summary>
/// Tries to read an unquoted token as a number.
/// </summary>
/// <param name="text">The token, already trimmed.</param>
/// <param name="number">The parsed value, or 0 when the token is not treated as a number.</param>
/// <returns>
/// <see langword="true"/> for a finite number written without a leading <c>+</c> and without
/// leading zeros; <see langword="false"/> otherwise, in which case the caller keeps the token
/// as a string.
/// </returns>
private bool TryParseNumber(string text, out double number)
{
    number = 0;

    // A leading '+' is accepted by NumberStyles.Float, but such a token is never written for
    // a number, so it is string data.
    if (text.Length == 0 || text[0] == '+')
        return false;

    // Forbidden leading zeros ("05", "-05") are strings; the check starts after an optional minus.
    var digitsStart = text[0] == '-' ? 1 : 0;
    if (text.Length > digitsStart + 1 && text[digitsStart] == '0' && char.IsDigit(text[digitsStart + 1]))
        return false;

    if (!double.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out var parsed))
        return false;

    // double.TryParse also accepts "NaN" and "Infinity" and overflows to infinity; none of
    // these is a JSON number, so the token stays a string.
    if (!double.IsFinite(parsed))
        return false;

    number = parsed;
    return true;
}
|
||||
|
||||
/// <summary>
/// Splits text at every delimiter that is outside double quotes. Quotes and escape sequences
/// are kept in the returned pieces exactly as written; pieces are not trimmed.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="delimiter">The delimiter to split on.</param>
/// <returns>The pieces in order; always at least one (possibly empty) piece.</returns>
private List<string> SplitByDelimiter(string text, ToonDelimiter delimiter)
{
    var pieces = new List<string>();
    var separator = ToonHelpers.GetDelimiterChar(delimiter);
    var quoted = false;
    var pieceStart = 0;

    for (var i = 0; i < text.Length; i++)
    {
        var ch = text[i];

        if (ch == '"')
        {
            quoted = !quoted;
        }
        else if (ch == '\\' && quoted && i + 1 < text.Length)
        {
            // Escaped character inside quotes: keep it and never treat it as a quote or delimiter.
            i++;
        }
        else if (ch == separator && !quoted)
        {
            pieces.Add(text.Substring(pieceStart, i - pieceStart));
            pieceStart = i + 1;
        }
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    pieces.Add(text.Substring(pieceStart));
    return pieces;
}
|
||||
|
||||
/// <summary>
/// Finds the first occurrence of <paramref name="target"/> that is outside double quotes.
/// Inside quotes, a backslash also hides the character that follows it.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="target">Character to look for.</param>
/// <returns>The zero-based index of the match, or -1 when there is none.</returns>
private int FindUnquotedChar(string text, char target)
{
    var quoted = false;
    var i = 0;

    while (i < text.Length)
    {
        var ch = text[i];

        if (ch == '"')
        {
            quoted = !quoted;
        }
        else if (quoted)
        {
            // Skip escaped character
            if (ch == '\\' && i + 1 < text.Length)
                i++;
        }
        else if (ch == target)
        {
            return i;
        }

        i++;
    }

    return -1;
}
|
||||
|
||||
/// <summary>
/// Computes the indentation depth of a line: the width of its leading whitespace divided by the
/// configured indent size. A space counts as one column; outside strict mode a tab counts as one
/// full indent.
/// </summary>
/// <param name="line">The line to measure.</param>
/// <param name="lineNumber">Line number used in error messages.</param>
/// <returns>The depth (integer division of the indentation width by the indent size).</returns>
/// <exception cref="ToonException">
/// Strict mode: the indentation contains a tab or is not a multiple of the indent size.
/// </exception>
private int GetDepth(string line, int lineNumber)
{
    var width = 0;

    for (var i = 0; i < line.Length; i++)
    {
        var ch = line[i];

        if (ch == ' ')
        {
            width++;
            continue;
        }

        // First character that is neither space nor tab ends the indentation.
        if (ch != '\t')
            break;

        if (_options.Strict)
            throw new ToonException("Tabs are not allowed in indentation", lineNumber);

        width += _options.IndentSize;
    }

    var isAligned = width % _options.IndentSize == 0;
    if (_options.Strict && !isAligned)
    {
        throw new ToonException(
            $"Indentation must be an exact multiple of {_options.IndentSize} spaces",
            lineNumber);
    }

    return width / _options.IndentSize;
}
|
||||
|
||||
/// <summary>
/// Returns the line at the current read position, or the empty string once the position is
/// past the last line.
/// </summary>
private string GetCurrentLine()
    => _currentLine >= _lines.Length ? string.Empty : _lines[_currentLine];
|
||||
|
||||
/// <summary>
/// Result of parsing one array header line; produced by ParseArrayHeader and consumed by the
/// inline, tabular and expanded array parsers.
/// </summary>
private record ArrayHeader
|
||||
{
|
||||
/// <summary>Key supplied by the caller of ParseArrayHeader; null when the caller passes none.</summary>
public required string? Key { get; init; }
|
||||
/// <summary>Length declared between the brackets.</summary>
public required int Count { get; init; }
|
||||
/// <summary>Delimiter selected by a trailing tab or pipe inside the brackets; comma otherwise.</summary>
public required ToonDelimiter Delimiter { get; init; }
|
||||
/// <summary>Field names listed between braces, or null when the header has no field list.</summary>
public required List<string>? Fields { get; init; }
|
||||
/// <summary>True when non-whitespace text follows the header's colon.</summary>
public required bool IsInline { get; init; }
|
||||
/// <summary>The text after the colon when <see cref="IsInline"/> is true; otherwise null.</summary>
public required string? InlineValues { get; init; }
|
||||
/// <summary>True when the bracket content starts with '#'.</summary>
public required bool HasLengthMarker { get; init; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Nodes;
|
||||
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
|
||||
/// Provides functionality to serialize objects to TOON format and deserialize TOON data to objects.
|
||||
/// </summary>
|
||||
public static class ToonSerializer
|
||||
{
|
||||
/// <summary>
/// Serializes a value to TOON text. The value is first converted to a
/// <see cref="JsonNode"/> with <see cref="JsonSerializer"/> and that node is then written as TOON.
/// </summary>
/// <typeparam name="TValue">Type of the value being serialized.</typeparam>
/// <param name="value">The value to serialize.</param>
/// <param name="options">
/// Serialization options; <see cref="ToonSerializerOptions.Default"/> is used when omitted.
/// </param>
/// <returns>The TOON text for <paramref name="value"/>.</returns>
public static string Serialize<TValue>(TValue value, ToonSerializerOptions? options = null)
{
    var effectiveOptions = options ?? ToonSerializerOptions.Default;

    // Going through JsonNode normalizes the value into objects, arrays and primitives.
    JsonNode? node = JsonSerializer.SerializeToNode(value);

    return new ToonWriter(effectiveOptions).Write(node);
}
|
||||
|
||||
/// <summary>
/// Serializes a value to TOON and writes the text to a stream asynchronously.
/// The stream is flushed but left open.
/// </summary>
/// <typeparam name="TValue">The type of the value to serialize.</typeparam>
/// <param name="stream">The stream to write the TOON data to.</param>
/// <param name="value">The value to convert.</param>
/// <param name="options">Options to control serialization behavior.</param>
/// <param name="cancellationToken">A cancellation token observed while writing and flushing.</param>
/// <returns>A task representing the asynchronous operation.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
public static async Task SerializeAsync<TValue>(
    Stream stream,
    TValue value,
    ToonSerializerOptions? options = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(stream);

    var toonString = Serialize(value, options);

    // Dispose the writer like DeserializeAsync disposes its reader; leaveOpen keeps the
    // caller's stream usable afterwards.
    var writer = new StreamWriter(stream, leaveOpen: true);
    await using (writer.ConfigureAwait(false))
    {
        await writer.WriteAsync(toonString.AsMemory(), cancellationToken).ConfigureAwait(false);
        await writer.FlushAsync(cancellationToken).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Parses TOON text into a <see cref="JsonNode"/> tree.
/// </summary>
/// <param name="toon">The TOON text.</param>
/// <param name="options">
/// Deserialization options; <see cref="ToonSerializerOptions.Default"/> is used when omitted.
/// </param>
/// <returns>The parsed node; may be <see langword="null"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="toon"/> is <see langword="null"/>.</exception>
public static JsonNode? Deserialize(string toon, ToonSerializerOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(toon);

    return new ToonReader(options ?? ToonSerializerOptions.Default).Read(toon);
}
|
||||
|
||||
/// <summary>
/// Parses TOON text and converts the result to <typeparamref name="TValue"/>.
/// The text is first parsed into a <see cref="JsonNode"/>, which is then deserialized.
/// </summary>
/// <typeparam name="TValue">The type to produce.</typeparam>
/// <param name="toon">The TOON text.</param>
/// <param name="options">Options to control deserialization behavior.</param>
/// <returns>
/// The converted value, or the default of <typeparamref name="TValue"/> when parsing yields no node.
/// </returns>
public static TValue? Deserialize<TValue>(string toon, ToonSerializerOptions? options = null)
{
    var node = Deserialize(toon, options);
    if (node is null)
        return default;

    return node.Deserialize<TValue>();
}
|
||||
|
||||
/// <summary>
/// Reads a stream to its end asynchronously, parses the text as TOON and converts the result to
/// <typeparamref name="TValue"/>. The stream is left open.
/// </summary>
/// <typeparam name="TValue">The type to produce.</typeparam>
/// <param name="stream">The stream to read the TOON data from.</param>
/// <param name="options">Options to control deserialization behavior.</param>
/// <param name="cancellationToken">A cancellation token observed while reading.</param>
/// <returns>A task whose result is the converted value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
public static async Task<TValue?> DeserializeAsync<TValue>(
    Stream stream,
    ToonSerializerOptions? options = null,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(stream);

    string text;
    using (var reader = new StreamReader(stream, leaveOpen: true))
    {
        text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    }

    return Deserialize<TValue>(text, options);
}
|
||||
|
||||
/// <summary>
/// Non-throwing variant of <c>Deserialize&lt;TValue&gt;</c>: any exception raised while parsing or
/// converting is turned into a <see langword="false"/> result.
/// </summary>
/// <typeparam name="TValue">The type to produce.</typeparam>
/// <param name="toon">The TOON text.</param>
/// <param name="result">The converted value on success; the default value otherwise.</param>
/// <param name="options">Options to control deserialization behavior.</param>
/// <returns>
/// <see langword="true"/> when conversion succeeded and produced a non-null value;
/// otherwise <see langword="false"/>.
/// </returns>
public static bool TryDeserialize<TValue>(
    string toon,
    [NotNullWhen(true)] out TValue? result,
    ToonSerializerOptions? options = null)
{
    result = default;

    try
    {
        var parsed = Deserialize<TValue>(toon, options);
        if (parsed is null)
            return false;

        result = parsed;
        return true;
    }
    catch
    {
        return false;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
/// Settings that control how TOON documents are written and read.
/// </summary>
public sealed class ToonSerializerOptions
{
    private int _indentSize = 2;

    /// <summary>
    /// Gets the default options with standard settings.
    /// </summary>
    /// <remarks>
    /// This is a single shared instance whose properties have public setters, so changing a
    /// property on it is visible to every piece of code that reads <see cref="Default"/>.
    /// </remarks>
    public static ToonSerializerOptions Default { get; } = new();

    /// <summary>
    /// Gets or sets how many spaces make up one indentation level. The initial value is 2.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The assigned value is smaller than 1 or larger than 8.
    /// </exception>
    public int IndentSize
    {
        get => _indentSize;
        set
        {
            if (value is < 1 or > 8)
            {
                throw new ArgumentOutOfRangeException(nameof(value), "IndentSize must be between 1 and 8.");
            }

            _indentSize = value;
        }
    }

    /// <summary>
    /// Gets or sets the delimiter placed between array values and tabular fields.
    /// The initial value is <see cref="ToonDelimiter.Comma"/>.
    /// </summary>
    /// <remarks>
    /// This is the document-level delimiter that applies outside any array scope;
    /// an individual array can declare a different one in its own header.
    /// </remarks>
    public ToonDelimiter Delimiter { get; set; } = ToonDelimiter.Comma;

    /// <summary>
    /// Gets or sets whether array headers carry the "#" length marker.
    /// The initial value is <see langword="false"/> (marker omitted).
    /// </summary>
    public bool UseLengthMarker { get; set; }

    /// <summary>
    /// Gets or sets whether deserialization runs in strict mode.
    /// The initial value is <see langword="true"/>.
    /// </summary>
    /// <remarks>
    /// In strict mode the parser enforces:
    /// <list type="bullet">
    /// <item>Array counts and tabular row widths must match the declared lengths</item>
    /// <item>Indentation must be an exact multiple of <see cref="IndentSize"/></item>
    /// <item>Tabs are not accepted as indentation</item>
    /// <item>Invalid escape sequences are errors</item>
    /// <item>A key without a following colon is an error</item>
    /// <item>Blank lines inside arrays or tabular rows are errors</item>
    /// </list>
    /// </remarks>
    public bool Strict { get; set; } = true;
}
|
||||
@@ -0,0 +1,39 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and language settings -->
  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <LangVersion>12</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- Runtime JIT settings -->
  <PropertyGroup>
    <TieredCompilation>true</TieredCompilation>
    <TieredCompilationQuickJit>true</TieredCompilationQuickJit>
  </PropertyGroup>

  <!-- Versioning -->
  <PropertyGroup>
    <Version>1.0.0</Version>
    <AssemblyVersion>1.0.0</AssemblyVersion>
    <FileVersion>1.0.0</FileVersion>
    <NeutralLanguage>en</NeutralLanguage>
  </PropertyGroup>

  <!-- NuGet package metadata -->
  <PropertyGroup>
    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
    <PackageId>ToonSharp</PackageId>
    <Authors>0xZunia</Authors>
    <Description>A high-performance .NET 9 library for the TOON data serialization format - human-readable, line-oriented data format optimized for LLM contexts.</Description>
    <PackageTags>toon;serialization;data-format;json;parser;encoder;decoder;tabular;llm</PackageTags>
    <PackageLicenseExpression>MIT</PackageLicenseExpression>
    <PackageReadmeFile>README.md</PackageReadmeFile>
    <PackageProjectUrl>https://github.com/0xZunia/ToonSharp</PackageProjectUrl>
    <RepositoryUrl>https://github.com/0xZunia/ToonSharp</RepositoryUrl>
    <RepositoryType>git</RepositoryType>
  </PropertyGroup>

  <!-- XML documentation and symbol package -->
  <PropertyGroup>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
    <IncludeSymbols>true</IncludeSymbols>
    <SymbolPackageFormat>snupkg</SymbolPackageFormat>
  </PropertyGroup>

  <!-- The repository README is packed at the package root (referenced by PackageReadmeFile) -->
  <ItemGroup>
    <None Include="../README.md" Pack="true" PackagePath="/" />
  </ItemGroup>

</Project>
|
||||
@@ -0,0 +1,640 @@
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Nodes;
|
||||
|
||||
namespace ToonSharp;
|
||||
|
||||
/// <summary>
/// Writes TOON-formatted output from <see cref="JsonNode"/> structures.
/// </summary>
/// <remarks>
/// The writer accumulates text in one <see cref="StringBuilder"/> that is cleared at the start of
/// every <see cref="Write"/> call, so an instance can be reused for consecutive documents.
/// JSON <c>null</c> (a <see langword="null"/> <see cref="JsonNode"/>) is treated as a primitive
/// everywhere: it is written as <c>null</c> and does not prevent the inline or tabular array forms.
/// </remarks>
internal sealed class ToonWriter
{
    // Custom (non-scientific) formats. decimal carries at most 28 fractional digits.
    private const string DoubleFormat = "0.##################";
    private const string DecimalFormat = "0.############################";

    private readonly ToonSerializerOptions _options;
    private readonly StringBuilder _sb;

    /// <summary>
    /// Creates a writer that uses the given options.
    /// </summary>
    /// <param name="options">Indent size, document delimiter and length-marker settings.</param>
    public ToonWriter(ToonSerializerOptions options)
    {
        _options = options;
        _sb = new StringBuilder();
    }

    /// <summary>
    /// Renders <paramref name="node"/> as a TOON document.
    /// </summary>
    /// <param name="node">The root node; <see langword="null"/> renders as <c>null</c>.</param>
    /// <returns>The document text, without a trailing newline.</returns>
    /// <exception cref="ToonException">An array is reached in a position that has neither a key nor root status.</exception>
    public string Write(JsonNode? node)
    {
        _sb.Clear();

        if (node is null)
        {
            return "null";
        }

        WriteValue(node, 0, _options.Delimiter, isRoot: true);

        // No newline is appended after the last line of the document.
        return _sb.ToString();
    }

    /// <summary>
    /// Writes one node, optionally preceded by its key, at the given depth.
    /// </summary>
    private void WriteValue(JsonNode node, int depth, ToonDelimiter activeDelimiter, bool isRoot = false, string? forcedKey = null)
    {
        switch (node)
        {
            case JsonObject obj when !isRoot && forcedKey is not null:
                WriteKey(forcedKey, depth);
                _sb.Append(':');
                if (obj.Count > 0)
                {
                    NewLine();
                    WriteObject(obj, depth + 1, activeDelimiter);
                }

                break;

            case JsonObject obj:
                WriteObject(obj, depth, activeDelimiter);
                break;

            case JsonArray arr when isRoot:
                WriteArrayBody(arr, depth, activeDelimiter);
                break;

            case JsonArray arr when forcedKey is not null:
                WriteKey(forcedKey, depth);
                WriteArrayBody(arr, depth, activeDelimiter);
                break;

            case JsonArray:
                throw new ToonException("Array without key in non-root context");

            case JsonValue val:
                if (forcedKey is not null)
                {
                    WriteKey(forcedKey, depth);
                    _sb.Append(": ");
                }

                WritePrimitive(val, activeDelimiter);
                break;
        }
    }

    /// <summary>
    /// Writes every property of <paramref name="obj"/> on its own line at <paramref name="depth"/>.
    /// An empty object writes nothing.
    /// </summary>
    private void WriteObject(JsonObject obj, int depth, ToonDelimiter activeDelimiter)
    {
        var first = true;
        foreach (var (key, value) in obj)
        {
            if (!first)
            {
                NewLine();
            }

            first = false;
            WriteField(key, value, depth, activeDelimiter);
        }
    }

    /// <summary>
    /// Writes a single <c>key: value</c> entry (of any value kind) starting at <paramref name="depth"/>.
    /// </summary>
    private void WriteField(string key, JsonNode? value, int depth, ToonDelimiter activeDelimiter)
    {
        if (value is null)
        {
            WriteKey(key, depth);
            _sb.Append(": null");
            return;
        }

        WriteValue(value, depth, activeDelimiter, isRoot: false, forcedKey: key);
    }

    /// <summary>
    /// Writes an array header and its contents at the current position; any key has already been written.
    /// </summary>
    /// <param name="array">The array to write.</param>
    /// <param name="depth">Depth of the line holding the header; rows and list items go one level deeper.</param>
    /// <param name="activeDelimiter">Delimiter used in the header and between values.</param>
    private void WriteArrayBody(JsonArray array, int depth, ToonDelimiter activeDelimiter)
    {
        if (array.Count == 0)
        {
            // Empty array: header only, nothing after the colon.
            WriteArrayHeader(0, activeDelimiter, fields: null);
            _sb.Append(':');
            return;
        }

        if (IsPrimitiveArray(array))
        {
            WriteArrayHeader(array.Count, activeDelimiter, fields: null);
            _sb.Append(": ");
            WriteDelimitedValues(array, activeDelimiter);
            return;
        }

        if (GetTabularFields(array) is { } fields)
        {
            WriteArrayHeader(array.Count, activeDelimiter, fields);
            _sb.Append(':');
            WriteTabularRows(array, fields, depth + 1, activeDelimiter);
            return;
        }

        WriteArrayHeader(array.Count, activeDelimiter, fields: null);
        _sb.Append(':');
        WriteListItems(array, depth + 1, activeDelimiter);
    }

    /// <summary>
    /// Returns <see langword="true"/> when no element is an object or an array
    /// (values and JSON nulls only). An empty array also returns <see langword="true"/>.
    /// </summary>
    private static bool IsPrimitiveArray(JsonArray array)
    {
        foreach (var item in array)
        {
            if (item is JsonObject or JsonArray)
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Determines whether <paramref name="array"/> can use the tabular form and, if so, returns its column names.
    /// </summary>
    /// <returns>
    /// The keys of the first object, in order, when every element is an object with exactly that
    /// non-empty key set and only primitive (value or null) property values; otherwise <see langword="null"/>.
    /// </returns>
    private static List<string>? GetTabularFields(JsonArray array)
    {
        List<string>? fields = null;

        foreach (var item in array)
        {
            if (item is not JsonObject row)
            {
                return null;
            }

            if (fields is null)
            {
                fields = row.Select(property => property.Key).ToList();
                if (fields.Count == 0)
                {
                    return null;
                }
            }
            else if (row.Count != fields.Count || !fields.TrueForAll(name => row.ContainsKey(name)))
            {
                return null;
            }

            foreach (var (_, cell) in row)
            {
                if (cell is JsonObject or JsonArray)
                {
                    return null;
                }
            }
        }

        return fields;
    }

    /// <summary>
    /// Writes the elements of a primitive array on the current line, separated by the delimiter.
    /// </summary>
    private void WriteDelimitedValues(JsonArray array, ToonDelimiter activeDelimiter)
    {
        var separator = ToonHelpers.GetDelimiterChar(activeDelimiter);

        for (var i = 0; i < array.Count; i++)
        {
            if (i > 0)
            {
                _sb.Append(separator);
            }

            WritePrimitiveOrNull(array[i], activeDelimiter);
        }
    }

    /// <summary>
    /// Writes one delimited line per object, each preceded by a newline and indented to <paramref name="rowDepth"/>.
    /// </summary>
    private void WriteTabularRows(JsonArray array, List<string> fields, int rowDepth, ToonDelimiter activeDelimiter)
    {
        var separator = ToonHelpers.GetDelimiterChar(activeDelimiter);

        foreach (var item in array)
        {
            if (item is not JsonObject row)
            {
                continue;
            }

            NewLine();
            WriteIndent(rowDepth);

            for (var i = 0; i < fields.Count; i++)
            {
                if (i > 0)
                {
                    _sb.Append(separator);
                }

                WritePrimitiveOrNull(row[fields[i]], activeDelimiter);
            }
        }
    }

    /// <summary>
    /// Writes the expanded list form: one hyphen-prefixed item per line, each preceded by a newline
    /// and indented to <paramref name="itemDepth"/>.
    /// </summary>
    private void WriteListItems(JsonArray array, int itemDepth, ToonDelimiter activeDelimiter)
    {
        foreach (var item in array)
        {
            NewLine();
            WriteIndent(itemDepth);
            _sb.Append('-');

            switch (item)
            {
                case JsonObject { Count: 0 }:
                    // An empty object is the bare hyphen, with no trailing space.
                    break;

                case JsonObject obj:
                    _sb.Append(' ');
                    WriteObjectAsListItem(obj, itemDepth, activeDelimiter);
                    break;

                case JsonArray inner:
                    // The nested header sits on the hyphen line; its contents go one level below it.
                    _sb.Append(' ');
                    WriteArrayBody(inner, itemDepth, activeDelimiter);
                    break;

                default:
                    _sb.Append(' ');
                    WritePrimitiveOrNull(item, activeDelimiter);
                    break;
            }
        }
    }

    /// <summary>
    /// Writes a non-empty object whose first field shares the line of the already written "- " marker.
    /// </summary>
    /// <param name="obj">The object to write; must contain at least one property.</param>
    /// <param name="hyphenDepth">Depth of the hyphen line.</param>
    /// <param name="activeDelimiter">Delimiter in effect for values and array headers.</param>
    private void WriteObjectAsListItem(JsonObject obj, int hyphenDepth, ToonDelimiter activeDelimiter)
    {
        var first = true;

        foreach (var (key, value) in obj)
        {
            if (!first)
            {
                // Remaining fields are children of the list item: one level below the hyphen.
                NewLine();
                WriteField(key, value, hyphenDepth + 1, activeDelimiter);
                continue;
            }

            first = false;

            // The "- " marker already occupies the indentation, so the key is written bare.
            WriteKeyToken(key);

            switch (value)
            {
                case null:
                    _sb.Append(": null");
                    break;

                case JsonValue val:
                    _sb.Append(": ");
                    WritePrimitive(val, activeDelimiter);
                    break;

                case JsonArray arr:
                    // Writes exactly one header, then rows/items one level below the hyphen.
                    WriteArrayBody(arr, hyphenDepth, activeDelimiter);
                    break;

                case JsonObject { Count: 0 }:
                    _sb.Append(':');
                    break;

                case JsonObject nested:
                    _sb.Append(':');
                    NewLine();
                    WriteObject(nested, hyphenDepth + 2, activeDelimiter); // +2 for nested object in list item
                    break;
            }
        }
    }

    /// <summary>
    /// Writes <c>[count&lt;delimiter&gt;]</c>, with the optional "#" marker, followed by
    /// <c>{field,...}</c> when <paramref name="fields"/> is non-empty. The colon is left to the caller.
    /// </summary>
    private void WriteArrayHeader(int count, ToonDelimiter delimiter, List<string>? fields)
    {
        _sb.Append('[');

        if (_options.UseLengthMarker)
        {
            _sb.Append('#');
        }

        _sb.Append(count);
        _sb.Append(ToonHelpers.GetDelimiterString(delimiter));
        _sb.Append(']');

        if (fields is null || fields.Count == 0)
        {
            return;
        }

        var separator = ToonHelpers.GetDelimiterChar(delimiter);

        _sb.Append('{');
        for (var i = 0; i < fields.Count; i++)
        {
            if (i > 0)
            {
                _sb.Append(separator);
            }

            WriteKeyToken(fields[i]);
        }

        _sb.Append('}');
    }

    /// <summary>
    /// Writes a value node, or <c>null</c> when the node is not a <see cref="JsonValue"/>.
    /// </summary>
    private void WritePrimitiveOrNull(JsonNode? node, ToonDelimiter activeDelimiter)
    {
        if (node is JsonValue val)
        {
            WritePrimitive(val, activeDelimiter);
        }
        else
        {
            _sb.Append("null");
        }
    }

    /// <summary>
    /// Writes a scalar according to the runtime type stored in <paramref name="value"/>.
    /// </summary>
    private void WritePrimitive(JsonValue value, ToonDelimiter activeDelimiter)
    {
        var raw = value.GetValue<object>();

        switch (raw)
        {
            case null:
                _sb.Append("null");
                break;

            case bool flag:
                _sb.Append(flag ? "true" : "false");
                break;

            case string text:
                WriteString(text, activeDelimiter);
                break;

            case JsonElement element:
                WritePrimitiveFromElement(element, activeDelimiter);
                break;

            case byte or sbyte or short or ushort or int or uint or long or ulong or float or double or decimal:
                WriteNumber(raw);
                break;

            default:
                // Any other CLR type is written as a string; format culture-independently where possible.
                var fallback = raw is IFormattable formattable
                    ? formattable.ToString(null, CultureInfo.InvariantCulture)
                    : raw.ToString();
                WriteString(fallback ?? "null", activeDelimiter);
                break;
        }
    }

    /// <summary>
    /// Writes a scalar held as a <see cref="JsonElement"/>. Element kinds other than
    /// string, number, boolean, null and undefined write nothing.
    /// </summary>
    private void WritePrimitiveFromElement(JsonElement elem, ToonDelimiter activeDelimiter)
    {
        switch (elem.ValueKind)
        {
            case JsonValueKind.String:
                WriteString(elem.GetString() ?? "", activeDelimiter);
                break;

            case JsonValueKind.Number:
                // Integers are read as integers so magnitudes above 2^53 keep every digit.
                if (elem.TryGetInt64(out var signed))
                {
                    WriteNumber(signed);
                }
                else if (elem.TryGetUInt64(out var unsigned))
                {
                    WriteNumber(unsigned);
                }
                else
                {
                    WriteNumber(elem.GetDouble());
                }

                break;

            case JsonValueKind.True:
                _sb.Append("true");
                break;

            case JsonValueKind.False:
                _sb.Append("false");
                break;

            case JsonValueKind.Null:
            case JsonValueKind.Undefined:
                _sb.Append("null");
                break;
        }
    }

    /// <summary>
    /// Writes a boxed numeric value without scientific notation, using the invariant culture.
    /// </summary>
    private void WriteNumber(object num)
    {
        var text = num switch
        {
            float f when !float.IsFinite(f) => "null",

            // Go through the shortest round-trip text of the float; a direct widening to double
            // would expose binary noise (0.1f would print as 0.100000001490116).
            float f => NormalizeNumber(double.Parse(f.ToString("R", CultureInfo.InvariantCulture), CultureInfo.InvariantCulture)),
            double d => NormalizeNumber(d),
            decimal m => m.ToString(DecimalFormat, CultureInfo.InvariantCulture),
            _ => Convert.ToString(num, CultureInfo.InvariantCulture) ?? "0",
        };

        _sb.Append(text);
    }

    /// <summary>
    /// Formats a double: zero (either sign) becomes <c>0</c>, NaN and infinities become <c>null</c>,
    /// everything else is rendered in plain decimal notation.
    /// </summary>
    private static string NormalizeNumber(double d)
    {
        if (d == 0)
        {
            return "0";
        }

        if (double.IsNaN(d) || double.IsInfinity(d))
        {
            return "null";
        }

        // NOTE(review): custom numeric formats render double with 15 significant digits, so values
        // that need 17 digits to round-trip are shortened - confirm this is acceptable.
        var text = d.ToString(DoubleFormat, CultureInfo.InvariantCulture);

        // A tiny negative value can round to zero and keep its sign; normalize that to "0".
        return text == "-0" ? "0" : text;
    }

    /// <summary>
    /// Writes a string value, quoted and escaped only when <see cref="ToonHelpers.RequiresQuoting"/> says so.
    /// </summary>
    private void WriteString(string str, ToonDelimiter activeDelimiter)
    {
        if (!ToonHelpers.RequiresQuoting(str, activeDelimiter))
        {
            _sb.Append(str);
            return;
        }

        _sb.Append('"').Append(ToonHelpers.Escape(str)).Append('"');
    }

    /// <summary>
    /// Writes the indentation for <paramref name="depth"/> followed by the key.
    /// </summary>
    private void WriteKey(string key, int depth)
    {
        WriteIndent(depth);
        WriteKeyToken(key);
    }

    /// <summary>
    /// Writes a key with no indentation, quoting and escaping it when it is not a valid bare key.
    /// </summary>
    private void WriteKeyToken(string key)
    {
        if (ToonHelpers.IsValidUnquotedKey(key))
        {
            _sb.Append(key);
            return;
        }

        _sb.Append('"').Append(ToonHelpers.Escape(key)).Append('"');
    }

    /// <summary>
    /// Writes the indentation for <paramref name="depth"/>; depth 0 writes nothing.
    /// </summary>
    private void WriteIndent(int depth)
    {
        if (depth > 0)
        {
            _sb.Append(ToonHelpers.GetIndentation(depth, _options.IndentSize));
        }
    }

    /// <summary>
    /// Ends the current line with a single LF so the output is identical on every platform.
    /// </summary>
    private void NewLine() => _sb.Append('\n');
}
|
||||
Reference in New Issue
Block a user