Improved normalization algorithm

This commit is contained in:
Tony Bark 2024-04-09 09:26:59 -04:00
parent 84624111ea
commit d1c589e2c0
4 changed files with 105 additions and 126 deletions

View file

@ -8,107 +8,79 @@ namespace CSTNet;
public static class CST public static class CST
{ {
const char CARET = '^'; const char CARET = '^';
const string LF = "\u000A"; const string LF = "\u000A";
const string CR = "\u000D"; const string CR = "\u000D";
const string CRLF = "\u000D\u000A"; const string CRLF = "\u000D\u000A";
const string LS = "\u2028"; const string LS = "\u2028";
/// <summary> /// <summary>
/// Gets the value from the digit-based key. /// Gets the value from the digit-based key.
/// </summary> /// </summary>
/// <returns>Returns the entry</returns> /// <returns>Returns the entry</returns>
public static string Parse(string content, int key) => Parse(content, key.ToString()); public static string Parse(string content, int key) => Parse(content, key.ToString());
/// <summary> /// <summary>
/// Gets the value from the string-based key. /// Gets the value from the string-based key.
/// </summary> /// </summary>
/// <returns>Returns the entry</returns> /// <returns>Returns the entry</returns>
public static string Parse(string content, string key) public static string Parse(string content, string key)
{ {
var entries = NormalizeEntries(content); var entries = NormalizeEntries(content);
return GetEntry(entries, key); return GetEntry(entries, key);
} }
#if (NET8_0 && DEBUG) #if (NET8_0 && DEBUG)
[UnmanagedCallersOnly(EntryPoint = "parse")] [UnmanagedCallersOnly(EntryPoint = "parse")]
public static IntPtr Parse(IntPtr content, IntPtr key) public static IntPtr Parse(IntPtr content, IntPtr key)
{ {
// => Parse(Marshal.PtrToStringAnsi(content), Marshal.PtrToStringAnsi(key)); // => Parse(Marshal.PtrToStringAnsi(content), Marshal.PtrToStringAnsi(key));
var entries = NormalizeEntries(Marshal.PtrToStringAnsi(content)); var entries = NormalizeEntries(Marshal.PtrToStringAnsi(content));
return Marshal.StringToHGlobalAnsi(GetEntry(entries, Marshal.PtrToStringAnsi(key))); return Marshal.StringToHGlobalAnsi(GetEntry(entries, Marshal.PtrToStringAnsi(key)));
} }
#endif #endif
/// <summary>
/// Normalizes the content by replacing various newline characters with Environment.NewLine and filters out comments.
/// </summary>
/// <param name="content">The content to normalize.</param>
/// <returns>An enumerable of normalized lines.</returns>
public static IEnumerable<string> NormalizeEntries(string content)
{
var newLines = new[] { LF, CR, CRLF, LS };
/// <summary> content = newLines.Aggregate(content, (current, nl) => current.Replace(nl, Environment.NewLine));
/// Replaces the document's line endings with the native system line endings.
/// </summary>
/// <remarks>This stage ensures there are no crashes during parsing.</remarks>
/// <param name="content">The content of the document.</param>
/// <returns>The document's content with native system line endings.</returns>
static IEnumerable<string> NormalizeEntries(string content)
{
// Check if the document already uses native system line endings.
if (!content.Contains(Environment.NewLine))
{
// If not, check for and replace other line ending types.
if (content.Contains(LF))
content = content.Replace(LF,
Environment.NewLine);
if (content.Contains(CR)) return content.Split($"{CARET}{Environment.NewLine}", StringSplitOptions.RemoveEmptyEntries)
content = content.Replace(CR, .Where(line => !line.StartsWith("//") && !line.StartsWith('#') && !line.StartsWith("/*") && !line.EndsWith("*/"));
Environment.NewLine); }
if (content.Contains(CRLF)) /// <summary>
content = content.Replace(CRLF, /// Retrieves the value for the specified key from the given entries.
Environment.NewLine); /// </summary>
/// <param name="entries">The entries to search through.</param>
/// <param name="key">The key to search for.</param>
/// <returns>The value for the specified key, or a default string if not found.</returns>
static string GetEntry(IEnumerable<string> entries, string key)
{
// Iterate through the entries.
foreach (var entry in entries)
{
// If the line doesn't start with the key, keep searching.
if (!entry.StartsWith(key))
continue;
if (content.Contains(LS)) // Locate the index of the caret character.
content = content.Replace(LS, var startIndex = entry.IndexOf(CARET);
Environment.NewLine); // Get the line from the caret character to the end of the string.
} var line = entry[startIndex..];
// Split the content by the caret and newline characters. // Return the line with the caret characters trimmed.
var lines = content.Split(new[] { $"{CARET}{Environment.NewLine}" }, return line.TrimStart(CARET).TrimEnd(CARET);
StringSplitOptions.RemoveEmptyEntries); }
// Filter out any lines that start with "//", "#", "/*", or end with "*/". // If no entry is found, return a default string.
return lines.Where(line => return "***MISSING***";
!line.StartsWith("//") && }
!line.StartsWith("#") &&
!line.StartsWith("/*") &&
!line.EndsWith("*/"))
.AsEnumerable();
}
/// <summary>
/// Retrieves the value for the specified key from the given entries.
/// </summary>
/// <remarks>
/// An entry matches only when it begins with <paramref name="key"/> (ordinal comparison)
/// and the key is immediately followed by whitespace or the caret, so that key "1" does
/// not match an entry keyed "10". Entries that match the key but contain no caret are
/// skipped instead of raising an exception.
/// </remarks>
/// <param name="entries">The entries to search through.</param>
/// <param name="key">The key to search for.</param>
/// <returns>The value for the specified key, or "***MISSING***" if not found.</returns>
static string GetEntry(IEnumerable<string> entries, string key)
{
    foreach (var entry in entries)
    {
        // Keys are identifiers, not linguistic text: compare ordinally so the
        // result does not depend on the current culture.
        if (!entry.StartsWith(key, StringComparison.Ordinal))
            continue;

        // Require a key boundary right after the key; a bare prefix match would
        // let "1" pick up the entry for "10", "11", and so on.
        if (entry.Length > key.Length)
        {
            var next = entry[key.Length];
            if (next != CARET && !char.IsWhiteSpace(next))
                continue;
        }

        // IndexOf returns -1 when the entry has no caret; slicing from -1 would
        // throw, so treat such an entry as malformed and keep searching.
        var startIndex = entry.IndexOf(CARET);
        if (startIndex < 0)
            continue;

        // Take everything from the first caret onward and strip the enclosing carets.
        return entry[startIndex..].Trim(CARET);
    }

    // If no entry is found, return a default string.
    return "***MISSING***";
}
} }

View file

@ -1,25 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks> <TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<Version>2.1.100</Version> <Version>2.1.101-alpha</Version>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Authors>Tony Bark</Authors> <Authors>Tony Bark</Authors>
<PackageDescription> <PackageDescription>
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
as keys and the value as text enclosed between carets. ([key] ^[value]^) as keys and the value as text enclosed between carets. ([key] ^[value]^)
CSTNet provides you the framework for parsing the CST format. CSTNet provides you the framework for parsing the CST format.
</PackageDescription> </PackageDescription>
<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl> <RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression> <PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
</PropertyGroup> </PropertyGroup>
<!-- Support AOT on .NET 8+ --> <!-- Support AOT on .NET 8+ -->
<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' "> <PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
<IsAotCompatible>true</IsAotCompatible> <IsAotCompatible>true</IsAotCompatible>
</PropertyGroup> </PropertyGroup>
</Project> </Project>

View file

@ -4,10 +4,10 @@ namespace CSTNet;
public class UIText : IUIText public class UIText : IUIText
{ {
/// <summary> /// <summary>
/// The language of the text. /// The language of the text.
/// </summary> /// </summary>
string Language { get; set; } = "english"; string Language { get; set; } = "english";
/// <summary> /// <summary>
/// The base directory for the language files. /// The base directory for the language files.

View file

@ -1,17 +1,10 @@
# CST.NET # [CST.NET](http://CST.NET)
<p align="center"> &lt;p align="center"&gt;&lt;a href="https://github.com/tonytins/cstdotnet/blob/main/LICENSE"&gt;&lt;img src="https://img.shields.io/github/license/tonytins/cstdotnet" alt="GitHub license"&gt;&lt;/a&gt; &lt;a href="https://github.com/tonytins/cstdotnet/actions?query=workflow%3Abuild.yml"&gt;&lt;img src="https://img.shields.io/github/actions/workflow/status/tonytins/cstdotnet/build.yml" alt="GitHub Workflow Status"&gt;&lt;/a&gt; &lt;img src="https://img.shields.io/github/commit-activity/w/tonytins/cstdotnet" alt="GitHub commit activity"&gt; &lt;a href="code_of_conduct.md"&gt;&lt;/br&gt; &lt;img src="https://img.shields.io/codeclimate/maintainability-percentage/tonytins/cstdotnet" alt="Code Climate maintainability"&gt; &lt;img src="https://img.shields.io/nuget/dt/CSTNet" alt="NuGet Downloads"&gt; &lt;a href="https://www.nuget.org/packages/tonybark.updatetools"&gt;&lt;img src="https://img.shields.io/nuget/v/cstnet.svg" /&gt;&lt;/a&gt;&lt;/br&gt;&lt;img src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg" alt="Contributor Covenant"&gt;&lt;/a&gt;&lt;/br&gt; &lt;/p&gt;
<a href="https://github.com/tonytins/cstdotnet/blob/main/LICENSE"><img src="https://img.shields.io/github/license/tonytins/cstdotnet" alt="GitHub license"></a>
<a href="https://github.com/tonytins/cstdotnet/actions?query=workflow%3Abuild.yml"><img src="https://img.shields.io/github/actions/workflow/status/tonytins/cstdotnet/build.yml" alt="GitHub Workflow Status"></a>
<img src="https://img.shields.io/github/commit-activity/w/tonytins/cstdotnet" alt="GitHub commit activity">
<a href="code_of_conduct.md"></br>
<img src="https://img.shields.io/codeclimate/maintainability-percentage/tonytins/cstdotnet" alt="Code Climate maintainability">
<img src="https://img.shields.io/nuget/dt/CSTNet" alt="NuGet Downloads"> <a href="https://www.nuget.org/packages/tonybark.updatetools"><img src="https://img.shields.io/nuget/v/cstnet.svg" /></a></br><img src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg" alt="Contributor Covenant"></a></br>
</p>
CST.NET is a library for parsing Maxis' key-value pair format. It can be used in conjunction with your own custom frameworks, or the original ``UIText`` APIs. CST.NET is a library for parsing Maxis' key-value pair format. It can be used in conjunction with your own custom frameworks, or the original `UIText` APIs.
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words as keys and the value as text enclosed between carets. (e.g. ``<key> ^<text>^``) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character. Caret-Separated Text (or CST) is a key-value pair format represented by digits or words as keys and the value as text enclosed between carets. (e.g. `<key> ^<text>^`) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character.
## Changelog ## Changelog
@ -23,7 +16,7 @@ See [/docs](./doc/README.md).
## To-do ## To-do
- [ ] Support for parameters (e.g. ``%1``) - [ ] Support for parameters (e.g. `%1`)
## Known issues ## Known issues
@ -38,6 +31,20 @@ See [/docs](./doc/README.md).
- [.NET Interactive](https://github.com/dotnet/interactive/blob/main/README.md) for notebooks (optional). - [.NET Interactive](https://github.com/dotnet/interactive/blob/main/README.md) for notebooks (optional).
- [VSCode Extension](https://marketplace.visualstudio.com/items?itemName=ms-dotnettools.dotnet-interactive-vscode) or [nteract](https://nteract.io/). - [VSCode Extension](https://marketplace.visualstudio.com/items?itemName=ms-dotnettools.dotnet-interactive-vscode) or [nteract](https://nteract.io/).
# Contributing
You can contribute to CST.NET by testing cutting-edge features in the latest releases, filing bugs, and joining the discussion on our forums!
* [Getting Started](https://github.com/tonytins/cstdotnet/wiki)
* [Project Structure](https://github.com/tonytins/cstdotnet/wiki/Project-structure)
* [Coding Standards](https://github.com/tonytins/cstdotnet/wiki/Coding-standards)
* [Pull Requests](https://github.com/tonytins/cstdotnet/pulls): [Open](https://github.com/tonytins/cstdotnet/pulls)/[Closed](https://github.com/tonytins/cstdotnet/issues?q=is%3Apr+is%3Aclosed)
Looking for something to do? Check out the issues tagged as [help wanted](https://github.com/tonytins/cstdotnet/labels/help%20wanted) to get started.
Regarding translations: for now, full object and UI translations should be posted on the forums. This process is far from perfect and is due to be reworked. Stay tuned!
## License ## License
I license this project under the BSD-3-Clause license - see [LICENSE](LICENSE) for details. I license this project under the BSD-3-Clause license - see [LICENSE](LICENSE) for details.