Improved normalization algorithm

This commit is contained in:
Tony Bark 2024-04-09 09:26:59 -04:00
parent 84624111ea
commit d1c589e2c0
4 changed files with 105 additions and 126 deletions

View file

@ -8,107 +8,79 @@ namespace CSTNet;
public static class CST
{
// Delimiter character: followed by a line ending it separates entries, and it is trimmed from returned values.
const char CARET = '^';
// Line feed (U+000A).
const string LF = "\u000A";
// Carriage return (U+000D).
const string CR = "\u000D";
// Carriage return immediately followed by line feed.
const string CRLF = "\u000D\u000A";
// Unicode line separator (U+2028).
const string LS = "\u2028";
// Delimiter character: followed by a line ending it separates entries, and it is trimmed from returned values.
const char CARET = '^';
// Line feed (U+000A).
const string LF = "\u000A";
// Carriage return (U+000D).
const string CR = "\u000D";
// Carriage return immediately followed by line feed.
const string CRLF = "\u000D\u000A";
// Unicode line separator (U+2028).
const string LS = "\u2028";
/// <summary>
/// Gets the value from the digit-based key.
/// </summary>
/// <param name="content">The document text to search.</param>
/// <param name="key">
/// The numeric key. It is converted to its invariant-culture decimal string so the
/// lookup does not depend on the current thread culture (e.g. a localized negative sign).
/// </param>
/// <returns>The result of the string-key overload for the converted key.</returns>
public static string Parse(string content, int key) =>
    Parse(content, key.ToString(System.Globalization.CultureInfo.InvariantCulture));
/// <summary>
/// Gets the value from the digit-based key.
/// </summary>
/// <param name="content">The document text to search.</param>
/// <param name="key">
/// The numeric key. It is converted to its invariant-culture decimal string so the
/// lookup does not depend on the current thread culture (e.g. a localized negative sign).
/// </param>
/// <returns>The result of the string-key overload for the converted key.</returns>
public static string Parse(string content, int key) =>
    Parse(content, key.ToString(System.Globalization.CultureInfo.InvariantCulture));
/// <summary>
/// Gets the value from the string-based key.
/// </summary>
/// <param name="content">The document text to search.</param>
/// <param name="key">The key whose entry should be returned.</param>
/// <returns>The value produced by <c>GetEntry</c> for the normalized entries of <paramref name="content"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="content"/> or <paramref name="key"/> is <c>null</c>.
/// </exception>
public static string Parse(string content, string key)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from inside the normalization step.
    ArgumentNullException.ThrowIfNull(content);
    ArgumentNullException.ThrowIfNull(key);

    return GetEntry(NormalizeEntries(content), key);
}
/// <summary>
/// Gets the value from the string-based key.
/// </summary>
/// <param name="content">The document text to search.</param>
/// <param name="key">The key whose entry should be returned.</param>
/// <returns>The value produced by <c>GetEntry</c> for the normalized entries of <paramref name="content"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="content"/> or <paramref name="key"/> is <c>null</c>.
/// </exception>
public static string Parse(string content, string key)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from inside the normalization step.
    ArgumentNullException.ThrowIfNull(content);
    ArgumentNullException.ThrowIfNull(key);

    return GetEntry(NormalizeEntries(content), key);
}
#if (NET8_0 && DEBUG)
/// <summary>
/// Unmanaged entry point (exported as "parse") that looks up a key in a document,
/// with both arguments passed as pointers to ANSI strings.
/// </summary>
/// <param name="content">Pointer to the ANSI document text.</param>
/// <param name="key">Pointer to the ANSI key.</param>
/// <returns>
/// A pointer to an ANSI copy of the lookup result allocated by
/// <see cref="Marshal.StringToHGlobalAnsi(string)"/>, or <see cref="IntPtr.Zero"/> when
/// either input pointer is null. This method does not release the returned buffer;
/// the caller is responsible for freeing it (e.g. via <c>Marshal.FreeHGlobal</c>).
/// </returns>
[UnmanagedCallersOnly(EntryPoint = "parse")]
public static IntPtr Parse(IntPtr content, IntPtr key)
{
    // PtrToStringAnsi yields null for a null pointer. Letting that flow into the
    // parser would throw, and an exception must not escape an unmanaged entry point.
    var text = Marshal.PtrToStringAnsi(content);
    var name = Marshal.PtrToStringAnsi(key);
    if (text is null || name is null)
        return IntPtr.Zero;

    return Marshal.StringToHGlobalAnsi(GetEntry(NormalizeEntries(text), name));
}
/// <summary>
/// Unmanaged entry point (exported as "parse") that looks up a key in a document,
/// with both arguments passed as pointers to ANSI strings.
/// </summary>
/// <param name="content">Pointer to the ANSI document text.</param>
/// <param name="key">Pointer to the ANSI key.</param>
/// <returns>
/// A pointer to an ANSI copy of the lookup result allocated by
/// <see cref="Marshal.StringToHGlobalAnsi(string)"/>, or <see cref="IntPtr.Zero"/> when
/// either input pointer is null. This method does not release the returned buffer;
/// the caller is responsible for freeing it (e.g. via <c>Marshal.FreeHGlobal</c>).
/// </returns>
[UnmanagedCallersOnly(EntryPoint = "parse")]
public static IntPtr Parse(IntPtr content, IntPtr key)
{
    // PtrToStringAnsi yields null for a null pointer. Letting that flow into the
    // parser would throw, and an exception must not escape an unmanaged entry point.
    var text = Marshal.PtrToStringAnsi(content);
    var name = Marshal.PtrToStringAnsi(key);
    if (text is null || name is null)
        return IntPtr.Zero;

    return Marshal.StringToHGlobalAnsi(GetEntry(NormalizeEntries(text), name));
}
#endif
/// <summary>
/// Normalizes the content by replacing various newline characters with Environment.NewLine and filters out comments.
/// </summary>
/// <param name="content">The content to normalize.</param>
/// <returns>An enumerable of normalized lines.</returns>
public static IEnumerable<string> NormalizeEntries(string content)
{
var newLines = new[] { LF, CR, CRLF, LS };
/// <summary>
/// Replaces the document's line endings with the native system line endings.
/// </summary>
/// <remarks>This stage ensures there are no crashes during parsing.</remarks>
/// <param name="content">The content of the document.</param>
/// <returns>The document's content with native system line endings.</returns>
static IEnumerable<string> NormalizeEntries(string content)
{
// Check if the document already uses native system line endings.
if (!content.Contains(Environment.NewLine))
{
// If not, check for and replace other line ending types.
if (content.Contains(LF))
content = content.Replace(LF,
Environment.NewLine);
content = newLines.Aggregate(content, (current, nl) => current.Replace(nl, Environment.NewLine));
if (content.Contains(CR))
content = content.Replace(CR,
Environment.NewLine);
return content.Split($"{CARET}{Environment.NewLine}", StringSplitOptions.RemoveEmptyEntries)
.Where(line => !line.StartsWith("//") && !line.StartsWith('#') && !line.StartsWith("/*") && !line.EndsWith("*/"));
}
if (content.Contains(CRLF))
content = content.Replace(CRLF,
Environment.NewLine);
/// <summary>
/// Retrieves the value for the specified key from the given entries.
/// </summary>
/// <param name="entries">The entries to search through.</param>
/// <param name="key">The key to search for.</param>
/// <returns>The value for the specified key, or a default string if not found.</returns>
static string GetEntry(IEnumerable<string> entries, string key)
{
// Iterate through the entries.
foreach (var entry in entries)
{
// If the line doesn't start with the key, keep searching.
if (!entry.StartsWith(key))
continue;
if (content.Contains(LS))
content = content.Replace(LS,
Environment.NewLine);
}
// Locate the index of the caret character.
var startIndex = entry.IndexOf(CARET);
// Get the line from the caret character to the end of the string.
var line = entry[startIndex..];
// Split the content by the caret and newline characters.
var lines = content.Split(new[] { $"{CARET}{Environment.NewLine}" },
StringSplitOptions.RemoveEmptyEntries);
// Return the line with the caret characters trimmed.
return line.TrimStart(CARET).TrimEnd(CARET);
}
// Filter out any lines that start with "//", "#", "/*", or end with "*/".
return lines.Where(line =>
!line.StartsWith("//") &&
!line.StartsWith("#") &&
!line.StartsWith("/*") &&
!line.EndsWith("*/"))
.AsEnumerable();
}
/// <summary>
/// Retrieves the value of the first entry that starts with the specified key.
/// </summary>
/// <remarks>
/// Matching is an ordinal prefix test, so a key such as "1" also matches an entry
/// that begins with "10". Entries that start with the key but contain no caret
/// carry no value and are skipped.
/// </remarks>
/// <param name="entries">The entries to search through.</param>
/// <param name="key">The key to search for.</param>
/// <returns>
/// The text from the entry's first caret onward with leading and trailing carets
/// removed, or "***MISSING***" when no entry matches.
/// </returns>
static string GetEntry(IEnumerable<string> entries, string key)
{
    foreach (var entry in entries)
    {
        // Keys are identifiers, not linguistic text, so compare ordinally
        // rather than with the current culture.
        if (!entry.StartsWith(key, StringComparison.Ordinal))
            continue;

        var caretIndex = entry.IndexOf(CARET);

        // Without a caret there is no value to return, and slicing from -1 would throw.
        if (caretIndex < 0)
            continue;

        // Everything from the first caret onward, minus the enclosing carets.
        return entry[caretIndex..].Trim(CARET);
    }

    // If no entry is found, return a default string.
    return "***MISSING***";
}
// If no entry is found, return a default string.
return "***MISSING***";
}
}

View file

@ -1,25 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<Version>2.1.100</Version>
<Nullable>enable</Nullable>
<LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Authors>Tony Bark</Authors>
<PackageDescription>
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
as keys and the value as text enclosed between carets. ([key] ^[value]^)
<PropertyGroup>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<Version>2.1.101-alpha</Version>
<Nullable>enable</Nullable>
<LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Authors>Tony Bark</Authors>
<PackageDescription>
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
as keys and the value as text enclosed between carets. ([key] ^[value]^)
CSTNet provides you the framework for parsing the CST format.
</PackageDescription>
<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
</PropertyGroup>
CSTNet provides you the framework for parsing the CST format.
</PackageDescription>
<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
</PropertyGroup>
<!-- Support AOT on .NET 8+ -->
<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
<IsAotCompatible>true</IsAotCompatible>
</PropertyGroup>
<!-- Support AOT on .NET 8+ -->
<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
<IsAotCompatible>true</IsAotCompatible>
</PropertyGroup>
</Project>

View file

@ -4,10 +4,10 @@ namespace CSTNet;
public class UIText : IUIText
{
/// <summary>
/// The language of the text.
/// </summary>
string Language { get; set; } = "english";
/// <summary>
/// The language of the text.
/// </summary>
string Language { get; set; } = "english";
/// <summary>
/// The base directory for the language files.