Improved normalization algorithm

2025-07-05 06:00:26 -04:00 · 2024-04-09 09:26:59 -04:00 · 2024-04-09 09:26:59 -04:00 · d1c589e2c0
commit d1c589e2c0
parent 84624111ea
4 changed files with 105 additions and 126 deletions
--- a/CSTNet/CST.cs
+++ b/CSTNet/CST.cs
@ -8,107 +8,79 @@ namespace CSTNet;

 public static class CST
 {
-    const char CARET = '^';
-    const string LF = "\u000A";
-    const string CR = "\u000D";
-    const string CRLF = "\u000D\u000A";
-    const string LS = "\u2028";
+	const char CARET = '^';
+	const string LF = "\u000A";
+	const string CR = "\u000D";
+	const string CRLF = "\u000D\u000A";
+	const string LS = "\u2028";

-    /// <summary>
-    /// Gets the value from the digit-based key.
-    /// </summary>
-    /// <returns>Returns the entry</returns>
-    public static string Parse(string content, int key) => Parse(content, key.ToString());
+	/// <summary>
+	/// Looks up an entry by its numeric key.
+	/// </summary>
+	/// <param name="content">The text to search.</param>
+	/// <param name="key">The numeric key; it is converted with <c>ToString()</c> before the lookup.</param>
+	/// <returns>Whatever the string-keyed <c>Parse</c> overload returns for the converted key.</returns>
+	public static string Parse(string content, int key)
+	{
+		var textKey = key.ToString();
+		return Parse(content, textKey);
+	}

-    /// <summary>
-    /// Gets the value from the string-based key.
-    /// </summary>
-    /// <returns>Returns the entry</returns>
-    public static string Parse(string content, string key)
-    {
-        var entries = NormalizeEntries(content);
-        return GetEntry(entries, key);
-    }
+	/// <summary>
+	/// Looks up an entry by its string key.
+	/// </summary>
+	/// <param name="content">The text to search.</param>
+	/// <param name="key">The key whose entry is wanted.</param>
+	/// <returns>The result of <c>GetEntry</c> over the normalized entries of <paramref name="content"/>.</returns>
+	public static string Parse(string content, string key) =>
+		GetEntry(NormalizeEntries(content), key);

 #if (NET8_0 && DEBUG)
-    [UnmanagedCallersOnly(EntryPoint = "parse")]
-    public static IntPtr Parse(IntPtr content, IntPtr key)
-    {
-        // => Parse(Marshal.PtrToStringAnsi(content), Marshal.PtrToStringAnsi(key));
-        var entries = NormalizeEntries(Marshal.PtrToStringAnsi(content));
-        return Marshal.StringToHGlobalAnsi(GetEntry(entries, Marshal.PtrToStringAnsi(key)));
-    }
+	/// <summary>
+	/// Native export named "parse"; compiled only when both NET8_0 and DEBUG are defined.
+	/// Reads both arguments as ANSI strings, looks the key up, and returns the result as a newly allocated ANSI string.
+	/// </summary>
+	/// <param name="content">Pointer read with <c>Marshal.PtrToStringAnsi</c> as the text to search.</param>
+	/// <param name="key">Pointer read with <c>Marshal.PtrToStringAnsi</c> as the key.</param>
+	/// <returns>Pointer produced by <c>Marshal.StringToHGlobalAnsi</c> for the entry returned by <c>GetEntry</c>.</returns>
+	// NOTE(review): PtrToStringAnsi yields null for a null pointer and nothing here guards against it — confirm callers never pass null.
+	// NOTE(review): the returned buffer is allocated by StringToHGlobalAnsi; presumably the caller must release it — confirm.
+	[UnmanagedCallersOnly(EntryPoint = "parse")]
+	public static IntPtr Parse(IntPtr content, IntPtr key)
+	{
+		// => Parse(Marshal.PtrToStringAnsi(content), Marshal.PtrToStringAnsi(key));
+		var entries = NormalizeEntries(Marshal.PtrToStringAnsi(content));
+		return Marshal.StringToHGlobalAnsi(GetEntry(entries, Marshal.PtrToStringAnsi(key)));
+	}
 #endif
+	/// <summary>
+	/// Converts every CRLF, CR, LS and LF line ending to Environment.NewLine, splits the content on caret + newline, and drops comment entries.
+	/// </summary>
+	/// <param name="content">The content to normalize.</param>
+	/// <returns>A lazily filtered sequence of entries that do not start with "//", "#" or "/*" and do not end with "*/".</returns>
+	public static IEnumerable<string> NormalizeEntries(string content)
+	{
+		var newLines = new[] { CRLF, CR, LS }; // CRLF comes first so a CR+LF pair is never treated as two separate breaks

-    /// <summary>
-    /// Replaces the document's line endings with the native system line endings.
-    /// </summary>
-    /// <remarks>This stage ensures there are no crashes during parsing.</remarks>
-    /// <param name="content">The content of the document.</param>
-    /// <returns>The document's content with native system line endings.</returns>
-    static IEnumerable<string> NormalizeEntries(string content)
-    {
-        // Check if the document already uses native system line endings.
-        if (!content.Contains(Environment.NewLine))
-        {
-            // If not, check for and replace other line ending types.
-            if (content.Contains(LF))
-                content = content.Replace(LF,
-	                Environment.NewLine);
+		content = newLines.Aggregate(content, (current, nl) => current.Replace(nl, LF)).Replace(LF, Environment.NewLine); // collapse to LF, then expand exactly once so no break is replaced twice

-            if (content.Contains(CR))
-                content = content.Replace(CR,
-	                Environment.NewLine);
+		return content.Split($"{CARET}{Environment.NewLine}", StringSplitOptions.RemoveEmptyEntries)
+		.Where(line => !line.StartsWith("//", StringComparison.Ordinal) && !line.StartsWith('#') && !line.StartsWith("/*", StringComparison.Ordinal) && !line.EndsWith("*/", StringComparison.Ordinal));
+	}

-            if (content.Contains(CRLF))
-                content = content.Replace(CRLF,
-	                Environment.NewLine);
+	/// <summary>
+	/// Finds the first entry that starts with the key and contains a caret, and returns the text from that caret on with surrounding carets trimmed.
+	/// </summary>
+	/// <param name="entries">The entries to search, in enumeration order.</param>
+	/// <param name="key">The key prefix to look for (compared ordinally).</param>
+	/// <returns>The trimmed value of the first usable match, or "***MISSING***" when there is none.</returns>
+	static string GetEntry(IEnumerable<string> entries, string key)
+	{
+		// Scan in order; the first usable match wins.
+		foreach (var entry in entries)
+		{
+			// Keys are identifiers, not linguistic text, so compare ordinally.
+			if (!entry.StartsWith(key, StringComparison.Ordinal))
+				continue;

-            if (content.Contains(LS))
-                content = content.Replace(LS,
-	                Environment.NewLine);
-        }
+			// A matching entry without a caret has no value; slicing at -1 would throw, so skip it.
+			var startIndex = entry.IndexOf(CARET);
+			if (startIndex < 0)
+				continue;

-        // Split the content by the caret and newline characters.
-        var lines = content.Split(new[] { $"{CARET}{Environment.NewLine}" },
-            StringSplitOptions.RemoveEmptyEntries);
+			// Take everything from the first caret onward and strip leading/trailing carets.
+			return entry[startIndex..].Trim(CARET);
+		}

-        // Filter out any lines that start with "//", "#", "/*", or end with "*/".
-        return lines.Where(line =>
-            !line.StartsWith("//") &&
-            !line.StartsWith("#") &&
-            !line.StartsWith("/*") &&
-            !line.EndsWith("*/"))
-            .AsEnumerable();
-    }
-
-    /// <summary>
-    /// Retrieves the value for the specified key from the given entries.
-    /// </summary>
-    /// <param name="entries">The entries to search through.</param>
-    /// <param name="key">The key to search for.</param>
-    /// <returns>The value for the specified key, or a default string if not found.</returns>
-    static string GetEntry(IEnumerable<string> entries, string key)
-    {
-        // Iterate through the entries.
-        foreach (var entry in entries)
-        {
-            // If the line doesn't start with the key, keep searching.
-            if (!entry.StartsWith(key))
-                continue;
-
-            // Locate the index of the caret character.
-            var startIndex = entry.IndexOf(CARET);
-            // Get the line from the caret character to the end of the string.
-            var line = entry[startIndex..];
-
-            // Return the line with the caret characters trimmed.
-            return line.TrimStart(CARET).TrimEnd(CARET);
-        }
-
-        // If no entry is found, return a default string.
-        return "***MISSING***";
-    }
+		// No usable entry matched the key.
+		return "***MISSING***";
+	}

 }

--- a/CSTNet/CSTNet.csproj
+++ b/CSTNet/CSTNet.csproj
@ -1,25 +1,25 @@
 <Project Sdk="Microsoft.NET.Sdk">

-    <PropertyGroup>
-        <TargetFrameworks>net6.0;net8.0</TargetFrameworks>
-        <Version>2.1.100</Version>
-        <Nullable>enable</Nullable>
-        <LangVersion>latest</LangVersion>
-        <ImplicitUsings>enable</ImplicitUsings>
-        <Authors>Tony Bark</Authors>
-        <PackageDescription>
-            Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
-            as keys and the value as text enclosed between carets. ([key] ^[value]^)
+	<PropertyGroup>
+		<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
+		<Version>2.1.101-alpha</Version>
+		<Nullable>enable</Nullable>
+		<LangVersion>latest</LangVersion>
+		<ImplicitUsings>enable</ImplicitUsings>
+		<Authors>Tony Bark</Authors>
+		<PackageDescription>
+			Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
+			as keys and the value as text enclosed between carets. ([key] ^[value]^)

-            CSTNet provides you the framework for parsing the CST format.
-        </PackageDescription>
-        <RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
-        <PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
-    </PropertyGroup>
+			CSTNet provides you the framework for parsing the CST format.
+		</PackageDescription>
+		<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
+		<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
+	</PropertyGroup>

-    <!-- Support AOT on .NET 8+ -->
-    <PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
-        <IsAotCompatible>true</IsAotCompatible>
-    </PropertyGroup>
+	<!-- Support AOT on .NET 8+ -->
+	<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
+		<IsAotCompatible>true</IsAotCompatible>
+	</PropertyGroup>

 </Project>
--- a/CSTNet/UIText.cs
+++ b/CSTNet/UIText.cs
@ -4,10 +4,10 @@ namespace CSTNet;

 public class UIText : IUIText
 {
-    /// <summary>
-    /// The language of the text.
-    /// </summary>
-    string Language { get; set; } = "english";
+	/// <summary>
+	/// The language of the text.
+	/// </summary>
+	string Language { get; set; } = "english";

    /// <summary>
    /// The base directory for the language files.