From cd95b7c6ae60e38ff89e084cb2cfc65e5eb59eb6 Mon Sep 17 00:00:00 2001 From: Tony Bark Date: Fri, 11 Dec 2020 01:05:46 -0500 Subject: [PATCH] Rewrote normalizing algorithm The normalizing algorithm was rewritten to be more efficient and hopefully more reliable. See changelog.md for more info. --- .github/ISSUE_TEMPLATE.md | 2 +- .gitignore | 33 ++-- CSTNet.Tests/BasicTests.cs | 40 ----- CSTNet.Tests/CSTHelper.cs | 16 ++ CSTNet.Tests/CSTNet.Tests.csproj | 35 ---- CSTNet.Tests/MultilineTests.cs | 25 +++ CSTNet.Tests/QuickFennec.CST.Tests.csproj | 35 ++++ CSTNet.Tests/SingleLineTests.cs | 25 +++ CSTNet.Tests/v1.cst | 2 +- CSTNet.sln | 5 +- CSTNet/CaretSeparatedText.cs | 85 +++++++--- .../{CSTNet.csproj => QuickFennec.CST.csproj} | 4 +- README.md | 4 +- changelog.md | 22 +++ notebooks/cst.ipynb | 159 +++++++++++------- notebooks/data/v2.cst | 4 +- 16 files changed, 313 insertions(+), 183 deletions(-) delete mode 100644 CSTNet.Tests/BasicTests.cs create mode 100644 CSTNet.Tests/CSTHelper.cs delete mode 100644 CSTNet.Tests/CSTNet.Tests.csproj create mode 100644 CSTNet.Tests/MultilineTests.cs create mode 100644 CSTNet.Tests/QuickFennec.CST.Tests.csproj create mode 100644 CSTNet.Tests/SingleLineTests.cs rename CSTNet/{CSTNet.csproj => QuickFennec.CST.csproj} (82%) create mode 100644 changelog.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4bbf63f..65667de 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -14,4 +14,4 @@ - Version: - Platform: - - Subsystem: \ No newline at end of file + - Operating System: \ No newline at end of file diff --git a/.gitignore b/.gitignore index 924b732..0d33fac 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ -# Created by https://www.gitignore.io/api/rider,linux,macos,backup,windows,visualstudio,visualstudiocode -# Edit at https://www.gitignore.io/?templates=rider,linux,macos,backup,windows,visualstudio,visualstudiocode +# File created using '.gitignore Generator' for Visual Studio 
Code: https://bit.ly/vscode-gig + +# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,visualstudio,rider,macos,linux,backup +# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,visualstudio,rider,macos,linux,backup ### Backup ### *.bak @@ -32,6 +34,7 @@ # Icon must end with two \r Icon + # Thumbnails ._* @@ -52,17 +55,19 @@ Temporary Items .apdisk ### Rider ### -# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff .idea/** -**/.idea/** # Gradle and Maven with auto-import # When using Gradle or Maven with auto-import, you should exclude module files, # since they will be recreated, and may cause churn. Uncomment if using # auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml # .idea/modules.xml # .idea/*.iml # .idea/modules @@ -104,14 +109,14 @@ fabric.properties ### VisualStudioCode ### .vscode/* -!.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json -!.vscode/extensions.json +*.code-workspace ### VisualStudioCode Patch ### # Ignore all local history of files .history +.ionide ### Windows ### # Windows thumbnail cache files @@ -171,6 +176,7 @@ bld/ [Bb]in/ [Oo]bj/ [Ll]og/ +[Ll]ogs/ # Visual Studio 2015/2017 cache/options directory .vs/ @@ -210,6 +216,7 @@ StyleCopReport.xml *_p.c *_h.h *.ilk +*.meta *.obj *.iobj *.pch @@ -266,9 +273,6 @@ _ReSharper*/ *.[Rr]e[Ss]harper *.DotSettings.user -# JustCode is a .NET coding add-in -.JustCode - # TeamCity is a build add-in _TeamCity* @@ -279,6 +283,9 @@ _TeamCity* .axoCover/* !.axoCover/settings.json +# Coverlet is a free, cross platform Code Coverage Tool +coverage*[.json, .xml, .info] + # Visual Studio code coverage results *.coverage 
*.coveragexml @@ -487,4 +494,10 @@ healthchecksdb # Backup folder for Package Reference Convert tool in Visual Studio 2017 MigrationBackup/ -# End of https://www.gitignore.io/api/rider,linux,macos,backup,windows,visualstudio,visualstudiocode +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,visualstudio,rider,macos,linux,backup + +# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) + diff --git a/CSTNet.Tests/BasicTests.cs b/CSTNet.Tests/BasicTests.cs deleted file mode 100644 index 5c31184..0000000 --- a/CSTNet.Tests/BasicTests.cs +++ /dev/null @@ -1,40 +0,0 @@ -// This project is licensed under the MIT license. -using System; -using System.IO; -using Xunit; - -namespace CSTNet.Tests -{ - public class BasicTests - { - string CSTFile(string cst, string key) - { - var path = Path.Combine(AppContext.BaseDirectory, cst); - var file = File.ReadAllText(path); - - return CaretSeparatedText.Parse(file, key); - } - - [Theory] - [InlineData(1, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.")] - [InlineData(4, @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat. - -Vivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. 
Praesent ac efficitur magna, vitae scelerisque metus.")] - public void V1Test(int key, string expected) - { - var actual = CSTFile("v1.cst", key.ToString()); - Assert.Equal(expected, actual); - } - - [Theory] - [InlineData("Singleline", "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.")] - [InlineData("Multiline", @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. Sed semper nulla %1 a lectus dictum dictum. - -Quisque vehicula, nisi ut scelerisque sodales, nisi ipsum sodales ipsum, in rutrum tellus lacus sed nibh. Etiam mauris velit, elementum sed placerat et, elementum et tellus. Duis vitae elit fermentum, viverra lorem in, lobortis elit.")] - public void V2Test(string key, string expected) - { - var actual = CSTFile("v2.cst", key); - Assert.Equal(expected, actual); - } - } -} diff --git a/CSTNet.Tests/CSTHelper.cs b/CSTNet.Tests/CSTHelper.cs new file mode 100644 index 0000000..e203e46 --- /dev/null +++ b/CSTNet.Tests/CSTHelper.cs @@ -0,0 +1,16 @@ +using System; +using System.IO; + +namespace CSTNet.Tests +{ + static class CSTHelper + { + public static string CSTFile(string cst, string key) + { + var path = Path.Combine(AppContext.BaseDirectory, cst); + var file = File.ReadAllText(path); + + return CaretSeparatedText.Parse(file, key); + } + } +} diff --git a/CSTNet.Tests/CSTNet.Tests.csproj b/CSTNet.Tests/CSTNet.Tests.csproj deleted file mode 100644 index b638f32..0000000 --- a/CSTNet.Tests/CSTNet.Tests.csproj +++ /dev/null @@ -1,35 +0,0 @@ - - - - netcoreapp3.1 - - false - - - - - - - runtime; build; native; contentfiles; analyzers; buildtransitive - all - - - runtime; build; native; contentfiles; analyzers; buildtransitive - all - - - - - - - - - - 
PreserveNewest - - - PreserveNewest - - - - diff --git a/CSTNet.Tests/MultilineTests.cs b/CSTNet.Tests/MultilineTests.cs new file mode 100644 index 0000000..4db5f89 --- /dev/null +++ b/CSTNet.Tests/MultilineTests.cs @@ -0,0 +1,25 @@ +using System; +using Xunit; + +namespace CSTNet.Tests +{ + public class MultilineTests + { + [Fact] + public void MiltilineV1() + { + var four = 4; + var expected = $"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat.{Environment.NewLine}{Environment.NewLine}Vivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. Praesent ac efficitur magna, vitae scelerisque metus."; + var actual = CSTHelper.CSTFile("v1.cst", four.ToString()); + Assert.Equal(expected, actual); + } + + [Fact] + public void MiltilineV2() + { + var expected = $"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. Sed semper nulla %1 a lectus dictum dictum.{Environment.NewLine}{Environment.NewLine}Quisque vehicula, nisi ut scelerisque sodales, nisi ipsum sodales ipsum, in rutrum tellus lacus sed nibh. Etiam mauris velit, elementum sed placerat et, elementum et tellus. 
Duis vitae elit fermentum, viverra lorem in, lobortis elit."; + var actual = CSTHelper.CSTFile("v2.cst", "Multiline"); + Assert.Equal(expected, actual); + } + } +} diff --git a/CSTNet.Tests/QuickFennec.CST.Tests.csproj b/CSTNet.Tests/QuickFennec.CST.Tests.csproj new file mode 100644 index 0000000..cf5e35c --- /dev/null +++ b/CSTNet.Tests/QuickFennec.CST.Tests.csproj @@ -0,0 +1,35 @@ + + + + netcoreapp3.1 + + false + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + + diff --git a/CSTNet.Tests/SingleLineTests.cs b/CSTNet.Tests/SingleLineTests.cs new file mode 100644 index 0000000..bc217f9 --- /dev/null +++ b/CSTNet.Tests/SingleLineTests.cs @@ -0,0 +1,25 @@ +// This project is licensed under the MIT license. +using Xunit; + +namespace CSTNet.Tests +{ + public class SingleLineTests + { + [Theory] + [InlineData(1, @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.")] + [InlineData(3, @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam venenatis ac odio ut pretium. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec semper turpis tempor, bibendum sapien at, blandit neque. Vivamus hendrerit imperdiet elit, vel sollicitudin nulla luctus vel. Vivamus nisl quam, feugiat a diam aliquam, iaculis vestibulum nunc. Maecenas euismod leo enim, faucibus ultrices ipsum semper eu. Praesent ullamcorper justo at maximus ultricies.")] + public void V1Test(int key, string expected) + { + var actual = CSTHelper.CSTFile("v1.cst", key.ToString()); + Assert.Equal(expected, actual); + } + + [Theory] + [InlineData("Singleline", "Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.")] + public void V2Test(string key, string expected) + { + var actual = CSTHelper.CSTFile("v2.cst", key); + Assert.Equal(expected, actual); + } + } +} diff --git a/CSTNet.Tests/v1.cst b/CSTNet.Tests/v1.cst index 23de77a..4198047 100644 --- a/CSTNet.Tests/v1.cst +++ b/CSTNet.Tests/v1.cst @@ -3,6 +3,6 @@ Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nunc vel dictum eros, vitae mattis risus. Curabitur eget nisi interdum, euismod nisl in, fermentum turpis. Morbi a feugiat lacus. Duis ligula felis, commodo quis sodales ac, congue sit amet tortor. Sed vulputate, velit id interdum convallis, purus nisl interdum lorem, sit amet aliquam lacus sapien ac neque. Proin sit amet ultricies mi.^ 3 ^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam venenatis ac odio ut pretium. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec semper turpis tempor, bibendum sapien at, blandit neque. Vivamus hendrerit imperdiet elit, vel sollicitudin nulla luctus vel. Vivamus nisl quam, feugiat a diam aliquam, iaculis vestibulum nunc. Maecenas euismod leo enim, faucibus ultrices ipsum semper eu. Praesent ullamcorper justo at maximus ultricies.^ -4^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat. +4 ^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat. Vivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. 
Praesent ac efficitur magna, vitae scelerisque metus.^ \ No newline at end of file diff --git a/CSTNet.sln b/CSTNet.sln index 64af97e..cdcdfc9 100644 --- a/CSTNet.sln +++ b/CSTNet.sln @@ -3,15 +3,16 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.30717.126 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSTNet", "CSTNet\CSTNet.csproj", "{82775826-A366-46F0-A5D2-5BE7658C75E4}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "QuickFennec.CST", "CSTNet\QuickFennec.CST.csproj", "{82775826-A366-46F0-A5D2-5BE7658C75E4}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{CCFCE2DB-C18F-4D88-B025-19ED62BD2A1D}" ProjectSection(SolutionItems) = preProject .editorconfig = .editorconfig + changelog.md = changelog.md README.md = README.md EndProjectSection EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSTNet.Tests", "CSTNet.Tests\CSTNet.Tests.csproj", "{B6A98C64-1419-4B9A-99CA-72BB11D29472}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "QuickFennec.CST.Tests", "CSTNet.Tests\QuickFennec.CST.Tests.csproj", "{B6A98C64-1419-4B9A-99CA-72BB11D29472}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/CSTNet/CaretSeparatedText.cs b/CSTNet/CaretSeparatedText.cs index c4eb6f5..7244695 100644 --- a/CSTNet/CaretSeparatedText.cs +++ b/CSTNet/CaretSeparatedText.cs @@ -6,6 +6,12 @@ namespace CSTNet { public static class CaretSeparatedText { + const char CARET = '^'; + static readonly string _lf = "\u000A"; + static readonly string _cr = "\u000D"; + static readonly string _crlf = "\u000D\u000A"; + static readonly string _ls = "\u2028"; + /// /// Gets the value from the integer-based key. 
/// @@ -13,7 +19,7 @@ namespace CSTNet public static string Parse(string content, int key) { var entries = NormalizeEntries(content); - return GetEntry(entries, $"{key}"); + return GetEntry(entries, key.ToString()); } /// @@ -32,52 +38,79 @@ namespace CSTNet /// This stage ensures there are no crashes during parsing. static IEnumerable NormalizeEntries(string content) { - var lineBreaks = new string[] + + /* + I tried putting the end carets with the different + line endings in with the split function but it didn't work + */ + if (!content.Contains($"{CARET}{Environment.NewLine}")) { - "^\u000A", // LF - "^\u000D", // CR - "^\u000D\u000A", // CR+LF - "^\u2028" // LS - }; + if (content.Contains($"{CARET}{_lf}")) + content = content.Replace($"{CARET}{_lf}", + $"{CARET}{Environment.NewLine}"); - foreach (var line in lineBreaks) - { - var eol = Environment.NewLine; // System's line break + if (content.Contains($"{CARET}{_cr}")) + content = content.Replace($"{CARET}{_cr}", + $"{CARET}{Environment.NewLine}"); - // If the new line matches the system's, do nothing - if (line.Contains(eol)) - continue; + if (content.Contains($"{CARET}{_crlf}")) + content = content.Replace($"{CARET}{_crlf}", + $"{CARET}{Environment.NewLine}"); - content.Replace(line, eol); + if (content.Contains($"{CARET}{_ls}")) + content = content.Replace($"{CARET}{_ls}", + $"{CARET}{Environment.NewLine}"); } - return content.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries); + var entries = content.Split(new[] { $"{CARET}{Environment.NewLine}" }, + StringSplitOptions.RemoveEmptyEntries); + var newContent = new List(); + + foreach (var entry in entries) + { + // Skip comments + if (entry.StartsWith(@"//") || entry.StartsWith("#") || + entry.StartsWith("/*") || entry.EndsWith("*/")) + continue; + + newContent.Add(entry); + } + + return newContent; } - // TODO: support argument parameters static string GetEntry(IEnumerable entries, string key) { - var translation = "[ENTRY NOT FOUND]"; - - // Search 
through array + // Search through list foreach (var entry in entries) { // Locate index, trim carets and return translation if (!entry.StartsWith(key)) continue; - const char caret = '^'; - - var startIndex = entry.IndexOf(caret.ToString(), - StringComparison.OrdinalIgnoreCase); - + var startIndex = entry.IndexOf(CARET); var line = entry.Substring(startIndex); - translation = line.Trim(caret); + if (!line.Contains(Environment.NewLine)) + { + if (line.Contains(_lf)) + line = line.Replace(_lf, Environment.NewLine); + + if (line.Contains(_cr)) + line = line.Replace(_cr, Environment.NewLine); + + if (line.Contains(_crlf)) + line = line.Replace(_crlf, Environment.NewLine); + + if (line.Contains(_ls)) + line = line.Replace(_ls, Environment.NewLine); + } + + return line.TrimStart(CARET).TrimEnd(CARET); } - return translation; + return "[ENTRY NOT FOUND]"; } } } diff --git a/CSTNet/CSTNet.csproj b/CSTNet/QuickFennec.CST.csproj similarity index 82% rename from CSTNet/CSTNet.csproj rename to CSTNet/QuickFennec.CST.csproj index 41eeb6e..d008079 100644 --- a/CSTNet/CSTNet.csproj +++ b/CSTNet/QuickFennec.CST.csproj @@ -2,13 +2,13 @@ netstandard2.0 - 1.0 + 1.0.1 Tony Bark Caret-Separated Text (or CST) is a key-value pair format represented by numbers or words as keys and the value is the string enclosed between carets (^) that contains the contents. CST.NET is a library for prasing the CST format. 
https://github.com/tonytins/cstnet MIT - https://github.com/tonytins/cstnet/blob/master/LICENSE CST.Net + CSTNet diff --git a/README.md b/README.md index c11accb..3997023 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# CST.NET +# CSTNet [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md) Caret-Separated Text (or CST) is a key-value pair format represented by numbers or words as keys and the value is the string enclosed between carets (^) that contains the contents. Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character. -CST.NET is a library for parsing the CST format. +CSTNet is a library for parsing the CST format. ## Usage diff --git a/changelog.md b/changelog.md new file mode 100644 index 0000000..d7cd0c3 --- /dev/null +++ b/changelog.md @@ -0,0 +1,22 @@ +# Change Log + +## 1.0.1 + +Despite only being a point release, this includes a major refinement to the normalizing algorithm. + +### Rewrote normalizing algorithm + +The normalizing algorithm has been rewritten to be more efficient and hopefully more reliable. The new algorithm de-constructs each line after converting it to the system's native line ending. Then it searches for the key and returns the value. The rewrite also normalizes line endings to match the system's within the entry itself before returning the final output. This should make things more stable and predictable. + +### CSTNet compatibility + +For point releases (such as this), QuickFennec.CST will remain under the CSTNet namespace for compatibility reasons. CSTNet will be moved to the QuickFennec.CST namespace starting with 1.1. + +### Known issues + +- Skipping comments is still a little buggy. +- Multiline parsing with the v2 format is still a little unpredictable. 
+ +## 1.0.0 + +- Initial release. \ No newline at end of file diff --git a/notebooks/cst.ipynb b/notebooks/cst.ipynb index 5ce3b54..577aa7d 100644 --- a/notebooks/cst.ipynb +++ b/notebooks/cst.ipynb @@ -19,7 +19,8 @@ "metadata": {}, "source": [ "using System.IO;\n", - "using System.Text;" + "using System.Collections.Generic;\n", + "using System.Text.RegularExpressions;" ], "outputs": [] }, @@ -30,89 +31,111 @@ "source": [ "public static class CST\n", "{\n", - " public static string Parse(string cst, int key, params string[] args)\n", + " const char CARET = '^';\n", + " static readonly string _lf = \"\\u000A\";\n", + " static readonly string _cr = \"\\u000D\";\n", + " static readonly string _crlf = \"\\u000D\\u000A\";\n", + " static readonly string _ls = \"\\u2028\";\n", + "\n", + " /// \n", + " /// Gets the value from the integer-based key.\n", + " /// \n", + " /// Returns the entry\n", + " public static string Parse(string content, int key)\n", " {\n", - " var entries = NormalizeEntries(cst);\n", - " return GetEntry(entries, $\"{key}\", args);\n", + " var entries = NormalizeEntries(content);\n", + " return GetEntry(entries, key.ToString());\n", " }\n", "\n", - " public static string Parse(string cst, string key, params string[] args)\n", + " /// \n", + " /// Gets the value from the string-based key.\n", + " /// \n", + " /// Returns the entry\n", + " public static string Parse(string content, string key)\n", " {\n", - " var entries = NormalizeEntries(cst);\n", - " return GetEntry(entries, key, args);\n", + " var entries = NormalizeEntries(content);\n", + " return GetEntry(entries, key);\n", " }\n", "\n", - " static IEnumerable NormalizeEntries(string cst)\n", + " /// \n", + " /// Replaces the document's line endings with the native system line endings.\n", + " /// \n", + " /// This stage ensures there are no crashes during parsing.\n", + " static IEnumerable NormalizeEntries(string content)\n", " {\n", - " var lineBreaks = new string[] { \"^\\u000A\", \"^\\u000D\", 
\"^\\u000A\" };\n", "\n", - " foreach (var line in lineBreaks)\n", + " /* \n", + " I tried putting the end carets with the different\n", + " line endings in with the split function but it didn't work \n", + " */\n", + " if (!content.Contains($\"{CARET}{Environment.NewLine}\"))\n", " {\n", - " var eol = Environment.NewLine; // System's line break\n", + " if (content.Contains($\"{CARET}{_lf}\"))\n", + " content = content.Replace($\"{CARET}{_lf}\",\n", + " $\"{CARET}{Environment.NewLine}\");\n", "\n", - " // If the new line matches the system's, do nothing\n", - " if (line.Contains(eol))\n", + " if (content.Contains($\"{CARET}{_cr}\"))\n", + " content = content.Replace($\"{CARET}{_cr}\",\n", + " $\"{CARET}{Environment.NewLine}\");\n", + "\n", + " if (content.Contains($\"{CARET}{_crlf}\"))\n", + " content = content.Replace($\"{CARET}{_crlf}\",\n", + " $\"{CARET}{Environment.NewLine}\");\n", + "\n", + " if (content.Contains($\"{CARET}{_ls}\"))\n", + " content = content.Replace($\"{CARET}{_ls}\",\n", + " $\"{CARET}{Environment.NewLine}\");\n", + " }\n", + "\n", + "\n", + " var entries = content.Split(new[] { $\"{CARET}{Environment.NewLine}\" },\n", + " StringSplitOptions.RemoveEmptyEntries);\n", + " var newContent = new List();\n", + "\n", + " foreach (var entry in entries)\n", + " {\n", + " // Skip comments\n", + " if (entry.StartsWith(@\"//\") || entry.StartsWith(\"#\") ||\n", + " entry.StartsWith(\"/*\") || entry.EndsWith(\"*/\"))\n", " continue;\n", "\n", - " cst.Replace(line, eol);\n", + " newContent.Add(entry);\n", " }\n", "\n", - " return cst.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);\n", - "\n", + " return newContent;\n", " }\n", "\n", - " static string ArgumentParser(string content, string[] args)\n", + " static string GetEntry(IEnumerable entries, string key)\n", " {\n", - " var sb = new StringBuilder();\n", - "\n", - " for (var i = 0; i < content.Length; i++)\n", - " {\n", - " var curArgs = content.Substring(i, 1);\n", - " var argsCounter = 
0;\n", - "\n", - " if (curArgs.Contains(\"%\"))\n", - " {\n", - " if (argsCounter < args.Length)\n", - " {\n", - " sb.Append(curArgs.Replace(\"%\", args[argsCounter]));\n", - " i++;\n", - " }\n", - " }\n", - " else\n", - " sb.Append(curArgs);\n", - " }\n", - "\n", - " return sb.ToString();\n", - " }\n", - "\n", - " static string GetEntry(IEnumerable entries, string key,\n", - " params string[] args)\n", - " {\n", - " var translation = \"[ENTRY NOT FOUND]\";\n", - "\n", - " // Search through array\n", + " // Search through list\n", " foreach (var entry in entries)\n", " {\n", " // Locate index, trim carets and return translation\n", " if (!entry.StartsWith(key))\n", " continue;\n", - " \n", - " const char caret = '^';\n", - "\n", - " var startIndex = entry.IndexOf(caret.ToString(),\n", - " StringComparison.OrdinalIgnoreCase);\n", "\n", + " var startIndex = entry.IndexOf(CARET);\n", " var line = entry.Substring(startIndex);\n", "\n", - " var content = line.TrimStart(caret).TrimEnd(caret);\n", + " if (!line.Contains(Environment.NewLine))\n", + " {\n", + " if (line.Contains(_lf))\n", + " line = line.Replace(_lf, Environment.NewLine);\n", "\n", - " if (args.Length > 0)\n", - " translation = ArgumentParser(content, args);\n", - " else\n", - " translation = content;\n", + " if (line.Contains(_cr))\n", + " line = line.Replace(_cr, Environment.NewLine);\n", + "\n", + " if (line.Contains(_crlf))\n", + " line = line.Replace(_crlf, Environment.NewLine);\n", + "\n", + " if (line.Contains(_ls))\n", + " line = line.Replace(_ls, Environment.NewLine);\n", + " }\n", + "\n", + " return line.TrimStart(CARET).TrimEnd(CARET);\n", " }\n", "\n", - " return translation;\n", + " return \"[ENTRY NOT FOUND]\";\n", " }\n", "}" ], @@ -125,16 +148,18 @@ "source": [ "var v1Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v1.cst\");\n", "var v1File = File.ReadAllText(v1Path);\n", - "var singleLine = CST.Parse(v1File, 1, \"FENNEC\");\n", - "var multiLine = CST.Parse(v1File, 2);\n", - 
"Console.WriteLine($\"Single line:{Environment.NewLine}{singleLine}\");\n", - "Console.WriteLine($\"Multiline:{Environment.NewLine}{multiLine}\");" + "var one = CST.Parse(v1File, 1);\n", + "var three = CST.Parse(v1File, 3);\n", + "var four = CST.Parse(v1File, 4);\n", + "Console.WriteLine($\"One:{Environment.NewLine}{one}\");\n", + "Console.WriteLine($\"Three:{Environment.NewLine}{three}\");\n", + "Console.WriteLine($\"Four:{Environment.NewLine}{four}\");" ], "outputs": [ { "output_type": "execute_result", "data": { - "text/plain": "Single line:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, FENNEC eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n" + "text/plain": "One:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, %1 eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n" }, "execution_count": 1, "metadata": {} @@ -142,7 +167,15 @@ { "output_type": "execute_result", "data": { - "text/plain": "Multiline:\r\n[ENTRY NOT FOUND]\r\n" + "text/plain": "Three:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam venenatis ac odio ut pretium. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec semper turpis tempor, bibendum sapien at, blandit neque. Vivamus hendrerit imperdiet elit, vel sollicitudin nulla luctus vel. Vivamus nisl quam, feugiat a diam aliquam, iaculis vestibulum nunc. Maecenas euismod leo enim, faucibus ultrices ipsum semper eu. Praesent ullamcorper justo at maximus ultricies.\r\n" + }, + "execution_count": 1, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": "Four:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. 
Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat.\r\n\r\nVivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. Praesent ac efficitur magna, vitae scelerisque metus.\r\n" }, "execution_count": 1, "metadata": {} @@ -157,7 +190,7 @@ "var v2Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v2.cst\");\n", "var v2File = File.ReadAllText(v2Path);\n", "var singleLineV2 = CST.Parse(v2File, \"Singleline\");\n", - "var multiLineV2 = CST.Parse(v2File, \"Multiline\", \"DOG\", \"CAT\");;\n", + "var multiLineV2 = CST.Parse(v2File, \"Multiline\");\n", "Console.WriteLine($\"Single line v2:{Environment.NewLine}{singleLineV2}\");\n", "Console.WriteLine($\"Multiline v2:{Environment.NewLine}{multiLineV2}\");" ], diff --git a/notebooks/data/v2.cst b/notebooks/data/v2.cst index 842d506..2949b37 100644 --- a/notebooks/data/v2.cst +++ b/notebooks/data/v2.cst @@ -1,4 +1,6 @@ Singleline^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.^ -Multiline ^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. Sed semper nulla %1 a lectus dictum dictum. Ut mattis eu tortor in bibendum. Integer mattis tincidunt aliquet. Vestibulum ante ipsum primis in faucibus orci %2 luctus et ultrices posuere cubilia Curae; Fusce quis orci nisl. +/* this is a +test comment */ +Multiline^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. 
Sed semper nulla %1 a lectus dictum dictum. Ut mattis eu tortor in bibendum. Integer mattis tincidunt aliquet. Vestibulum ante ipsum primis in faucibus orci %2 luctus et ultrices posuere cubilia Curae; Fusce quis orci nisl. Quisque vehicula, nisi ut scelerisque sodales, nisi ipsum sodales ipsum, in rutrum tellus lacus sed nibh. Etiam mauris velit, elementum sed placerat et, elementum et tellus. Duis vitae elit fermentum, viverra lorem in, lobortis elit. Maecenas eget nibh et lectus auctor dignissim.^ \ No newline at end of file