Rewrote normalizing algorithm

The normalizing algorithm was rewritten to be more efficient and hopefully more reliable. See changelog.md for more info.
This commit is contained in:
Tony Bark 2020-12-11 01:05:46 -05:00
parent ccb4da5b86
commit cd95b7c6ae
16 changed files with 313 additions and 183 deletions

View file

@ -19,7 +19,8 @@
"metadata": {},
"source": [
"using System.IO;\n",
"using System.Text;"
"using System.Collections.Generic;\n",
"using System.Text.RegularExpressions;"
],
"outputs": []
},
@ -30,89 +31,111 @@
"source": [
"public static class CST\n",
"{\n",
" public static string Parse(string cst, int key, params string[] args)\n",
" const char CARET = '^';\n",
" static readonly string _lf = \"\\u000A\";\n",
" static readonly string _cr = \"\\u000D\";\n",
" static readonly string _crlf = \"\\u000D\\u000A\";\n",
" static readonly string _ls = \"\\u2028\";\n",
"\n",
" /// <summary>\n",
" /// Gets the value from the integer-based key.\n",
" /// </summary>\n",
" /// <returns>Returns the entry</returns>\n",
" public static string Parse(string content, int key)\n",
" {\n",
" var entries = NormalizeEntries(cst);\n",
" return GetEntry(entries, $\"{key}\", args);\n",
" var entries = NormalizeEntries(content);\n",
" return GetEntry(entries, key.ToString());\n",
" }\n",
"\n",
" public static string Parse(string cst, string key, params string[] args)\n",
" /// <summary>\n",
" /// Gets the value from the string-based key.\n",
" /// </summary>\n",
" /// <returns>Returns the entry</returns>\n",
" public static string Parse(string content, string key)\n",
" {\n",
" var entries = NormalizeEntries(cst);\n",
" return GetEntry(entries, key, args);\n",
" var entries = NormalizeEntries(content);\n",
" return GetEntry(entries, key);\n",
" }\n",
"\n",
" static IEnumerable<string> NormalizeEntries(string cst)\n",
" /// <summary>\n",
" /// Replaces the document's line endings with the native system line endings.\n",
" /// </summary>\n",
" /// <remarks>This stage ensures there are no crashes during parsing.</remarks>\n",
" static IEnumerable<string> NormalizeEntries(string content)\n",
" {\n",
" var lineBreaks = new string[] { \"^\\u000A\", \"^\\u000D\", \"^\\u000A\" };\n",
"\n",
" foreach (var line in lineBreaks)\n",
" /* \n",
" I tried putting the end carets with the different\n",
" line endings in with the split function but it didn't work \n",
" */\n",
" if (!content.Contains($\"{CARET}{Environment.NewLine}\"))\n",
" {\n",
" var eol = Environment.NewLine; // System's line break\n",
" if (content.Contains($\"{CARET}{_lf}\"))\n",
" content = content.Replace($\"{CARET}{_lf}\",\n",
" $\"{CARET}{Environment.NewLine}\");\n",
"\n",
" // If the new line matches the system's, do nothing\n",
" if (line.Contains(eol))\n",
" if (content.Contains($\"{CARET}{_cr}\"))\n",
" content = content.Replace($\"{CARET}{_cr}\",\n",
" $\"{CARET}{Environment.NewLine}\");\n",
"\n",
" if (content.Contains($\"{CARET}{_crlf}\"))\n",
" content = content.Replace($\"{CARET}{_crlf}\",\n",
" $\"{CARET}{Environment.NewLine}\");\n",
"\n",
" if (content.Contains($\"{CARET}{_ls}\"))\n",
" content = content.Replace($\"{CARET}{_ls}\",\n",
" $\"{CARET}{Environment.NewLine}\");\n",
" }\n",
"\n",
"\n",
" var entries = content.Split(new[] { $\"{CARET}{Environment.NewLine}\" },\n",
" StringSplitOptions.RemoveEmptyEntries);\n",
" var newContent = new List<string>();\n",
"\n",
" foreach (var entry in entries)\n",
" {\n",
" // Skip comments\n",
" if (entry.StartsWith(@\"//\") || entry.StartsWith(\"#\") ||\n",
" entry.StartsWith(\"/*\") || entry.EndsWith(\"*/\"))\n",
" continue;\n",
"\n",
" cst.Replace(line, eol);\n",
" newContent.Add(entry);\n",
" }\n",
"\n",
" return cst.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);\n",
"\n",
" return newContent;\n",
" }\n",
"\n",
" static string ArgumentParser(string content, string[] args)\n",
" static string GetEntry(IEnumerable<string> entries, string key)\n",
" {\n",
" var sb = new StringBuilder();\n",
"\n",
" for (var i = 0; i < content.Length; i++)\n",
" {\n",
" var curArgs = content.Substring(i, 1);\n",
" var argsCounter = 0;\n",
"\n",
" if (curArgs.Contains(\"%\"))\n",
" {\n",
" if (argsCounter < args.Length)\n",
" {\n",
" sb.Append(curArgs.Replace(\"%\", args[argsCounter]));\n",
" i++;\n",
" }\n",
" }\n",
" else\n",
" sb.Append(curArgs);\n",
" }\n",
"\n",
" return sb.ToString();\n",
" }\n",
"\n",
" static string GetEntry(IEnumerable<string> entries, string key,\n",
" params string[] args)\n",
" {\n",
" var translation = \"[ENTRY NOT FOUND]\";\n",
"\n",
" // Search through array\n",
" // Search through list\n",
" foreach (var entry in entries)\n",
" {\n",
" // Locate index, trim carets and return translation\n",
" if (!entry.StartsWith(key))\n",
" continue;\n",
" \n",
" const char caret = '^';\n",
"\n",
" var startIndex = entry.IndexOf(caret.ToString(),\n",
" StringComparison.OrdinalIgnoreCase);\n",
"\n",
" var startIndex = entry.IndexOf(CARET);\n",
" var line = entry.Substring(startIndex);\n",
"\n",
" var content = line.TrimStart(caret).TrimEnd(caret);\n",
" if (!line.Contains(Environment.NewLine))\n",
" {\n",
" if (line.Contains(_lf))\n",
" line = line.Replace(_lf, Environment.NewLine);\n",
"\n",
" if (args.Length > 0)\n",
" translation = ArgumentParser(content, args);\n",
" else\n",
" translation = content;\n",
" if (line.Contains(_cr))\n",
" line = line.Replace(_cr, Environment.NewLine);\n",
"\n",
" if (line.Contains(_crlf))\n",
" line = line.Replace(_crlf, Environment.NewLine);\n",
"\n",
" if (line.Contains(_ls))\n",
" line = line.Replace(_ls, Environment.NewLine);\n",
" }\n",
"\n",
" return line.TrimStart(CARET).TrimEnd(CARET);\n",
" }\n",
"\n",
" return translation;\n",
" return \"[ENTRY NOT FOUND]\";\n",
" }\n",
"}"
],
@ -125,16 +148,18 @@
"source": [
"var v1Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v1.cst\");\n",
"var v1File = File.ReadAllText(v1Path);\n",
"var singleLine = CST.Parse(v1File, 1, \"FENNEC\");\n",
"var multiLine = CST.Parse(v1File, 2);\n",
"Console.WriteLine($\"Single line:{Environment.NewLine}{singleLine}\");\n",
"Console.WriteLine($\"Multiline:{Environment.NewLine}{multiLine}\");"
"var one = CST.Parse(v1File, 1);\n",
"var three = CST.Parse(v1File, 3);\n",
"var four = CST.Parse(v1File, 4);\n",
"Console.WriteLine($\"One:{Environment.NewLine}{one}\");\n",
"Console.WriteLine($\"Three:{Environment.NewLine}{three}\");\n",
"Console.WriteLine($\"Four:{Environment.NewLine}{four}\");"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "Single line:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, FENNEC eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n"
"text/plain": "One:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, %1 eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n"
},
"execution_count": 1,
"metadata": {}
@ -142,7 +167,15 @@
{
"output_type": "execute_result",
"data": {
"text/plain": "Multiline:\r\n[ENTRY NOT FOUND]\r\n"
"text/plain": "Three:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam venenatis ac odio ut pretium. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec semper turpis tempor, bibendum sapien at, blandit neque. Vivamus hendrerit imperdiet elit, vel sollicitudin nulla luctus vel. Vivamus nisl quam, feugiat a diam aliquam, iaculis vestibulum nunc. Maecenas euismod leo enim, faucibus ultrices ipsum semper eu. Praesent ullamcorper justo at maximus ultricies.\r\n"
},
"execution_count": 1,
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": "Four:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat.\r\n\r\nVivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. Praesent ac efficitur magna, vitae scelerisque metus.\r\n"
},
"execution_count": 1,
"metadata": {}
@ -157,7 +190,7 @@
"var v2Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v2.cst\");\n",
"var v2File = File.ReadAllText(v2Path);\n",
"var singleLineV2 = CST.Parse(v2File, \"Singleline\");\n",
"var multiLineV2 = CST.Parse(v2File, \"Multiline\", \"DOG\", \"CAT\");;\n",
"var multiLineV2 = CST.Parse(v2File, \"Multiline\");\n",
"Console.WriteLine($\"Single line v2:{Environment.NewLine}{singleLineV2}\");\n",
"Console.WriteLine($\"Multiline v2:{Environment.NewLine}{multiLineV2}\");"
],

View file

@ -1,4 +1,6 @@
Singleline^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.^
Multiline ^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. Sed semper nulla %1 a lectus dictum dictum. Ut mattis eu tortor in bibendum. Integer mattis tincidunt aliquet. Vestibulum ante ipsum primis in faucibus orci %2 luctus et ultrices posuere cubilia Curae; Fusce quis orci nisl.
/* this is a
test comment */
Multiline^Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc gravida nunc non justo pretium consectetur. Sed tempus libero ac ligula aliquam elementum. Duis vitae interdum leo. Sed semper nulla %1 a lectus dictum dictum. Ut mattis eu tortor in bibendum. Integer mattis tincidunt aliquet. Vestibulum ante ipsum primis in faucibus orci %2 luctus et ultrices posuere cubilia Curae; Fusce quis orci nisl.
Quisque vehicula, nisi ut scelerisque sodales, nisi ipsum sodales ipsum, in rutrum tellus lacus sed nibh. Etiam mauris velit, elementum sed placerat et, elementum et tellus. Duis vitae elit fermentum, viverra lorem in, lobortis elit. Maecenas eget nibh et lectus auctor dignissim.^