cstdotnet/notebooks/cst.ipynb
Tony Bark 8f21bf3d26 Initial WIP of argument parsing
- Fixed the way CST files were properly laid out in TSO in the README.
2020-12-09 02:14:14 -05:00

200 lines
No EOL
6.6 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Caret-Separated Text\n",
"\n",
"Caret-Separated Text (or CST) is a key-value pair format represented by numbers as keys and the value is the string enclosed between carets (^) that contains the translation. Any text which is not enclosed with carets is considered a comment and ignored.\n",
"\n",
"## CST.NET\n",
"\n",
"CST.NET uses .NET's built-in indexing extension function to accomplish locating of each respective key. As a consequence, it does not matter what you use for keys. I added an additional normalizion to the pipeline that converts the document's line endings to the system's, in order to prevent crashes."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"source": [
"using System.IO;\n",
"using System.Text;"
],
"outputs": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"source": [
"public static class CST\n",
"{\n",
" public static string Parse(string cst, int key, params string[] args)\n",
" {\n",
" var entries = NormalizeEntries(cst);\n",
" return GetEntry(entries, $\"{key}\", args);\n",
" }\n",
"\n",
" public static string Parse(string cst, string key, params string[] args)\n",
" {\n",
" var entries = NormalizeEntries(cst);\n",
" return GetEntry(entries, key, args);\n",
" }\n",
"\n",
" static IEnumerable<string> NormalizeEntries(string cst)\n",
" {\n",
" var lineBreaks = new string[] { \"^\\u000A\", \"^\\u000D\", \"^\\u000A\" };\n",
"\n",
" foreach (var line in lineBreaks)\n",
" {\n",
" var eol = Environment.NewLine; // System's line break\n",
"\n",
" // If the new line matches the system's, do nothing\n",
" if (line.Contains(eol))\n",
" continue;\n",
"\n",
" cst.Replace(line, eol);\n",
" }\n",
"\n",
" return cst.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);\n",
"\n",
" }\n",
"\n",
" static string ArgumentParser(string content, string[] args)\n",
" {\n",
" var sb = new StringBuilder();\n",
"\n",
" for (var i = 0; i < content.Length; i++)\n",
" {\n",
" var curArgs = content.Substring(i, 1);\n",
" var argsCounter = 0;\n",
"\n",
" if (curArgs.Contains(\"%\"))\n",
" {\n",
" if (argsCounter < args.Length)\n",
" {\n",
" sb.Append(curArgs.Replace(\"%\", args[argsCounter]));\n",
" i++;\n",
" }\n",
" }\n",
" else\n",
" sb.Append(curArgs);\n",
" }\n",
"\n",
" return sb.ToString();\n",
" }\n",
"\n",
" static string GetEntry(IEnumerable<string> entries, string key,\n",
" params string[] args)\n",
" {\n",
" var translation = \"[ENTRY NOT FOUND]\";\n",
"\n",
" // Search through array\n",
" foreach (var entry in entries)\n",
" {\n",
" // Locate index, trim carets and return translation\n",
" if (!entry.StartsWith(key))\n",
" continue;\n",
" \n",
" const char caret = '^';\n",
"\n",
" var startIndex = entry.IndexOf(caret.ToString(),\n",
" StringComparison.OrdinalIgnoreCase);\n",
"\n",
" var line = entry.Substring(startIndex);\n",
"\n",
" var content = line.TrimStart(caret).TrimEnd(caret);\n",
"\n",
" if (args.Length > 0)\n",
" translation = ArgumentParser(content, args);\n",
" else\n",
" translation = content;\n",
" }\n",
"\n",
" return translation;\n",
" }\n",
"}"
],
"outputs": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"source": [
"var v1Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v1.cst\");\n",
"var v1File = File.ReadAllText(v1Path);\n",
"var singleLine = CST.Parse(v1File, 1, \"FENNEC\");\n",
"var multiLine = CST.Parse(v1File, 2);\n",
"Console.WriteLine($\"Single line:{Environment.NewLine}{singleLine}\");\n",
"Console.WriteLine($\"Multiline:{Environment.NewLine}{multiLine}\");"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "Single line:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, FENNEC eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n"
},
"execution_count": 1,
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": "Multiline:\r\n[ENTRY NOT FOUND]\r\n"
},
"execution_count": 1,
"metadata": {}
}
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"source": [
"var v2Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v2.cst\");\n",
"var v2File = File.ReadAllText(v2Path);\n",
"var singleLineV2 = CST.Parse(v2File, \"Singleline\");\n",
"var multiLineV2 = CST.Parse(v2File, \"Multiline\", \"DOG\", \"CAT\");;\n",
"Console.WriteLine($\"Single line v2:{Environment.NewLine}{singleLineV2}\");\n",
"Console.WriteLine($\"Multiline v2:{Environment.NewLine}{multiLineV2}\");"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "Single line v2:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.\r\n"
},
"execution_count": 1,
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": "Multiline v2:\r\n[ENTRY NOT FOUND]\r\n"
},
"execution_count": 1,
"metadata": {}
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".NET (C#)",
"language": "C#",
"name": ".net-csharp"
},
"language_info": {
"file_extension": ".cs",
"mimetype": "text/x-csharp",
"name": "C#",
"pygments_lexer": "csharp",
"version": "8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}