mirror of
https://github.com/tonytins/cstdotnet.git
synced 2025-03-15 14:11:26 +00:00
The normalizing algorithm was rewritten to be more efficient and hopefully more reliable. See changelog.md for more info.
233 lines
No EOL
9 KiB
Text
233 lines
No EOL
9 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Caret-Separated Text\n",
|
|
"\n",
|
|
"Caret-Separated Text (or CST) is a key-value pair format in which the keys are numbers and each value is the string enclosed between carets (^) that contains the translation. Any text that is not enclosed in carets is considered a comment and is ignored.\n",
|
|
"\n",
|
|
"## CST.NET\n",
|
|
"\n",
|
|
"CST.NET uses .NET's built-in indexing extension function to locate each respective key. As a consequence, it does not matter what you use for keys. I added an additional normalization step to the pipeline that converts the document's line endings to the system's, in order to prevent crashes."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"using System.IO;\n",
|
|
"using System.Collections.Generic;\n",
|
|
"using System.Text.RegularExpressions;"
|
|
],
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"public static class CST\n",
|
|
"{\n",
|
|
" const char CARET = '^';\n",
|
|
" static readonly string _lf = \"\\u000A\";\n",
|
|
" static readonly string _cr = \"\\u000D\";\n",
|
|
" static readonly string _crlf = \"\\u000D\\u000A\";\n",
|
|
" static readonly string _ls = \"\\u2028\";\n",
|
|
"\n",
|
|
" /// <summary>\n",
|
|
" /// Gets the value from the integer-based key.\n",
|
|
" /// </summary>\n",
|
|
" /// <returns>Returns the entry</returns>\n",
|
|
" public static string Parse(string content, int key)\n",
|
|
" {\n",
|
|
" var entries = NormalizeEntries(content);\n",
|
|
" return GetEntry(entries, key.ToString());\n",
|
|
" }\n",
|
|
"\n",
|
|
" /// <summary>\n",
|
|
" /// Gets the value from the string-based key.\n",
|
|
" /// </summary>\n",
|
|
" /// <returns>Returns the entry</returns>\n",
|
|
" public static string Parse(string content, string key)\n",
|
|
" {\n",
|
|
" var entries = NormalizeEntries(content);\n",
|
|
" return GetEntry(entries, key);\n",
|
|
" }\n",
|
|
"\n",
|
|
" /// <summary>\n",
|
|
" /// Replaces the document's line endings with the native system line endings.\n",
|
|
" /// </summary>\n",
|
|
" /// <remarks>This stage ensures there are no crashes during parsing.</remarks>\n",
|
|
" static IEnumerable<string> NormalizeEntries(string content)\n",
|
|
" {\n",
|
|
"\n",
|
|
" /* \n",
|
|
" I tried putting the end carets with the different\n",
|
|
" line endings in with the split function but it didn't work \n",
|
|
" */\n",
|
|
" if (!content.Contains($\"{CARET}{Environment.NewLine}\"))\n",
|
|
" {\n",
|
|
" if (content.Contains($\"{CARET}{_lf}\"))\n",
|
|
" content = content.Replace($\"{CARET}{_lf}\",\n",
|
|
" $\"{CARET}{Environment.NewLine}\");\n",
|
|
"\n",
|
|
" if (content.Contains($\"{CARET}{_cr}\"))\n",
|
|
" content = content.Replace($\"{CARET}{_cr}\",\n",
|
|
" $\"{CARET}{Environment.NewLine}\");\n",
|
|
"\n",
|
|
" if (content.Contains($\"{CARET}{_crlf}\"))\n",
|
|
" content = content.Replace($\"{CARET}{_crlf}\",\n",
|
|
" $\"{CARET}{Environment.NewLine}\");\n",
|
|
"\n",
|
|
" if (content.Contains($\"{CARET}{_ls}\"))\n",
|
|
" content = content.Replace($\"{CARET}{_ls}\",\n",
|
|
" $\"{CARET}{Environment.NewLine}\");\n",
|
|
" }\n",
|
|
"\n",
|
|
"\n",
|
|
" var entries = content.Split(new[] { $\"{CARET}{Environment.NewLine}\" },\n",
|
|
" StringSplitOptions.RemoveEmptyEntries);\n",
|
|
" var newContent = new List<string>();\n",
|
|
"\n",
|
|
" foreach (var entry in entries)\n",
|
|
" {\n",
|
|
" // Skip comments\n",
|
|
" if (entry.StartsWith(@\"//\") || entry.StartsWith(\"#\") ||\n",
|
|
" entry.StartsWith(\"/*\") || entry.EndsWith(\"*/\"))\n",
|
|
" continue;\n",
|
|
"\n",
|
|
" newContent.Add(entry);\n",
|
|
" }\n",
|
|
"\n",
|
|
" return newContent;\n",
|
|
" }\n",
|
|
"\n",
|
|
" static string GetEntry(IEnumerable<string> entries, string key)\n",
|
|
" {\n",
|
|
" // Search through list\n",
|
|
" foreach (var entry in entries)\n",
|
|
" {\n",
|
|
" // Locate index, trim carets and return translation\n",
|
|
" if (!entry.StartsWith(key))\n",
|
|
" continue;\n",
|
|
"\n",
|
|
" var startIndex = entry.IndexOf(CARET);\n",
|
|
" var line = entry.Substring(startIndex);\n",
|
|
"\n",
|
|
" if (!line.Contains(Environment.NewLine))\n",
|
|
" {\n",
|
|
" if (line.Contains(_lf))\n",
|
|
" line = line.Replace(_lf, Environment.NewLine);\n",
|
|
"\n",
|
|
" if (line.Contains(_cr))\n",
|
|
" line = line.Replace(_cr, Environment.NewLine);\n",
|
|
"\n",
|
|
" if (line.Contains(_crlf))\n",
|
|
" line = line.Replace(_crlf, Environment.NewLine);\n",
|
|
"\n",
|
|
" if (line.Contains(_ls))\n",
|
|
" line = line.Replace(_ls, Environment.NewLine);\n",
|
|
" }\n",
|
|
"\n",
|
|
" return line.TrimStart(CARET).TrimEnd(CARET);\n",
|
|
" }\n",
|
|
"\n",
|
|
" return \"[ENTRY NOT FOUND]\";\n",
|
|
" }\n",
|
|
"}"
|
|
],
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"var v1Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v1.cst\");\n",
|
|
"var v1File = File.ReadAllText(v1Path);\n",
|
|
"var one = CST.Parse(v1File, 1);\n",
|
|
"var three = CST.Parse(v1File, 3);\n",
|
|
"var four = CST.Parse(v1File, 4);\n",
|
|
"Console.WriteLine($\"One:{Environment.NewLine}{one}\");\n",
|
|
"Console.WriteLine($\"Three:{Environment.NewLine}{three}\");\n",
|
|
"Console.WriteLine($\"Four:{Environment.NewLine}{four}\");"
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": "One:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Proin ac dictum orci, at tincidunt nulla. Donec aliquet, %1 eros non interdum posuere, ipsum sapien molestie nunc, nec facilisis libero ipsum et risus. In sed lorem vel ipsum placerat viverra.\r\n"
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": "Three:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam venenatis ac odio ut pretium. Interdum et malesuada fames ac ante ipsum primis in faucibus. Donec semper turpis tempor, bibendum sapien at, blandit neque. Vivamus hendrerit imperdiet elit, vel sollicitudin nulla luctus vel. Vivamus nisl quam, feugiat a diam aliquam, iaculis vestibulum nunc. Maecenas euismod leo enim, faucibus ultrices ipsum semper eu. Praesent ullamcorper justo at maximus ultricies.\r\n"
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": "Four:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce justo dui, rhoncus a pulvinar sit amet, fermentum vitae lorem. Maecenas nec nisi sit amet eros rutrum congue. In sagittis suscipit arcu, ac vestibulum nunc feugiat volutpat.\r\n\r\nVivamus consequat velit dui, sit amet rhoncus dui malesuada a. Maecenas hendrerit commodo mi et scelerisque. Cras pharetra ultrices aliquam. Praesent ac efficitur magna, vitae scelerisque metus.\r\n"
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"var v2Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v2.cst\");\n",
|
|
"var v2File = File.ReadAllText(v2Path);\n",
|
|
"var singleLineV2 = CST.Parse(v2File, \"Singleline\");\n",
|
|
"var multiLineV2 = CST.Parse(v2File, \"Multiline\");\n",
|
|
"Console.WriteLine($\"Single line v2:{Environment.NewLine}{singleLineV2}\");\n",
|
|
"Console.WriteLine($\"Multiline v2:{Environment.NewLine}{multiLineV2}\");"
|
|
],
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": "Single line v2:\r\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ultricies nulla eu tortor mattis, dictum posuere lacus ornare. Maecenas a massa in ligula finibus luctus eu vitae nibh. Proin imperdiet dapibus mauris quis placerat.\r\n"
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": "Multiline v2:\r\n[ENTRY NOT FOUND]\r\n"
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {}
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".NET (C#)",
|
|
"language": "C#",
|
|
"name": ".net-csharp"
|
|
},
|
|
"language_info": {
|
|
"file_extension": ".cs",
|
|
"mimetype": "text/x-csharp",
|
|
"name": "C#",
|
|
"pygments_lexer": "csharp",
|
|
"version": "8.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
} |