cstdotnet/notebooks/cst.ipynb

236 lines
7.7 KiB
Text
Raw Permalink Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Caret-Separated Text\n",
"\n",
"Caret-Separated Text (CST) is a key-value format: each key (traditionally a number) is followed by its value, the translation string enclosed between carets (^). Any text that is not enclosed in carets is considered a comment and ignored.\n",
"\n",
"## CST.NET\n",
"\n",
2022-04-29 06:39:56 -04:00
"CST.NET locates each key with .NET's built-in string search methods. As a consequence, keys do not have to be numbers; any text can serve as a key. I also added a normalization step to the pipeline that converts the document's line endings to the system's, in order to prevent crashes."
]
},
{
"cell_type": "code",
"execution_count": 1,
2022-04-29 06:39:56 -04:00
"metadata": {
"vscode": {
"languageId": "csharp"
}
},
"outputs": [],
"source": [
"using System.IO;\n",
"using System.Collections.Generic;\n",
"using System.Text.RegularExpressions;"
2022-04-29 06:39:56 -04:00
]
},
{
"cell_type": "code",
"execution_count": 1,
2022-04-29 06:39:56 -04:00
"metadata": {
"vscode": {
"languageId": "csharp"
}
},
"outputs": [],
"source": [
"/// <summary>\n",
"/// Reads values out of Caret-Separated Text (CST) documents: each entry is a key\n",
"/// followed by a value enclosed between carets (^).\n",
"/// </summary>\n",
"public static class CaretSeparatedText\n",
"{\n",
"    const char CARET = '^';\n",
"    const string MISSING = \"***MISSING***\";\n",
"    static readonly string _lf = \"\\u000A\";\n",
"    static readonly string _cr = \"\\u000D\";\n",
"    static readonly string _crlf = \"\\u000D\\u000A\";\n",
"    static readonly string _ls = \"\\u2028\";\n",
"\n",
"    /// <summary>\n",
"    /// Gets the value from the integer-based key.\n",
"    /// </summary>\n",
"    /// <param name=\"content\">Full text of the CST document.</param>\n",
"    /// <param name=\"key\">Numeric key of the entry.</param>\n",
"    /// <returns>The entry's text, or <c>***MISSING***</c> when no entry has that key.</returns>\n",
"    /// <exception cref=\"ArgumentNullException\"><paramref name=\"content\"/> is null.</exception>\n",
"    public static string Parse(string content, int key)\n",
"    {\n",
"        // Invariant formatting keeps the key text independent of the current culture.\n",
"        return Parse(content, key.ToString(System.Globalization.CultureInfo.InvariantCulture));\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Gets the value from the string-based key.\n",
"    /// </summary>\n",
"    /// <param name=\"content\">Full text of the CST document.</param>\n",
"    /// <param name=\"key\">Key of the entry; it must equal the entry's key exactly.</param>\n",
"    /// <returns>The entry's text, or <c>***MISSING***</c> when no entry has that key.</returns>\n",
"    /// <exception cref=\"ArgumentNullException\">\n",
"    /// <paramref name=\"content\"/> or <paramref name=\"key\"/> is null.\n",
"    /// </exception>\n",
"    public static string Parse(string content, string key)\n",
"    {\n",
"        if (content == null)\n",
"        {\n",
"            throw new ArgumentNullException(nameof(content));\n",
"        }\n",
"\n",
"        if (key == null)\n",
"        {\n",
"            throw new ArgumentNullException(nameof(key));\n",
"        }\n",
"\n",
"        var entries = NormalizeEntries(content);\n",
"        return GetEntry(entries, key);\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Replaces the document's line endings with the native system line endings\n",
"    /// and splits the document after every caret that ends a line.\n",
"    /// </summary>\n",
"    /// <remarks>\n",
"    /// Each returned chunk holds any comment lines that preceded the entry, the key,\n",
"    /// the opening caret and the value. The closing caret is consumed by the split,\n",
"    /// except on a final entry that has no trailing line break.\n",
"    /// </remarks>\n",
"    static IEnumerable<string> NormalizeEntries(string content)\n",
"    {\n",
"        // Collapse every terminator to LF first. CRLF is replaced before the lone CR\n",
"        // so that a CR LF pair is never expanded twice.\n",
"        content = content.Replace(_crlf, _lf).Replace(_cr, _lf).Replace(_ls, _lf);\n",
"\n",
"        if (Environment.NewLine != _lf)\n",
"        {\n",
"            content = content.Replace(_lf, Environment.NewLine);\n",
"        }\n",
"\n",
"        return content.Split(new[] { $\"{CARET}{Environment.NewLine}\" },\n",
"                             StringSplitOptions.RemoveEmptyEntries);\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Returns the value of the first entry whose key equals <paramref name=\"key\"/>.\n",
"    /// </summary>\n",
"    static string GetEntry(IEnumerable<string> entries, string key)\n",
"    {\n",
"        foreach (var entry in entries)\n",
"        {\n",
"            var caretIndex = entry.IndexOf(CARET);\n",
"\n",
"            // A chunk without a caret carries no value; it is comment text only.\n",
"            if (caretIndex < 0)\n",
"            {\n",
"                continue;\n",
"            }\n",
"\n",
"            var candidate = ReadKey(entry, caretIndex);\n",
"\n",
"            // Exact ordinal comparison: key \"1\" must not match the entry keyed \"10\".\n",
"            if (IsComment(candidate) || !string.Equals(candidate, key, StringComparison.Ordinal))\n",
"            {\n",
"                continue;\n",
"            }\n",
"\n",
"            // Drop the opening caret, plus the closing one if the split left it in place.\n",
"            return entry.Substring(caretIndex).Trim(CARET);\n",
"        }\n",
"\n",
"        return MISSING;\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Extracts the key: the trimmed text of the last non-blank line before the opening caret.\n",
"    /// Earlier lines in the chunk are not enclosed in carets, so they are comments.\n",
"    /// </summary>\n",
"    static string ReadKey(string entry, int caretIndex)\n",
"    {\n",
"        var prefix = entry.Substring(0, caretIndex).TrimEnd();\n",
"        var lineBreak = prefix.LastIndexOfAny(new[] { '\\n', '\\r' });\n",
"\n",
"        return prefix.Substring(lineBreak + 1).Trim();\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Tells whether a key line opens with a comment marker (//, # or /*),\n",
"    /// which marks the entry as commented out.\n",
"    /// </summary>\n",
"    static bool IsComment(string keyLine)\n",
"    {\n",
"        return keyLine.StartsWith(\"//\", StringComparison.Ordinal)\n",
"            || keyLine.StartsWith(\"#\", StringComparison.Ordinal)\n",
"            || keyLine.StartsWith(\"/*\", StringComparison.Ordinal);\n",
"    }\n",
"}"
2022-04-29 06:39:56 -04:00
]
},
{
"cell_type": "code",
"execution_count": 1,
2022-04-29 06:39:56 -04:00
"metadata": {
"vscode": {
"languageId": "csharp"
}
},
"outputs": [],
"source": [
"/// <summary>\n",
"/// Looks up UI text stored as CST files, either in the per-language directory\n",
"/// <c>data/uitext/{Language}.dir</c> under the current directory or in a single file.\n",
"/// </summary>\n",
"class ContentStrings\n",
"{\n",
"    string Language { get; set; } = \"english\";\n",
"\n",
"    /// <summary>\n",
"    /// Gets the text stored under an integer key in the given table.\n",
"    /// </summary>\n",
"    /// <param name=\"table\">Table id taken from the CST file name.</param>\n",
"    /// <param name=\"key\">Numeric key of the entry.</param>\n",
"    /// <returns>The entry's text, or <c>***MISSING***</c> when nothing matches.</returns>\n",
"    public string GetText(string table, int key) =>\n",
"        GetText(table, key.ToString(System.Globalization.CultureInfo.InvariantCulture));\n",
"\n",
"    /// <summary>\n",
"    /// Gets the text stored under a string key in the given table.\n",
"    /// </summary>\n",
"    /// <param name=\"table\">Table id taken from the CST file name.</param>\n",
"    /// <param name=\"key\">Key of the entry inside the table's file.</param>\n",
"    /// <returns>\n",
"    /// The entry's text, or <c>***MISSING***</c> when the language directory,\n",
"    /// the table or the key does not exist.\n",
"    /// </returns>\n",
"    public string GetText(string table, string key)\n",
"    {\n",
"        var baseDir = Path.Combine(Environment.CurrentDirectory, \"data\", \"uitext\", $\"{Language}.dir\");\n",
"\n",
"        // A language directory that is absent has no text, same as an unknown table.\n",
"        if (!Directory.Exists(baseDir))\n",
"        {\n",
"            return \"***MISSING***\";\n",
"        }\n",
"\n",
"        foreach (var file in Directory.GetFiles(baseDir))\n",
"        {\n",
"            var name = Path.GetFileName(file);\n",
"\n",
"            // Ordinal search for the underscore that closes the table id.\n",
"            var second = name.IndexOf('_', 1);\n",
"\n",
"            if (second == -1)\n",
"            {\n",
"                continue;\n",
"            }\n",
"\n",
"            // The id sits between the first character, which is skipped without\n",
"            // being checked, and that underscore.\n",
"            var id = name.Substring(1, second - 1);\n",
"\n",
"            if (!string.Equals(id, table, StringComparison.Ordinal))\n",
"            {\n",
"                continue;\n",
"            }\n",
"\n",
"            return CaretSeparatedText.Parse(File.ReadAllText(file), key);\n",
"        }\n",
"\n",
"        return \"***MISSING***\";\n",
"    }\n",
"\n",
"    /// <summary>\n",
"    /// Gets the text stored under a key in a CST file located relative to the\n",
"    /// application's base directory.\n",
"    /// </summary>\n",
"    /// <param name=\"cst\">Path of the CST file, relative to <c>AppContext.BaseDirectory</c>.</param>\n",
"    /// <param name=\"key\">Key of the entry.</param>\n",
"    /// <returns>Whatever <c>CaretSeparatedText.Parse</c> returns for that file and key.</returns>\n",
"    public static string CSTFile(string cst, string key)\n",
"    {\n",
"        var path = Path.Combine(AppContext.BaseDirectory, cst);\n",
"\n",
"        return CaretSeparatedText.Parse(File.ReadAllText(path), key);\n",
"    }\n",
"}"
2022-04-29 06:39:56 -04:00
]
},
{
"cell_type": "code",
"execution_count": 1,
2022-04-29 06:39:56 -04:00
"metadata": {
"vscode": {
"languageId": "csharp"
}
},
"outputs": [],
"source": [
"// Resolve one entry through the language-directory lookup and print it.\n",
"ContentStrings english = new ContentStrings();\n",
"string v1Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v1.cst\");\n",
"string v1File = File.ReadAllText(v1Path);\n",
"string one = english.GetText(\"102\", \"Singleline\");\n",
"Console.WriteLine(string.Concat(\"One:\", Environment.NewLine, one));\n",
"\n",
"// Disabled: direct lookups by numeric key in v1.cst.\n",
"// var three = CaretSeparatedText.Parse(v1File, 3);\n",
"// var four = CaretSeparatedText.Parse(v1File, 4);\n",
"// Console.WriteLine($\"Three:{Environment.NewLine}{three}\");\n",
"// Console.WriteLine($\"Four:{Environment.NewLine}{four}\");"
2022-04-29 06:39:56 -04:00
]
},
{
"cell_type": "code",
"execution_count": 1,
2022-04-29 06:39:56 -04:00
"metadata": {
"vscode": {
"languageId": "csharp"
}
},
"outputs": [],
"source": [
"// Read v2.cst once, then print its single-line and multi-line entries.\n",
"string v2Path = Path.Combine(Environment.CurrentDirectory, \"data\", \"v2.cst\");\n",
"string v2File = File.ReadAllText(v2Path);\n",
"string singleLineV2 = CaretSeparatedText.Parse(v2File, \"Singleline\");\n",
"string multiLineV2 = CaretSeparatedText.Parse(v2File, \"Multiline\");\n",
"Console.WriteLine(string.Concat(\"Single line v2:\", Environment.NewLine, singleLineV2));\n",
"Console.WriteLine(string.Concat(\"Multiline v2:\", Environment.NewLine, multiLineV2));"
2022-04-29 06:39:56 -04:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".NET (C#)",
"language": "C#",
"name": ".net-csharp"
},
"language_info": {
"file_extension": ".cs",
"mimetype": "text/x-csharp",
"name": "C#",
"pygments_lexer": "csharp",
"version": "8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
2022-04-29 06:39:56 -04:00
}