Fixed the multiline parsing in the v2 format.

This commit is contained in:
Tony Bark 2020-12-12 03:32:09 -05:00
parent 2b69f6402d
commit efa9c5535f
4 changed files with 29 additions and 52 deletions

View file

@ -38,46 +38,36 @@ namespace CSTNet
/// <remarks>This stage ensures there are no crashes during parsing.</remarks>
static IEnumerable<string> NormalizeEntries(string content)
{
/*
I tried putting the end carets with the different
line endings in with the split function but it didn't work
*/
if (!content.Contains($"{CARET}{Environment.NewLine}"))
if (!content.Contains(Environment.NewLine))
{
if (content.Contains($"{CARET}{_lf}"))
content = content.Replace($"{CARET}{_lf}",
$"{CARET}{Environment.NewLine}");
if (content.Contains(_lf))
content = content.Replace(_lf, Environment.NewLine);
if (content.Contains($"{CARET}{_cr}"))
content = content.Replace($"{CARET}{_cr}",
$"{CARET}{Environment.NewLine}");
if (content.Contains(_cr))
content = content.Replace(_cr, Environment.NewLine);
if (content.Contains($"{CARET}{_crlf}"))
content = content.Replace($"{CARET}{_crlf}",
$"{CARET}{Environment.NewLine}");
if (content.Contains(_crlf))
content = content.Replace(_crlf, Environment.NewLine);
if (content.Contains($"{CARET}{_ls}"))
content = content.Replace($"{CARET}{_ls}",
$"{CARET}{Environment.NewLine}");
if (content.Contains(_ls))
content = content.Replace(_ls, Environment.NewLine);
}
var entries = content.Split(new[] { $"{CARET}{Environment.NewLine}" },
var lines = content.Split(new[] { $"{CARET}{Environment.NewLine}" },
StringSplitOptions.RemoveEmptyEntries);
var newContent = new List<string>();
var entries = new List<string>();
foreach (var entry in entries)
foreach (var line in lines)
{
// Skip comments
if (entry.StartsWith(@"//") || entry.StartsWith("#") ||
entry.StartsWith("/*") || entry.EndsWith("*/"))
if (line.StartsWith("//") || line.StartsWith("#") ||
line.StartsWith("/*") || line.EndsWith("*/"))
continue;
newContent.Add(entry);
entries.Add(line);
}
return newContent;
return entries;
}
static string GetEntry(IEnumerable<string> entries, string key)
@ -85,32 +75,18 @@ namespace CSTNet
// Search through list
foreach (var entry in entries)
{
// Locate index, trim carets and return translation
// If the line doesn't start with the key, keep searching.
if (!entry.StartsWith(key))
continue;
// Locate index, trim carets and return translation.
var startIndex = entry.IndexOf(CARET);
var line = entry.Substring(startIndex);
if (!line.Contains(Environment.NewLine))
{
if (line.Contains(_lf))
line = line.Replace(_lf, Environment.NewLine);
if (line.Contains(_cr))
line = line.Replace(_cr, Environment.NewLine);
if (line.Contains(_crlf))
line = line.Replace(_crlf, Environment.NewLine);
if (line.Contains(_ls))
line = line.Replace(_ls, Environment.NewLine);
}
return line.TrimStart(CARET).TrimEnd(CARET);
}
return "[ENTRY NOT FOUND]";
return "***MISSING***";
}
}
}

View file

@ -2,7 +2,7 @@
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<Version>1.0.1</Version>
<Version>1.0.2</Version>
<Authors>Tony Bark</Authors>
<PackageDescription>Caret-Separated Text (or CST) is a key-value pair format represented by numbers or words as keys and the value is the string enclosed between carets (^) that contains the contents. CST.NET is a library for parsing the CST format.</PackageDescription>
<RepositoryUrl>https://github.com/tonytins/cstnet</RepositoryUrl>

View file

@ -2,9 +2,7 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md)
Caret-Separated Text (or CST) is a key-value pair format represented by numbers or words as keys and the value is the string enclosed between carets that contains the contents. (e.g. ``<key> ^<value>^``) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character.
Sixam.CST is a library for parsing the CST format. Though, production version were capable of
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words as keys and the value as text enclosed between carets. (e.g. ``<key> ^<text>^``) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character. Sixam.CST is a library for parsing the CST format.
## Usage
@ -13,7 +11,7 @@ Sixam.CST is a library for parsing the CST format. Though, production version we
```
```csharp
#r "nuget:CSTNet,1.0.1"
#r "nuget:CSTNet,1.0.2"
using System;
using System.IO;
using CSTNet;
@ -26,10 +24,10 @@ Console.WriteLine(example);
See working example on [.NET Fiddle](https://dotnetfiddle.net/ecKb2h).
In production, CST files were used in The Sims Online to provide translations. Each translation was split into their respective directories:
In production, CST files were used in The Sims Online (TSO) to provide translations. Each translation was split into its own directory:
- ``uitext/english.dir/misc/_154_miscstrings.cst``
- ``uitext/swedish.dir/misc/_154_miscstrings.cst``
- ``uitext/english.dir/_154_miscstrings.cst``
- ``uitext/swedish.dir/_154_miscstrings.cst``
Sixam.CST only provides the basic parsing functionality.
@ -40,7 +38,6 @@ Sixam.CST only provides the basic parsing functionality.
## Known issues
- Skipping comments is a little buggy.
- Multiline parsing with the v2 format is still unpredictable.
## Requirements
### Prerequisites

View file

@ -1,5 +1,9 @@
# Change Log
## 1.0.2
- Fixed the multiline parsing in the v2 format.
## 1.0.1
Despite only being a point release, this includes a major refinement to the normalizing algorithm.