2024-01-07 02:41:10 +08:00
|
|
|
|
using CommunityToolkit.HighPerformance;
|
|
|
|
|
using CommunityToolkit.HighPerformance.Buffers;
|
|
|
|
|
using System;
|
2023-10-28 03:31:09 +08:00
|
|
|
|
using System.Collections.Concurrent;
|
2017-09-21 18:33:05 +08:00
|
|
|
|
using System.Collections.Generic;
|
2023-10-28 03:31:09 +08:00
|
|
|
|
using System.Diagnostics;
|
2024-01-07 02:41:10 +08:00
|
|
|
|
using System.Diagnostics.CodeAnalysis;
|
2017-09-21 18:33:05 +08:00
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Linq;
|
2023-11-12 01:59:17 +08:00
|
|
|
|
using System.Runtime.CompilerServices;
|
2024-01-07 02:41:10 +08:00
|
|
|
|
using System.Runtime.InteropServices;
|
|
|
|
|
using System.Runtime.Intrinsics;
|
2017-09-21 18:33:05 +08:00
|
|
|
|
using System.Text;
|
2023-10-28 03:31:09 +08:00
|
|
|
|
using System.Threading;
|
2017-09-21 18:33:05 +08:00
|
|
|
|
using System.Threading.Tasks;
|
2024-01-07 02:41:10 +08:00
|
|
|
|
using static System.Net.Mime.MediaTypeNames;
|
2017-09-21 18:33:05 +08:00
|
|
|
|
|
|
|
|
|
namespace CodeWalker.GameFiles
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Jenkins one-at-a-time hash of a piece of text, plus static helpers that hash
/// strings, character spans and byte spans, optionally lower-casing ASCII letters first.
/// </summary>
public class JenkHash
{
    /// <summary>Encoding that was used to turn <see cref="Text"/> into bytes.</summary>
    public JenkHashInputEncoding Encoding { get; set; }

    /// <summary>The text that was hashed.</summary>
    public string Text { get; set; }

    /// <summary><see cref="HashUint"/> reinterpreted as a signed 32-bit integer.</summary>
    public int HashInt { get; set; }

    /// <summary>The 32-bit hash of <see cref="Text"/>.</summary>
    public uint HashUint { get; set; }

    /// <summary><see cref="HashUint"/> as "0x" followed by upper-case hex digits (no zero padding).</summary>
    public string HashHex { get; set; }

    /// <summary>
    /// Hashes <paramref name="text"/> using <paramref name="encoding"/> and stores the
    /// result in its unsigned, signed and hexadecimal forms.
    /// </summary>
    /// <param name="text">Text to hash; null hashes to 0, like the empty string.</param>
    /// <param name="encoding">How the text is converted to bytes before hashing.</param>
    public JenkHash(string text, JenkHashInputEncoding encoding)
    {
        Encoding = encoding;
        Text = text;
        HashUint = GenHash(text, encoding);
        // Bit-for-bit reinterpretation; must not throw when overflow checking is enabled.
        HashInt = unchecked((int)HashUint);
        HashHex = "0x" + HashUint.ToString("X");
    }

    /// <summary>
    /// Lower-cases an ASCII letter. The character is first truncated to its low 8 bits,
    /// so characters above U+00FF are reduced to a single byte before the case test.
    /// </summary>
    /// <param name="c">Character to convert.</param>
    /// <returns>The truncated byte, with 'A'..'Z' mapped to 'a'..'z'.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static byte ToLower(char c)
    {
        return ToLower(unchecked((byte)c));
    }

    /// <summary>Lower-cases an ASCII letter; every other byte value is returned unchanged.</summary>
    /// <param name="c">Byte to convert.</param>
    /// <returns><paramref name="c"/> with 'A'..'Z' mapped to 'a'..'z'.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static byte ToLower(byte c)
    {
        // Setting bit 0x20 turns an upper-case ASCII letter into its lower-case form.
        return ('A' <= c && c <= 'Z') ? (byte)(c | 0x20) : c;
    }

    /// <summary>Folds one input byte into the running hash state.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint Mix(uint h, byte b)
    {
        // The algorithm depends on 32-bit wraparound, so overflow checking is switched off explicitly.
        unchecked
        {
            h += b;
            h += h << 10;
            h ^= h >> 6;
            return h;
        }
    }

    /// <summary>Applies the final avalanche steps once all input bytes have been mixed in.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint Finish(uint h)
    {
        unchecked
        {
            h += h << 3;
            h ^= h >> 11;
            h += h << 15;
            return h;
        }
    }

    /// <summary>
    /// Hashes <paramref name="text"/> after converting it to bytes with the requested encoding.
    /// </summary>
    /// <param name="text">Text to hash; null yields 0, matching the other string overloads.</param>
    /// <param name="encoding">
    /// <see cref="JenkHashInputEncoding.ASCII"/> selects ASCII; any other value selects UTF-8.
    /// </param>
    /// <returns>The 32-bit hash of the encoded bytes.</returns>
    public static uint GenHash(string text, JenkHashInputEncoding encoding)
    {
        if (text == null)
        {
            return 0;
        }

        // The static encodings are reached through UTF8Encoding/ASCIIEncoding because the
        // instance property named Encoding hides System.Text.Encoding inside this class.
        byte[] bytes;
        switch (encoding)
        {
            case JenkHashInputEncoding.ASCII:
                bytes = ASCIIEncoding.ASCII.GetBytes(text);
                break;
            default: // UTF8 and any unrecognised value
                bytes = UTF8Encoding.UTF8.GetBytes(text);
                break;
        }

        return GenHash(new ReadOnlySpan<byte>(bytes));
    }

    /// <summary>Inlining-friendly variant of <see cref="GenHashLower(string)"/>.</summary>
    /// <param name="text">Text to hash; null is treated as empty.</param>
    /// <returns>The hash of the text with ASCII letters lower-cased.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHashLowerInline(string text)
    {
        return GenHashLower(text.AsSpan());
    }

    /// <summary>Hashes a string with ASCII letters lower-cased.</summary>
    /// <param name="text">Text to hash; null is treated as empty.</param>
    /// <returns>The hash of the lower-cased text.</returns>
    public static uint GenHashLower(string text)
    {
        return GenHashLower(text.AsSpan());
    }

    /// <summary>
    /// Hashes characters as-is; each character contributes only its low 8 bits.
    /// </summary>
    /// <param name="text">Characters to hash.</param>
    /// <returns>The 32-bit hash; 0 for an empty span.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHash(ReadOnlySpan<char> text)
    {
        uint h = 0;
        foreach (char c in text)
        {
            h = Mix(h, unchecked((byte)c));
        }

        return Finish(h);
    }

    /// <summary>Inlining-friendly variant of <see cref="GenHash(string)"/>.</summary>
    /// <param name="text">Text to hash; null is treated as empty.</param>
    /// <returns>The hash of the text.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHashInline(string text)
    {
        return GenHash(text.AsSpan());
    }

    /// <summary>Hashes a string as-is; each character contributes only its low 8 bits.</summary>
    /// <param name="text">Text to hash; null is treated as empty.</param>
    /// <returns>The hash of the text.</returns>
    public static uint GenHash(string text)
    {
        return GenHash(text.AsSpan());
    }

    /// <summary>Hashes bytes with ASCII upper-case letters converted to lower case.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The 32-bit hash; 0 for an empty span.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHashLower(ReadOnlySpan<byte> data)
    {
        uint h = 0;
        foreach (byte b in data)
        {
            h = Mix(h, ToLower(b));
        }

        return Finish(h);
    }

    /// <summary>Hashes characters with ASCII upper-case letters converted to lower case.</summary>
    /// <param name="text">Characters to hash.</param>
    /// <returns>The 32-bit hash; 0 for an empty span.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHashLower(ReadOnlySpan<char> text)
    {
        uint h = 0;
        foreach (char c in text)
        {
            h = Mix(h, ToLower(c));
        }

        return Finish(h);
    }

    /// <summary>
    /// Hashes <paramref name="text"/> immediately followed by <paramref name="str2"/>,
    /// lower-casing ASCII letters, without building the concatenated string.
    /// </summary>
    /// <param name="text">First part of the input.</param>
    /// <param name="str2">Second part of the input.</param>
    /// <returns>The same value as lower-case hashing the two parts joined together.</returns>
    public static uint GenHashLower(ReadOnlySpan<char> text, ReadOnlySpan<char> str2)
    {
        uint h = 0;
        foreach (char c in text)
        {
            h = Mix(h, ToLower(c));
        }

        foreach (char c in str2)
        {
            h = Mix(h, ToLower(c));
        }

        return Finish(h);
    }

    /// <summary>Hashes raw bytes as-is.</summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The 32-bit hash; 0 for an empty span.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static uint GenHash(ReadOnlySpan<byte> data)
    {
        uint h = 0;
        foreach (byte b in data)
        {
            h = Mix(h, b);
        }

        return Finish(h);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Selects how text is converted to bytes before it is hashed by
/// JenkHash.GenHash(string, JenkHashInputEncoding).
/// </summary>
public enum JenkHashInputEncoding
{
    /// <summary>Encode the text as UTF-8. This is also the enum's default (zero) value.</summary>
    UTF8 = 0,

    /// <summary>Encode the text as ASCII.</summary>
    ASCII = 1
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// A candidate string for a hash, with a score describing how "word-like" the string is.
/// </summary>
public class JenkIndMatch
{
    /// <summary>Characters that neither add to a word's length nor end the word.</summary>
    private const string ChainSymbols = " _-.";

    /// <summary>Runs of word characters shorter than this contribute nothing to the score.</summary>
    private const int MinWordLength = 3;

    /// <summary>Textual form of the hash that was matched.</summary>
    public string Hash { get; set; }

    /// <summary>The candidate string.</summary>
    public string Value { get; set; }

    /// <summary>Word-likeness of <see cref="Value"/>; set by <see cref="CalculateScore"/>.</summary>
    public double Score { get; set; }

    /// <summary>Stores the pair and computes its score immediately.</summary>
    /// <param name="hash">Textual form of the hash.</param>
    /// <param name="val">Candidate string; null is accepted and scores 0.</param>
    public JenkIndMatch(string hash, string val)
    {
        Hash = hash;
        Value = val;
        CalculateScore();
    }

    /// <summary>
    /// Recomputes <see cref="Score"/> from <see cref="Value"/>.
    /// Letters, digits and '_' extend the current word; space, '-' and '.' are skipped
    /// without ending it; any other character ends it. The lengths of all words of at
    /// least three characters are summed and divided by the total length of the value.
    /// A null or empty value scores 0.
    /// </summary>
    public void CalculateScore()
    {
        string value = Value;
        if (string.IsNullOrEmpty(value))
        {
            Score = 0.0;
            return;
        }

        int rank = 0;
        int run = 0;

        foreach (char c in value)
        {
            if (char.IsLetterOrDigit(c) || c == '_')
            {
                run++;
            }
            else if (ChainSymbols.IndexOf(c) < 0)
            {
                // A hard break: bank the finished word and start counting afresh.
                rank += RunValue(run);
                run = 0;
            }
            // Otherwise the character is a separator that keeps the chain alive without scoring.
        }

        // The value may end in the middle of a word.
        rank += RunValue(run);

        Score = (double)rank / value.Length;
    }

    /// <summary>Score contribution of a finished word: its length, or 0 when it is too short.</summary>
    private static int RunValue(int length)
    {
        return length >= MinWordLength ? length : 0;
    }

    /// <summary>Formats the match as "hash -> value (score)", with the score shown to at most two decimals.</summary>
    public override string ToString()
    {
        return string.Format("{0} -> {1} ({2:0.##})", Hash, Value, Score);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Describes a problem found at a particular line of a file.
/// </summary>
public class JenkIndProblem
{
    /// <summary>File name (directory part removed) of the file the problem was found in.</summary>
    public string Filename { get; set; }

    /// <summary>Description of what went wrong.</summary>
    public string Excuse { get; set; }

    /// <summary>Line number the problem refers to.</summary>
    public int Line { get; set; }

    /// <summary>Creates a problem record, keeping only the file-name part of the path.</summary>
    /// <param name="filepath">Path of the file; reduced with <see cref="Path.GetFileName(string)"/>.</param>
    /// <param name="excuse">Description of the problem.</param>
    /// <param name="line">Line number the problem refers to.</param>
    public JenkIndProblem(string filepath, string excuse, int line)
    {
        string nameOnly = Path.GetFileName(filepath);

        Filename = nameOnly;
        Excuse = excuse;
        Line = line;
    }

    /// <summary>Formats the problem as "file : excuse at line N".</summary>
    public override string ToString() => $"{Filename} : {Excuse} at line {Line}";
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Process-wide reverse lookup from hash values to the strings registered for them.
/// A hash of 0 is never registered by any of the Ensure* methods.
/// </summary>
public static class JenkIndex
{
    /// <summary>
    /// Hash-to-string table. Constructed with a concurrency level of
    /// <see cref="Environment.ProcessorCount"/> and an initial capacity of 2097152 (2^21) entries.
    /// </summary>
    public static ConcurrentDictionary<uint, string> Index = new ConcurrentDictionary<uint, string>(Environment.ProcessorCount, 2097152);

    /// <summary>Registers <paramref name="str"/> under its case-sensitive hash.</summary>
    /// <param name="str">String to register; null or empty hashes to 0 and is ignored.</param>
    public static void Ensure(string str)
    {
        // Route through the two-argument overload so the zero-hash guard applies here as well.
        Ensure(str, JenkHash.GenHashInline(str));
    }

    /// <summary>Adds the pair unless the hash is already present; an existing entry is kept.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void AddString(string str, uint hash)
    {
        Index.TryAdd(hash, str);
    }

    /// <summary>Registers <paramref name="str"/> under a hash computed by the caller.</summary>
    /// <param name="str">String to register.</param>
    /// <param name="hash">Hash to register it under; 0 is ignored.</param>
    public static void Ensure(string str, uint hash)
    {
        if (hash == 0)
        {
            return;
        }

        AddString(str, hash);
    }

    /// <summary>
    /// Registers the characters of <paramref name="span"/> under <paramref name="hash"/>.
    /// The index is consulted before the string is obtained from <c>StringPool.Shared</c>,
    /// so no string is produced for a hash that is already registered.
    /// </summary>
    /// <param name="span">Characters to register.</param>
    /// <param name="hash">Hash to register them under; 0 is ignored.</param>
    public static void Ensure(ReadOnlySpan<char> span, uint hash)
    {
        if (hash == 0 || Index.ContainsKey(hash))
        {
            return;
        }

        AddString(StringPool.Shared.GetOrAdd(span), hash);
    }

    /// <summary>
    /// Registers the ASCII-decoded form of <paramref name="str"/> under <paramref name="hash"/>.
    /// The index is consulted first, so nothing is decoded for a hash that is already registered.
    /// </summary>
    /// <param name="str">Bytes to decode as ASCII and register.</param>
    /// <param name="hash">Hash to register them under; 0 is ignored.</param>
    public static void Ensure(ReadOnlySpan<byte> str, uint hash)
    {
        if (hash == 0 || Index.ContainsKey(hash))
        {
            return;
        }

        AddString(Encoding.ASCII.GetString(str), hash);
    }

    /// <summary>
    /// Registers <paramref name="str"/>, in its original casing, under its lower-case hash.
    /// </summary>
    /// <param name="str">String to register.</param>
    public static void EnsureLower(string str)
    {
        Ensure(str, JenkHash.GenHashLowerInline(str));
    }

    /// <summary>
    /// Registers the characters of <paramref name="str"/>, in their original casing,
    /// under their lower-case hash.
    /// </summary>
    /// <param name="str">Characters to register.</param>
    public static void EnsureLower(ReadOnlySpan<char> str)
    {
        Ensure(str, JenkHash.GenHashLower(str));
    }

    /// <summary>
    /// Registers <paramref name="str"/> under both its case-sensitive and its lower-case hash.
    /// </summary>
    /// <param name="str">String to register.</param>
    public static void EnsureBoth(string str)
    {
        uint hash = JenkHash.GenHashInline(str);
        uint hashLower = JenkHash.GenHashLowerInline(str);

        Ensure(str, hash);
        if (hash != hashLower)
        {
            Ensure(str, hashLower);
        }
    }

    /// <summary>
    /// Registers the characters of <paramref name="strSpan"/> under both their case-sensitive
    /// and their lower-case hash. A string is obtained from <c>StringPool.Shared</c> only
    /// when at least one of the two hashes still needs an entry.
    /// </summary>
    /// <param name="strSpan">Characters to register.</param>
    public static void EnsureBoth(ReadOnlySpan<char> strSpan)
    {
        uint hash = JenkHash.GenHash(strSpan);
        uint hashLower = JenkHash.GenHashLower(strSpan);

        // Zero hashes are skipped, matching the other Ensure overloads.
        bool needExact = hash != 0 && !Index.ContainsKey(hash);
        bool needLower = hashLower != 0 && hashLower != hash && !Index.ContainsKey(hashLower);
        if (!needExact && !needLower)
        {
            return;
        }

        string str = StringPool.Shared.GetOrAdd(strSpan);
        if (needExact)
        {
            AddString(str, hash);
        }

        if (needLower)
        {
            AddString(str, hashLower);
        }
    }

    /// <summary>Looks up the string registered for <paramref name="hash"/>.</summary>
    /// <param name="hash">Hash to look up.</param>
    /// <returns>The registered string, or the decimal text of the hash when none is registered.</returns>
    public static string GetString(uint hash)
    {
        return Index.TryGetValue(hash, out string res) ? res : hash.ToString();
    }

    /// <summary>Looks up the string registered for <paramref name="hash"/>.</summary>
    /// <param name="hash">Hash to look up.</param>
    /// <returns>The registered string, or <see cref="string.Empty"/> when none is registered.</returns>
    public static string TryGetString(uint hash)
    {
        return Index.TryGetValue(hash, out string res) ? res : string.Empty;
    }

    /// <summary>Looks up the string registered for <paramref name="hash"/>.</summary>
    /// <param name="hash">Hash to look up.</param>
    /// <param name="res">The registered string when the method returns true.</param>
    /// <returns>True when an entry exists for the hash.</returns>
    public static bool TryGetString(uint hash, [MaybeNullWhen(false)] out string res) => Index.TryGetValue(hash, out res);

    /// <summary>Returns the <see cref="ConcurrentDictionary{TKey,TValue}.Values"/> collection of the index.</summary>
    public static ICollection<string> GetAllStrings()
    {
        return Index.Values;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|