diff --git a/TestUniHax/Form1.cs b/TestUniHax/Form1.cs
index 289872a..cde99e2 100644
--- a/TestUniHax/Form1.cs
+++ b/TestUniHax/Form1.cs
@@ -55,7 +55,7 @@ public FormUniMapTest()
comboBoxTransformations.DisplayMember = "Transform";
// Populate unichar properties
- string[] aProps = { Fuzzer.uBOM, Fuzzer.uMVS, Fuzzer.uReservedCodePoint, Fuzzer.uRLO, Fuzzer.uDEAD, Fuzzer.uDAAD, Fuzzer.uPrivate, Fuzzer.uNotACharacter };
+ string[] aProps = { HostileCodePoint.uBOM, HostileCodePoint.uMVS, HostileCodePoint.uReservedCodePoint, HostileCodePoint.uRLO, HostileCodePoint.uDEAD, HostileCodePoint.uDAAD, HostileCodePoint.uPrivate, HostileCodePoint.uNotACharacter };
string sProps = String.Join("\r\n", aProps);
textBoxUnicharProps.Text = sProps;
}
diff --git a/UniHax/Fuzzer.cs b/UniHax/CodePointFuzzer.cs
similarity index 71%
rename from UniHax/Fuzzer.cs
rename to UniHax/CodePointFuzzer.cs
index 4679adc..48d8443 100644
--- a/UniHax/Fuzzer.cs
+++ b/UniHax/CodePointFuzzer.cs
@@ -1,16 +1,17 @@
-
+
// Copyright (c) 2011 by Christopher Weber
-
+// Portions Copyright (c) 2017 by Robert Mooney
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
-
+
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
-
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -18,29 +19,23 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
-
+
// Authors:
// Christopher Weber (chris@lookout.net)
+// Robert Mooney (rjmooney@gmail.com)
using System;
-using System.IO;
+using System.Collections.Generic;
+using System.Text;
namespace UniHax
{
///
- /// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including:
- ///
- /// - non-character, reserved, and private use area code points
- /// - special meaning characters such as the BOM and RLO
- /// - ill-formed byte sequences
- /// - a half-surrogate code point
- ///
- ///
+ /// An enumeration-style container of hostile code points.
///
- public class Fuzzer
+ public static class HostileCodePoint
{
-
///
/// The Byte Order Mark U+FEFF is a special character defining the byte order and endianness
/// of text data.
@@ -136,6 +131,52 @@ public class Fuzzer
///
public static readonly string u1D160 = char.ConvertFromUtf32(0x1D160);
+ /// <summary>
+ /// Lookup table holding every hostile code point declared earlier in this class.
+ /// </summary>
+ /// <remarks>
+ /// Keep this table in sync: append any newly declared code point here as well.
+ /// </remarks>
+ private static readonly string[] _values =
+ {
+     uBOM,
+     uRLO,
+     uMVS,
+     uWordJoiner,
+     uReservedCodePoint,
+     uNotACharacter,
+     uUnassigned,
+     uDEAD,
+     uDAAD,
+     uPrivate,
+     uFullwidthSolidus,
+     uBoldEight,
+     uIdnaSs,
+     uFDFA,
+     u0390,
+     u1F82,
+     uFB2C,
+     u1D160
+ };
+
+ /// <summary>
+ /// Retrieves an array of the values of the constants in the HostileCodePoint enumeration.
+ /// </summary>
+ /// <returns>
+ /// A newly allocated array containing the hostile code point strings, in table order.
+ /// Modifying the returned array does not affect later calls.
+ /// </returns>
+ public static string[] GetValues()
+ {
+     // Hand out a copy so callers cannot overwrite entries of the shared static table.
+     return (string[])_values.Clone();
+ }
+ }
+
+ ///
+ /// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including:
+ ///
+ /// - non-character, reserved, and private use area code points
+ /// - special meaning characters such as the BOM and RLO
+ /// - ill-formed byte sequences
+ /// - a half-surrogate code point
+ ///
+ public class CodePointFuzzer
+ {
+ #region Public Methods
///
/// Gets the requested byte representation of the current Unicode character codepoint
///
@@ -159,7 +200,6 @@ public byte[] GetCharacterBytes(string encoding, string character)
}
return enc.GetBytes(character);
-
}
///
@@ -171,7 +211,7 @@ public byte[] GetCharacterBytes(string encoding, string character)
public byte[] GetCharacterBytesMalformed(string encoding, string character)
{
System.Text.Encoding enc;
-
+
if (encoding == "utf-16le")
{
enc = new System.Text.UnicodeEncoding();
@@ -209,7 +249,7 @@ public byte[] GetCharacterBytesMalformed(string encoding, string character)
public string GetBom()
{
- return Fuzzer.uBOM;
+ return HostileCodePoint.uBOM;
}
///
@@ -220,8 +260,43 @@ public string GetBom()
/// A raw byte array because .NET will not allow illegal code points in the System.String class.
public byte[] OutOfRangeCodePointAsUtf32BE()
{
- byte[] bytes = {0x00, 0x1F, 0xFF, 0xFF};
+ byte[] bytes = { 0x00, 0x1F, 0xFF, 0xFF };
return bytes;
}
+ #endregion
+
+ #region Static Methods
+ /// <summary>
+ /// Perform hostile code point substitution on each character in the specified string.
+ /// </summary>
+ /// <remarks>
+ /// Enumeration is lazy. For every index of <paramref name="source"/>, and for every value
+ /// returned by <see cref="HostileCodePoint.GetValues"/>, one string is produced in which only the
+ /// UTF-16 code unit at that index is replaced; every other character is taken unchanged from
+ /// <paramref name="source"/>. An empty string yields an empty sequence.
+ /// </remarks>
+ /// <param name="source">The string on which to perform the substitution</param>
+ /// <returns>The sequence of strings, each with a single character replaced by a hostile code point</returns>
+ /// <exception cref="ArgumentNullException">Thrown on first enumeration when <paramref name="source"/> is null.</exception>
+ public static IEnumerable<string> Substitute(string source)
+ {
+     if (source == null)
+     {
+         throw new ArgumentNullException("source");
+     }
+
+     // The table is the same for every position, so fetch it once.
+     string[] codePoints = HostileCodePoint.GetValues();
+
+     for (int n = 0; n < source.Length; ++n)
+     {
+         // Build each result from the untouched source rather than patching a shared buffer:
+         // a replacement longer than one UTF-16 code unit (such as a surrogate pair) can then
+         // never leave stray units behind in later results.
+         string head = source.Substring(0, n);
+         string tail = source.Substring(n + 1);
+
+         foreach (string codepoint in codePoints)
+         {
+             yield return head + codepoint + tail;
+         }
+     }
+ }
+ #endregion
}
-}
+}
\ No newline at end of file
diff --git a/UniHax/Exceptions.cs b/UniHax/Exceptions.cs
index 8bb7e98..9bce962 100644
--- a/UniHax/Exceptions.cs
+++ b/UniHax/Exceptions.cs
@@ -43,7 +43,6 @@ public override string Message
get
{
return String.Format("Bestfit mapping error:{0}", messageDetails);
- return base.Message;
}
}
}
diff --git a/UniHax/UniHax.csproj b/UniHax/UniHax.csproj
index b3c515e..2dfcf97 100644
--- a/UniHax/UniHax.csproj
+++ b/UniHax/UniHax.csproj
@@ -40,8 +40,8 @@
+
-
diff --git a/UniHax/UnicodeChar.cs b/UniHax/UnicodeChar.cs
index 6cd6e1c..bd5e096 100644
--- a/UniHax/UnicodeChar.cs
+++ b/UniHax/UnicodeChar.cs
@@ -76,11 +76,11 @@ public string ConvertCharacterToString(char character)
{
i = Convert.ToInt32(CodePoint.Trim(), 16); // 0x00 to 0x10ffff
}
- catch (FormatException e)
+ catch (FormatException)
{
i = 0;
}
- catch(Exception)
+ catch (Exception)
{
throw;
}
@@ -107,11 +107,11 @@ public string ConvertCodePointToString(string codepoint)
{
i = Convert.ToInt32(codepoint.Trim(), 16); // 0x00 to 0x10ffff
}
- catch (FormatException e)
+ catch (FormatException)
{
i = 0;
}
- catch(ArgumentOutOfRangeException e)
+ catch (ArgumentOutOfRangeException)
{
i = 0;
}