Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion TestUniHax/Form1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public FormUniMapTest()
comboBoxTransformations.DisplayMember = "Transform";

// Populate unichar properties
string[] aProps = { Fuzzer.uBOM, Fuzzer.uMVS, Fuzzer.uReservedCodePoint, Fuzzer.uRLO, Fuzzer.uDEAD, Fuzzer.uDAAD, Fuzzer.uPrivate, Fuzzer.uNotACharacter };
string[] aProps = { HostileCodePoint.uBOM, HostileCodePoint.uMVS, HostileCodePoint.uReservedCodePoint, HostileCodePoint.uRLO, HostileCodePoint.uDEAD, HostileCodePoint.uDAAD, HostileCodePoint.uPrivate, HostileCodePoint.uNotACharacter };
string sProps = String.Join("\r\n", aProps);
textBoxUnicharProps.Text = sProps;
}
Expand Down
117 changes: 96 additions & 21 deletions UniHax/Fuzzer.cs → UniHax/CodePointFuzzer.cs
Original file line number Diff line number Diff line change
@@ -1,46 +1,41 @@


// Copyright (c) 2011 by Christopher Weber

// Portions Copyright (c) 2017 by Robert Mooney

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Authors:
// Christopher Weber (chris@lookout.net)
// Robert Mooney (rjmooney@gmail.com)


using System;
using System.IO;
using System.Collections.Generic;
using System.Text;

namespace UniHax
{
/// <summary>
/// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including:
///
/// - non-character, reserved, and private use area code points
/// - special meaning characters such as the BOM and RLO
/// - ill-formed byte sequences
/// - a half-surrogate code point
///
///
/// An enumeration-style container of hostile code points.
/// </summary>
public class Fuzzer
public static class HostileCodePoint
{

/// <summary>
/// The Byte Order Mark U+FEFF is a special character defining the byte order and endianness
/// of text data.
Expand Down Expand Up @@ -136,6 +131,52 @@ public class Fuzzer
/// </summary>
public static readonly string u1D160 = char.ConvertFromUtf32(0x1D160);

/// <summary>
/// Backing store for <see cref="GetValues"/>: the hostile code point strings declared by this class.
/// </summary>
/// <remarks>
/// Keep this list in sync whenever a code point field is added to or removed from the class.
/// NOTE(review): static field initializers run in textual order, so this array presumably has to stay
/// below the code point fields it references; confirm for the fields that are not visible here.
/// </remarks>
private static readonly string[] _values =
{
    uBOM,
    uRLO,
    uMVS,
    uWordJoiner,
    uReservedCodePoint,
    uNotACharacter,
    uUnassigned,
    uDEAD,
    uDAAD,
    uPrivate,
    uFullwidthSolidus,
    uBoldEight,
    uIdnaSs,
    uFDFA,
    u0390,
    u1F82,
    uFB2C,
    u1D160
};

/// <summary>
/// Retrieves an array of the hostile code point strings collected by the HostileCodePoint class.
/// </summary>
/// <remarks>
/// A fresh copy of the backing array is returned on every call (the same contract as
/// System.Enum.GetValues), so a caller that modifies the result cannot corrupt the shared list.
/// </remarks>
/// <returns>A new array holding the same entries, in the same order, as the backing collection.</returns>
public static string[] GetValues()
{
    // Clone() is a shallow copy, which is sufficient because strings are immutable.
    return (string[])_values.Clone();
}
}

/// <summary>
/// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including:
///
/// - non-character, reserved, and private use area code points
/// - special meaning characters such as the BOM and RLO
/// - ill-formed byte sequences
/// - a half-surrogate code point
/// </summary>
public class CodePointFuzzer
{
#region Public Methods
/// <summary>
/// Gets the requested byte representation of the current Unicode character codepoint
/// </summary>
Expand All @@ -159,7 +200,6 @@ public byte[] GetCharacterBytes(string encoding, string character)
}

return enc.GetBytes(character);

}

/// <summary>
Expand All @@ -171,7 +211,7 @@ public byte[] GetCharacterBytes(string encoding, string character)
public byte[] GetCharacterBytesMalformed(string encoding, string character)
{
System.Text.Encoding enc;

if (encoding == "utf-16le")
{
enc = new System.Text.UnicodeEncoding();
Expand Down Expand Up @@ -209,7 +249,7 @@ public byte[] GetCharacterBytesMalformed(string encoding, string character)

/// <summary>
/// Gets the Byte Order Mark as a string.
/// </summary>
/// <returns>The value of <c>HostileCodePoint.uBOM</c>.</returns>
public string GetBom()
{
    // The code point constants live on HostileCodePoint; the stale Fuzzer.uBOM return was dropped.
    return HostileCodePoint.uBOM;
}

/// <summary>
Expand All @@ -220,8 +260,43 @@ public string GetBom()
/// <returns>A raw byte array because .NET will not allow illegal code points in the System.String class.</returns>
public byte[] OutOfRangeCodePointAsUtf32BE()
{
    // Big-endian bytes 00 1F FF FF encode the value 0x1FFFFF, which lies above the
    // highest valid Unicode code point (0x10FFFF). A new array is returned on each call.
    return new byte[] { 0x00, 0x1F, 0xFF, 0xFF };
}
#endregion

#region Static Methods
/// <summary>
/// Perform hostile code point substitution on each character in the specified string.
/// </summary>
/// <remarks>
/// For every index of <paramref name="source"/>, and for every string returned by
/// HostileCodePoint.GetValues(), one result is produced in which the single UTF-16 code unit at that
/// index is replaced by the hostile code point string. All other characters are left exactly as they
/// are in <paramref name="source"/>. Results are ordered by index first, then by code point.
/// Substitution is per <see cref="char"/>, so a surrogate pair in the source is replaced one half
/// at a time.
/// </remarks>
/// <param name="source">The string on which to perform the substitution</param>
/// <returns>
/// A lazily evaluated sequence of strings; an empty source yields an empty sequence.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<string> Substitute(string source)
{
    // Validate eagerly; inside an iterator the check would be deferred until enumeration starts.
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    return SubstituteIterator(source);
}

/// <summary>
/// Iterator behind <see cref="Substitute"/>; builds each result from the unmodified source.
/// </summary>
private static IEnumerable<string> SubstituteIterator(string source)
{
    // Fetch the code points once instead of once per character position.
    string[] codePoints = HostileCodePoint.GetValues();

    for (int n = 0; n < source.Length; ++n)
    {
        // Each result is composed from the original text, so a multi-char code point
        // (e.g. a surrogate pair) can never leave residue behind in later results.
        string prefix = source.Substring(0, n);
        string suffix = source.Substring(n + 1);

        foreach (string codePoint in codePoints)
        {
            yield return prefix + codePoint + suffix;
        }
    }
}
#endregion
}
}
}
1 change: 0 additions & 1 deletion UniHax/Exceptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ public override string Message
get
{
return String.Format("Bestfit mapping error:{0}", messageDetails);
return base.Message;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion UniHax/UniHax.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="CodePointFuzzer.cs" />
<Compile Include="Exceptions.cs" />
<Compile Include="Fuzzer.cs" />
<Compile Include="Mappings.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Properties\Resources.Designer.cs">
Expand Down
8 changes: 4 additions & 4 deletions UniHax/UnicodeChar.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ public string ConvertCharacterToString(char character)
{
i = Convert.ToInt32(CodePoint.Trim(), 16); // 0x00 to 0x10ffff
}
catch (FormatException e)
catch (FormatException)
{
i = 0;
}
catch(Exception)
catch (Exception)
{
throw;
}
Expand All @@ -107,11 +107,11 @@ public string ConvertCodePointToString(string codepoint)
{
i = Convert.ToInt32(codepoint.Trim(), 16); // 0x00 to 0x10ffff
}
catch (FormatException e)
catch (FormatException)
{
i = 0;
}
catch(ArgumentOutOfRangeException e)
catch (ArgumentOutOfRangeException)
{
i = 0;
}
Expand Down