Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ Encoding covers all encodings specified by WHATWG Encoding Standard and some mor
* Encodings that were originally specified by WHATWG Encoding Standard:
* HZ
* ISO 8859-1 (distinct from Windows code page 1252)
* Code page 437 (`cp437`)

Parenthesized names refer to the encoding's primary name assigned by WHATWG Encoding Standard.

Expand Down
2 changes: 2 additions & 0 deletions src/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ macro_rules! singlebyte(
unique!(var=ERROR, mod=codec::error, val=ErrorEncoding);
unique!(var=ASCII, mod=codec::ascii, val=ASCIIEncoding);
singlebyte!(var=ARMSCII_8, mod=index::armscii_8, name="armscii-8");
singlebyte!(var=CP437, mod=index::cp437, name="cp437");
singlebyte!(var=IBM866, mod=index::ibm866, name|whatwg="ibm866");
singlebyte!(var=ISO_8859_1, mod=codec::singlebyte::iso_8859_1, name="iso-8859-1");
singlebyte!(var=ISO_8859_2, mod=index::iso_8859_2, name|whatwg="iso-8859-2");
Expand Down Expand Up @@ -96,6 +97,7 @@ pub fn encodings() -> &'static [EncodingRef] {
const ENCODINGS: &'static [EncodingRef] = &[
ERROR,
ASCII,
CP437,
IBM866,
ISO_8859_1,
ISO_8859_2,
Expand Down
1 change: 1 addition & 0 deletions src/index/gen_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ def generate_multi_byte_range_lbound_index(opts, crate, name):

INDICES = [
('singlebyte/armscii-8', generate_single_byte_index),
('singlebyte/cp437', generate_single_byte_index),

('singlebyte/ibm866', generate_single_byte_index),
('singlebyte/iso-8859-2', generate_single_byte_index),
Expand Down
112 changes: 112 additions & 0 deletions src/index/singlebyte/cp437.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// AUTOGENERATED FROM index-cp437.txt, ORIGINAL COMMENT FOLLOWS:
//
// Source URL: ftp://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT
// Keys have been shifted to be more like WHATWG's indexes.
//
// Name: cp437_DOSLatinUS to Unicode table
// Unicode version: 2.0
// Table version: 2.00
// Table format: Format A
// Date: 04/24/96
// Contact: Shawn.Steele@microsoft.com
//
// General notes: none
//
// Format: Three tab-separated columns
// Column #1 is the cp437_DOSLatinUS code (in hex)
// Column #2 is the Unicode (in hex as 0xXXXX)
// Column #3 is the Unicode name (follows a comment sign, '#')
//
// The entries are in cp437_DOSLatinUS order
//

// Sentinel value 0xffff. Nothing in the visible part of this file refers to it,
// which is why `dead_code` is allowed here.
// NOTE(review): presumably the generator's marker for unassigned table slots —
// confirm against gen_index.py.
#[allow(dead_code)] const X: u16 = 0xffff;

/// Forward (byte -> Unicode) table for CP437.
///
/// `forward` reads entry `code - 0x80`, so slot 0 belongs to byte 0x80 and the last
/// slot to byte 0xff. Each value is a Unicode code point in decimal
/// (e.g. 199 = U+00C7, the first row of index-cp437.txt).
const FORWARD_TABLE: &'static [u16] = &[
199, 252, 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, 238, 236, 196,
197, 201, 230, 198, 244, 246, 242, 251, 249, 255, 214, 220, 162, 163, 165,
8359, 402, 225, 237, 243, 250, 241, 209, 170, 186, 191, 8976, 172, 189,
188, 161, 171, 187, 9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558, 9557,
9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472,
9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575, 9576,
9572, 9573, 9561, 9560, 9554, 9555, 9579, 9578, 9496, 9484, 9608, 9604,
9612, 9616, 9600, 945, 223, 915, 960, 931, 963, 181, 964, 934, 920, 937,
948, 8734, 966, 949, 8745, 8801, 177, 8805, 8804, 8992, 8993, 247, 8776,
176, 8729, 183, 8730, 8319, 178, 9632, 160,
]; // 128 entries

/// Looks up the Unicode code point stored for the CP437 byte `code`.
///
/// The table is keyed by `code - 0x80`, so `code` is expected to lie in
/// `0x80..=0xff`. A smaller value makes the subtraction underflow (a panic in
/// builds with overflow checks) or, after wrapping, index past the end of the table.
#[inline]
pub fn forward(code: u8) -> u16 {
    let pointer = (code - 0x80) as usize;
    FORWARD_TABLE[pointer]
}

/// Second level of the optimized backward (Unicode -> byte) lookup.
///
/// `backward` reads the entry at `BACKWARD_TABLE_UPPER[code >> 6] + (code & 63)`,
/// i.e. the table is addressed in runs of 64 consecutive code points. A non-zero
/// entry is the CP437 byte for that code point (e.g. 196 = 0xC4 for U+2500);
/// 0 is what `backward` returns for code points that have no entry.
/// The first 64 entries are all zero; every offset of 0 in the upper table
/// (and every out-of-range code point) lands in that run.
#[cfg(not(feature = "no-optimized-legacy-encoding"))]
const BACKWARD_TABLE_LOWER: &'static [u8] = &[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196, 0, 179, 0, 0, 0, 0, 0, 0, 0,
0, 0, 218, 0, 0, 0, 191, 0, 0, 0, 192, 0, 0, 0, 217, 0, 0, 0, 195, 0, 0, 0,
0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 194, 0, 0, 0, 0, 0, 0, 0, 193, 0, 0,
0, 0, 0, 0, 0, 197, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 169,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244, 245, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249,
251, 0, 0, 0, 236, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 239, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 205, 186, 213, 214, 201, 184,
183, 187, 212, 211, 200, 190, 189, 188, 198, 199, 204, 181, 182, 185, 209,
210, 203, 207, 208, 202, 216, 215, 206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 247, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 240, 0, 0, 243, 242, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 226, 0, 0, 0, 0, 233, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 232, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0,
224, 0, 0, 235, 238, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 143, 146, 128, 0,
144, 0, 0, 0, 0, 0, 0, 0, 165, 0, 0, 0, 0, 153, 0, 0, 0, 0, 0, 154, 0, 0,
225, 133, 160, 131, 0, 132, 134, 145, 135, 138, 130, 136, 137, 141, 161,
140, 139, 0, 164, 149, 162, 147, 0, 148, 246, 0, 151, 163, 150, 129, 0, 0,
152, 223, 0, 0, 0, 220, 0, 0, 0, 219, 0, 0, 0, 221, 0, 0, 0, 222, 176, 177,
178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 173,
155, 156, 0, 157, 0, 0, 0, 0, 166, 174, 170, 0, 0, 0, 248, 241, 253, 0, 0,
230, 0, 250, 0, 0, 167, 175, 172, 171, 0, 168, 227, 0, 0, 229, 231, 0, 237,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252,
]; // 678 entries

/// First level of the optimized backward (Unicode -> byte) lookup.
///
/// Indexed by `code >> 6` (one entry per run of 64 code points). Each value is the
/// starting offset of that run inside `BACKWARD_TABLE_LOWER`; `backward` adds the
/// low six bits of the code point to it. An offset of 0 points at the leading
/// all-zero run of the lower table, so runs without any mapped code point resolve
/// to 0. `backward` also substitutes 0 for indexes of 151 and above, which lie
/// past the end of this table.
#[cfg(not(feature = "no-optimized-legacy-encoding"))]
const BACKWARD_TABLE_UPPER: &'static [u16] = &[
0, 0, 543, 446, 0, 0, 125, 0, 0, 0, 0, 0, 0, 0, 386, 607, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 614, 150, 0, 0, 0, 0, 0, 237, 341, 0, 0, 198, 0, 0, 0,
0, 0, 0, 0, 64, 285, 510,
]; // 151 entries

/// Maps the Unicode code point `code` to its CP437 byte via the two-level tables.
///
/// The high bits (`code >> 6`) select a run of 64 code points through
/// `BACKWARD_TABLE_UPPER`; the low six bits select the slot inside that run of
/// `BACKWARD_TABLE_LOWER`. Code points whose run index is 151 or more fall outside
/// the upper table and use offset 0. The result is whatever byte the slot holds,
/// which is 0 for slots without a mapping.
#[inline]
#[cfg(not(feature = "no-optimized-legacy-encoding"))]
pub fn backward(code: u32) -> u8 {
    let run = (code >> 6) as usize;
    let slot = (code & 63) as usize;
    let base = if run >= 151 {
        0
    } else {
        BACKWARD_TABLE_UPPER[run] as usize
    };
    BACKWARD_TABLE_LOWER[base + slot]
}

/// Maps the Unicode code point `code` to its CP437 byte by scanning `FORWARD_TABLE`.
///
/// This is the table-free variant compiled when the `no-optimized-legacy-encoding`
/// feature is enabled. It returns `0x80 + i` for the first table slot `i` holding
/// `code`, and 0 when the code point is rejected up front or not found.
#[cfg(feature = "no-optimized-legacy-encoding")]
pub fn backward(code: u32) -> u8 {
    // Reject anything above 9632 first; this also keeps the shift amount
    // used by the bitmask test below small (at most 18).
    if code > 9632 {
        return 0;
    }
    // Bit n of 0x70003 is set when the 512-code-point block n (`code >> 9`)
    // is allowed through to the linear scan.
    let block_allowed = ((0x70003u32 >> (code >> 9)) & 1) != 0;
    if !block_allowed {
        return 0;
    }
    let wanted = code as u16;
    (0..0x80u8)
        .find(|&i| FORWARD_TABLE[i as usize] == wanted)
        .map_or(0, |i| 0x80 + i)
}

// Test-only invocation of the `single_byte_tests!` macro, which is defined outside
// this file.
// NOTE(review): presumably expands to the shared tests for single-byte indexes
// (the macro likely comes from the `encoding_index_tests` crate) — confirm, and check
// that the empty argument list is what the macro expects.
#[cfg(test)]
single_byte_tests! {
}
148 changes: 148 additions & 0 deletions src/index/singlebyte/index-cp437.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Source URL: ftp://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT
# Keys have been shifted down by 0x80 (original bytes 0x80-0xFF appear here as 0x00-0x7F) to match the layout of WHATWG's single-byte indexes.
#
# Name: cp437_DOSLatinUS to Unicode table
# Unicode version: 2.0
# Table version: 2.00
# Table format: Format A
# Date: 04/24/96
# Contact: Shawn.Steele@microsoft.com
#
# General notes: none
#
# Format: Three tab-separated columns
# Column #1 is the cp437_DOSLatinUS code (in hex)
# Column #2 is the Unicode (in hex as 0xXXXX)
# Column #3 is the Unicode name (follows a comment sign, '#')
#
# The entries are in cp437_DOSLatinUS order
#

0x00 0x00c7 #LATIN CAPITAL LETTER C WITH CEDILLA
0x01 0x00fc #LATIN SMALL LETTER U WITH DIAERESIS
0x02 0x00e9 #LATIN SMALL LETTER E WITH ACUTE
0x03 0x00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
0x04 0x00e4 #LATIN SMALL LETTER A WITH DIAERESIS
0x05 0x00e0 #LATIN SMALL LETTER A WITH GRAVE
0x06 0x00e5 #LATIN SMALL LETTER A WITH RING ABOVE
0x07 0x00e7 #LATIN SMALL LETTER C WITH CEDILLA
0x08 0x00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX
0x09 0x00eb #LATIN SMALL LETTER E WITH DIAERESIS
0x0a 0x00e8 #LATIN SMALL LETTER E WITH GRAVE
0x0b 0x00ef #LATIN SMALL LETTER I WITH DIAERESIS
0x0c 0x00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0d 0x00ec #LATIN SMALL LETTER I WITH GRAVE
0x0e 0x00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS
0x0f 0x00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE
0x10 0x00c9 #LATIN CAPITAL LETTER E WITH ACUTE
0x11 0x00e6 #LATIN SMALL LIGATURE AE
0x12 0x00c6 #LATIN CAPITAL LIGATURE AE
0x13 0x00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
0x14 0x00f6 #LATIN SMALL LETTER O WITH DIAERESIS
0x15 0x00f2 #LATIN SMALL LETTER O WITH GRAVE
0x16 0x00fb #LATIN SMALL LETTER U WITH CIRCUMFLEX
0x17 0x00f9 #LATIN SMALL LETTER U WITH GRAVE
0x18 0x00ff #LATIN SMALL LETTER Y WITH DIAERESIS
0x19 0x00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS
0x1a 0x00dc #LATIN CAPITAL LETTER U WITH DIAERESIS
0x1b 0x00a2 #CENT SIGN
0x1c 0x00a3 #POUND SIGN
0x1d 0x00a5 #YEN SIGN
0x1e 0x20a7 #PESETA SIGN
0x1f 0x0192 #LATIN SMALL LETTER F WITH HOOK
0x20 0x00e1 #LATIN SMALL LETTER A WITH ACUTE
0x21 0x00ed #LATIN SMALL LETTER I WITH ACUTE
0x22 0x00f3 #LATIN SMALL LETTER O WITH ACUTE
0x23 0x00fa #LATIN SMALL LETTER U WITH ACUTE
0x24 0x00f1 #LATIN SMALL LETTER N WITH TILDE
0x25 0x00d1 #LATIN CAPITAL LETTER N WITH TILDE
0x26 0x00aa #FEMININE ORDINAL INDICATOR
0x27 0x00ba #MASCULINE ORDINAL INDICATOR
0x28 0x00bf #INVERTED QUESTION MARK
0x29 0x2310 #REVERSED NOT SIGN
0x2a 0x00ac #NOT SIGN
0x2b 0x00bd #VULGAR FRACTION ONE HALF
0x2c 0x00bc #VULGAR FRACTION ONE QUARTER
0x2d 0x00a1 #INVERTED EXCLAMATION MARK
0x2e 0x00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x2f 0x00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x30 0x2591 #LIGHT SHADE
0x31 0x2592 #MEDIUM SHADE
0x32 0x2593 #DARK SHADE
0x33 0x2502 #BOX DRAWINGS LIGHT VERTICAL
0x34 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x35 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x36 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x37 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x38 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x39 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x3a 0x2551 #BOX DRAWINGS DOUBLE VERTICAL
0x3b 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT
0x3c 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT
0x3d 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x3e 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x3f 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT
0x40 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT
0x41 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x42 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x43 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x44 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL
0x45 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x46 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x47 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x48 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT
0x49 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x4a 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x4b 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x4c 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x4d 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL
0x4e 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x4f 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x50 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x51 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x52 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x53 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x54 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x55 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x56 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x57 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x58 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x59 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT
0x5a 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT
0x5b 0x2588 #FULL BLOCK
0x5c 0x2584 #LOWER HALF BLOCK
0x5d 0x258c #LEFT HALF BLOCK
0x5e 0x2590 #RIGHT HALF BLOCK
0x5f 0x2580 #UPPER HALF BLOCK
0x60 0x03b1 #GREEK SMALL LETTER ALPHA
0x61 0x00df #LATIN SMALL LETTER SHARP S
0x62 0x0393 #GREEK CAPITAL LETTER GAMMA
0x63 0x03c0 #GREEK SMALL LETTER PI
0x64 0x03a3 #GREEK CAPITAL LETTER SIGMA
0x65 0x03c3 #GREEK SMALL LETTER SIGMA
0x66 0x00b5 #MICRO SIGN
0x67 0x03c4 #GREEK SMALL LETTER TAU
0x68 0x03a6 #GREEK CAPITAL LETTER PHI
0x69 0x0398 #GREEK CAPITAL LETTER THETA
0x6a 0x03a9 #GREEK CAPITAL LETTER OMEGA
0x6b 0x03b4 #GREEK SMALL LETTER DELTA
0x6c 0x221e #INFINITY
0x6d 0x03c6 #GREEK SMALL LETTER PHI
0x6e 0x03b5 #GREEK SMALL LETTER EPSILON
0x6f 0x2229 #INTERSECTION
0x70 0x2261 #IDENTICAL TO
0x71 0x00b1 #PLUS-MINUS SIGN
0x72 0x2265 #GREATER-THAN OR EQUAL TO
0x73 0x2264 #LESS-THAN OR EQUAL TO
0x74 0x2320 #TOP HALF INTEGRAL
0x75 0x2321 #BOTTOM HALF INTEGRAL
0x76 0x00f7 #DIVISION SIGN
0x77 0x2248 #ALMOST EQUAL TO
0x78 0x00b0 #DEGREE SIGN
0x79 0x2219 #BULLET OPERATOR
0x7a 0x00b7 #MIDDLE DOT
0x7b 0x221a #SQUARE ROOT
0x7c 0x207f #SUPERSCRIPT LATIN SMALL LETTER N
0x7d 0x00b2 #SUPERSCRIPT TWO
0x7e 0x25a0 #BLACK SQUARE
0x7f 0x00a0 #NO-BREAK SPACE
3 changes: 3 additions & 0 deletions src/index/singlebyte/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ extern crate encoding_index_tests;
/// ARMSCII-8
pub mod armscii_8;

/// Code page 437 (`cp437`, DOS Latin US).
pub mod cp437;

/// IBM code page 866.
pub mod ibm866;

Expand Down