Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 95 additions & 21 deletions src/common_file_operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use binrw::{BinReaderExt, BinResult, binread};
use half::f16;
use std::ffi::CString;
use std::io::SeekFrom;
use std::io::{Read, SeekFrom};

pub(crate) fn read_bool_from<T: std::convert::From<u8> + std::cmp::PartialEq>(x: T) -> bool {
x == T::from(1u8)
Expand All @@ -14,6 +14,28 @@ pub(crate) fn write_bool_as<T: std::convert::From<u8>>(x: &bool) -> T {
if *x { T::from(1u8) } else { T::from(0u8) }
}

/// Read a null-terminated UTF-8 string from a reader at its current position.
///
/// Bytes are consumed one at a time until a null byte is read (it is consumed
/// but not included in the result) or the reader fails to supply another byte.
/// If the collected bytes are not valid UTF-8, an empty string is returned.
pub(crate) fn read_null_terminated_utf8<R: Read>(reader: &mut R) -> String {
    let mut collected = Vec::new();
    let mut byte = [0u8; 1];
    loop {
        match reader.read_exact(&mut byte) {
            // A non-null byte was read: keep it and continue.
            Ok(()) if byte[0] != 0 => collected.push(byte[0]),
            // Either the terminator was read or the read failed: stop here.
            _ => break,
        }
    }
    match String::from_utf8(collected) {
        Ok(decoded) => decoded,
        Err(_) => String::new(),
    }
}

/// Read a null-terminated UTF-8 string from a byte slice starting at `offset`.
///
/// Returns the decoded string and the offset at which the next string would
/// start, i.e. the position immediately after the null terminator. The returned
/// offset never exceeds `data.len()`, so it can always be passed back in.
///
/// Edge cases:
/// * No terminator after `offset`: the rest of the slice is decoded and
///   `data.len()` is returned as the next offset.
/// * `offset` past the end of `data`: an empty string and `data.len()` are
///   returned instead of panicking.
/// * Bytes that are not valid UTF-8: the string is empty, but the offset still
///   advances past the terminator.
pub(crate) fn null_terminated_utf8(data: &[u8], offset: usize) -> (String, usize) {
    // Checked slicing: callers pass offsets taken from parsed file headers,
    // so an out-of-range value must not abort the whole parse.
    let tail = match data.get(offset..) {
        Some(tail) => tail,
        None => return (String::new(), data.len()),
    };

    let (bytes, next) = match tail.iter().position(|&b| b == 0) {
        // `len` bytes of payload, then skip the terminator itself.
        Some(len) => (&tail[..len], offset + len + 1),
        // Unterminated final string: consume everything that is left.
        None => (tail, data.len()),
    };

    // Validate in place and only allocate once the bytes are known to be UTF-8.
    let s = std::str::from_utf8(bytes)
        .map(str::to_owned)
        .unwrap_or_default();
    (s, next)
}

pub(crate) fn read_string(byte_stream: Vec<u8>) -> String {
let str = String::from_utf8(byte_stream).unwrap_or_default();
str.trim_matches(char::from(0)).to_string() // trim \0 from the end of strings
Expand All @@ -37,33 +59,16 @@ pub(crate) fn strings_parser(
let mut strings: Vec<String> = vec![];

for offset in strings_offset {
let string_offset = base_offset + *offset as u64;

let mut string = String::new();

reader.seek(SeekFrom::Start(string_offset))?;
let mut next_char = reader.read_le::<u8>().unwrap() as char;
while next_char != '\0' {
string.push(next_char);
next_char = reader.read_le::<u8>().unwrap() as char;
}

strings.push(string);
reader.seek(SeekFrom::Start(base_offset + *offset as u64))?;
strings.push(read_null_terminated_utf8(reader));
}

Ok(strings)
}

#[binrw::parser(reader)]
pub(crate) fn read_string_until_null() -> BinResult<String> {
let mut string = String::new();

let mut next_char = reader.read_le::<u8>().unwrap() as char;
while next_char != '\0' {
string.push(next_char);
next_char = reader.read_le::<u8>().unwrap() as char;
}
Ok(string)
Ok(read_null_terminated_utf8(reader))
}

fn read_half1(data: [u16; 1]) -> Half1 {
Expand Down Expand Up @@ -184,4 +189,73 @@ mod tests {
4
);
}

#[test]
fn read_null_terminated_utf8_ascii() {
    // Plain ASCII payload followed by bytes that must be left unread.
    let input = b"hello\0rest";
    let mut reader = std::io::Cursor::new(&input[..]);
    let decoded = read_null_terminated_utf8(&mut reader);
    assert_eq!(decoded, "hello");
    // Five payload bytes plus the consumed terminator.
    assert_eq!(reader.position(), 6);
}

#[test]
fn read_null_terminated_utf8_chinese() {
    // Multi-byte UTF-8: "你好" is E4 BD A0, E5 A5 BD, followed by the terminator.
    let input = b"\xe4\xbd\xa0\xe5\xa5\xbd\0";
    let mut reader = std::io::Cursor::new(&input[..]);
    let decoded = read_null_terminated_utf8(&mut reader);
    assert_eq!(decoded, "你好");
}

#[test]
fn read_null_terminated_utf8_empty() {
    // A terminator in the very first position yields an empty string.
    let input = b"\0trailing";
    let mut reader = std::io::Cursor::new(&input[..]);
    let decoded = read_null_terminated_utf8(&mut reader);
    assert_eq!(decoded, "");
}

#[test]
fn read_null_terminated_utf8_invalid_fallback() {
    // 0xFF can never appear in well-formed UTF-8, so decoding must fall back
    // to the empty string.
    let input: &[u8] = &[0xFF, 0xFE, 0x00];
    let mut reader = std::io::Cursor::new(input);
    let decoded = read_null_terminated_utf8(&mut reader);
    assert_eq!(decoded, "");
}

#[test]
fn null_terminated_utf8_ascii() {
    // Two back-to-back strings; the returned offset chains into the second call.
    let heap = b"foo\0bar\0";

    let (first, after_first) = null_terminated_utf8(heap, 0);
    assert_eq!((first.as_str(), after_first), ("foo", 4));

    let (second, after_second) = null_terminated_utf8(heap, after_first);
    assert_eq!((second.as_str(), after_second), ("bar", 8));
}

#[test]
fn null_terminated_utf8_chinese() {
    // Multi-byte UTF-8: "装备" is E8 A3 85, E5 A4 87, followed by the terminator.
    let heap = b"\xe8\xa3\x85\xe5\xa4\x87\0";
    let decoded = null_terminated_utf8(heap, 0).0;
    assert_eq!(decoded, "装备");
}

#[test]
fn null_terminated_utf8_at_offset() {
    // Start reading one byte in, skipping the leading terminator.
    let heap = b"\0hello\0world\0";

    let (first, after_first) = null_terminated_utf8(heap, 1);
    assert_eq!((first.as_str(), after_first), ("hello", 7));

    let second = null_terminated_utf8(heap, after_first).0;
    assert_eq!(second, "world");
}

#[test]
fn null_terminated_utf8_empty_at_offset() {
    // Offset 2 points directly at a terminator, so the string is empty and the
    // next offset is one past it.
    let heap = b"a\0\0b\0";
    let (decoded, after) = null_terminated_utf8(heap, 2);
    assert_eq!((decoded.as_str(), after), ("", 3));
}
}
4 changes: 3 additions & 1 deletion src/dic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ impl Dictionary {
}

let chara = Dictionary::index_to_rune(&lut, id as u32);
self.dump_dict_node(&mut result, *v as i32, String::from(chara as u8 as char))
if let Some(c) = char::from_u32(chara as u32) {
self.dump_dict_node(&mut result, *v as i32, String::from(c))
}
}

Some(result)
Expand Down
12 changes: 3 additions & 9 deletions src/exd_file_operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use std::{

use binrw::{BinRead, BinWrite, Endian};

use crate::common_file_operations::read_null_terminated_utf8;

use crate::{
excel::{Field, Row},
exd::EXD,
Expand Down Expand Up @@ -125,15 +127,7 @@ impl EXD {
))
.ok()?;

let mut string = String::new();

let mut byte: u8 = Self::read_data_raw(cursor).unwrap();
while byte != 0 {
string.push(byte as char);
byte = Self::read_data_raw(cursor).unwrap();
}

Some(Field::String(string))
Some(Field::String(read_null_terminated_utf8(cursor)))
}
ColumnDataType::Bool => {
let bool_data: i8 = Self::read_data_raw(cursor).unwrap();
Expand Down
40 changes: 7 additions & 33 deletions src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use binrw::{BinWrite, BinWriterExt, binrw};
use bitflags::bitflags;

use crate::common::Platform;
use crate::common_file_operations::{read_bool_from, write_bool_as};
use crate::common_file_operations::{null_terminated_utf8, read_bool_from, write_bool_as};
use crate::{ByteBuffer, ByteSpan, ReadableFile, WritableFile};
use vertex_declarations::{
VERTEX_ELEMENT_SIZE, VertexDeclaration, VertexType, VertexUsage, vertex_element_parser,
Expand Down Expand Up @@ -770,33 +770,15 @@ impl ReadableFile for MDL {
let mut affected_bone_names = vec![];

for offset in &model.bone_name_offsets {
let mut offset = *offset;
let mut string = String::new();

let mut next_char = model.header.strings[offset as usize] as char;
while next_char != '\0' {
string.push(next_char);
offset += 1;
next_char = model.header.strings[offset as usize] as char;
}

affected_bone_names.push(string);
let (name, _) = null_terminated_utf8(&model.header.strings, *offset as usize);
affected_bone_names.push(name);
}

let mut material_names = vec![];

for offset in &model.material_name_offsets {
let mut offset = *offset;
let mut string = String::new();

let mut next_char = model.header.strings[offset as usize] as char;
while next_char != '\0' {
string.push(next_char);
offset += 1;
next_char = model.header.strings[offset as usize] as char;
}

material_names.push(string);
let (name, _) = null_terminated_utf8(&model.header.strings, *offset as usize);
material_names.push(name);
}

let mut lods = vec![];
Expand Down Expand Up @@ -1075,18 +1057,10 @@ impl ReadableFile for MDL {
vertex.position[2] = new_vertex.position[2] - old_vertex.position[2];
}

let mut offset = shape.string_offset;
let mut string = String::new();

let mut next_char = model.header.strings[offset as usize] as char;
while next_char != '\0' {
string.push(next_char);
offset += 1;
next_char = model.header.strings[offset as usize] as char;
}
let (name, _) = null_terminated_utf8(&model.header.strings, shape.string_offset as usize);

shapes.push(Shape {
name: string,
name,
morphed_vertices,
});
}
Expand Down
32 changes: 8 additions & 24 deletions src/mtrl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use std::io::Cursor;

use crate::common::Platform;
use crate::common_file_operations::{Half1, Half2, Half3};
use crate::common_file_operations::{Half1, Half2, Half3, null_terminated_utf8};
use crate::mtrl::ColorDyeTable::{
DawntrailColorDyeTable, LegacyColorDyeTable, OpaqueColorDyeTable,
};
Expand Down Expand Up @@ -471,31 +471,15 @@ impl ReadableFile for Material {

let mut offset = 0;
for _ in 0..mat_data.file_header.texture_count {
let mut string = String::new();

let mut next_char = mat_data.strings[offset] as char;
while next_char != '\0' {
string.push(next_char);
offset += 1;
next_char = mat_data.strings[offset] as char;
}

texture_paths.push(string);

offset += 1;
let (s, next) = null_terminated_utf8(&mat_data.strings, offset);
texture_paths.push(s);
offset = next;
}

// TODO: move to reusable function
let mut shader_package_name = String::new();

offset = mat_data.file_header.shader_package_name_offset as usize;

let mut next_char = mat_data.strings[offset] as char;
while next_char != '\0' {
shader_package_name.push(next_char);
offset += 1;
next_char = mat_data.strings[offset] as char;
}
let (shader_package_name, _) = null_terminated_utf8(
&mat_data.strings,
mat_data.file_header.shader_package_name_offset as usize,
);

// bg/ffxiv/wil_w1/evt/w1eb/material/w1eb_f1_vfog1a.mtrl has a shader value list of 9, which doesn't make sense in this system
// eventually we need to un-hardcode it from vec4 or whatever
Expand Down
14 changes: 3 additions & 11 deletions src/scn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

use std::io::SeekFrom;

use binrw::{BinReaderExt, BinResult, BinWrite, binrw};
use binrw::{BinResult, BinWrite, binrw};

use crate::{
common_file_operations::{read_bool_from, write_bool_as},
common_file_operations::{read_bool_from, read_null_terminated_utf8, write_bool_as},
layer::Layer,
string_heap::{HeapPointer, HeapStringFromPointer, StringHeap},
tmb::Tmb,
Expand Down Expand Up @@ -438,16 +438,8 @@ fn strings_from_offsets(offsets: &Vec<i32>) -> BinResult<Vec<String>> {
for offset in offsets {
let string_offset = *offset as u64;

let mut string = String::new();

reader.seek(SeekFrom::Start(base_offset + string_offset))?;
let mut next_char = reader.read_le::<u8>().unwrap() as char;
while next_char != '\0' {
string.push(next_char);
next_char = reader.read_le::<u8>().unwrap() as char;
}

strings.push(string);
strings.push(read_null_terminated_utf8(reader));
}

Ok(strings)
Expand Down
2 changes: 1 addition & 1 deletion src/sqpack/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::io::{Read, Seek, SeekFrom, Write};
use binrw::{BinRead, BinWrite, Endian, binrw};
use data::{BlockHeader, CompressionMode};

use crate::common::{Platform, Region};
use crate::common::Platform;
use crate::compression::no_header_decompress;

mod data;
Expand Down
13 changes: 3 additions & 10 deletions src/string_heap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::io::{Cursor, Read, Seek, SeekFrom, Write};

use binrw::{BinRead, BinReaderExt, BinResult, BinWrite, Endian, Error, binrw};

use crate::{ByteBuffer, common_file_operations::write_string};
use crate::{ByteBuffer, common_file_operations::{read_null_terminated_utf8, write_string}};

/// A string that exists in a different location in the file, usually a heap with a bunch of other strings.
#[binrw]
Expand Down Expand Up @@ -166,18 +166,11 @@ impl StringHeap {
{
let offset = self.pos + offset as u64;

let mut string = String::new();

let old_pos = reader.stream_position().unwrap();

reader.seek(SeekFrom::Start(offset)).unwrap();
let mut next_char = reader.read_le::<u8>().unwrap() as char;
while next_char != '\0' {
string.push(next_char);
next_char = reader.read_le::<u8>().unwrap() as char;
}
let s = read_null_terminated_utf8(reader);
reader.seek(SeekFrom::Start(old_pos)).unwrap();
string
s
}
}

Expand Down
Loading