-
Notifications
You must be signed in to change notification settings - Fork 19
Open
Description
utils.parse_type() coerces certain types (for example datetime and GUID values) to strings, so callers lose the ability to manipulate or reformat them as native Python types.
access_parser/access_parser/utils.py
Lines 141 to 196 in 7b73391
def parse_type(data_type, buffer, length=None, version=3, props=None):
    """Decode one raw value according to its Access data type code.

    Args:
        data_type: Type code, compared against the module's ``TYPE_*``
            constants.
        buffer: Bytes-like object holding the raw value at offset 0.
        length: Number of leading bytes to keep for ``TYPE_BINARY`` values;
            ``None`` keeps the whole buffer. Unused for every other type.
        version: Format version of the file being parsed. For versions above
            3, text without a leading byte-order mark is decoded as UTF-16;
            otherwise text goes through ``get_decoded_text``.
        props: Optional mapping of column properties. Only its ``"Format"``
            entry is consulted, and only for ``TYPE_MONEY`` values.

    Returns:
        The decoded value, whose Python type depends on ``data_type``:

        * integer types and unformatted money: ``int``
        * float types: ``float``
        * formatted money: whatever ``parse_money_type`` returns, or the
          matching ``FORMAT_TO_DEFAULT_VALUE`` entry when the value is zero
        * datetime: the string produced by ``mdb_date_to_readable``
        * GUID: the canonical string form of the UUID
        * binary, OLE and 17-byte values: a slice of (or the whole) buffer
        * text: ``str`` with NUL characters removed
        * any other type code: the empty string
    """
    parsed = ""
    # Bool or int8
    if data_type == TYPE_INT8:
        parsed = struct.unpack_from("b", buffer)[0]
    elif data_type == TYPE_INT16:
        parsed = struct.unpack_from("h", buffer)[0]
    elif data_type in (TYPE_INT32, TYPE_COMPLEX):
        parsed = struct.unpack_from("i", buffer)[0]
    elif data_type == TYPE_MONEY:
        parsed = struct.unpack_from("q", buffer)[0]
        if props and "Format" in props:
            prop_format = props['Format']
            if parsed == 0:
                # A zero amount is rendered with the default value registered
                # for the first format prefix that matches.
                defaults = [
                    value
                    for prefix, value in FORMAT_TO_DEFAULT_VALUE.items()
                    if prop_format.startswith(prefix)
                ]
                if defaults:
                    parsed = defaults[0]
                else:
                    # Unknown format: keep the raw zero rather than leaking
                    # the empty lookup list to the caller.
                    LOGGER.warning(f"parse_type got unknown format while parsing money field {prop_format}")
            else:
                parsed = parse_money_type(parsed, prop_format)
    elif data_type == TYPE_FLOAT32:
        parsed = struct.unpack_from("f", buffer)[0]
    elif data_type == TYPE_FLOAT64:
        parsed = struct.unpack_from("d", buffer)[0]
    elif data_type == TYPE_DATETIME:
        # The 8 bytes are read as an integer here; mdb_date_to_readable
        # reinterprets that bit pattern as a double.
        double_datetime = struct.unpack_from("q", buffer)[0]
        parsed = mdb_date_to_readable(double_datetime)
    elif data_type == TYPE_BINARY:
        parsed = buffer[:length]
    elif data_type == TYPE_OLE:
        parsed = buffer
    elif data_type == TYPE_GUID:
        guid = uuid.UUID(buffer[:16].hex())
        parsed = str(guid)
    elif data_type == TYPE_96_bit_17_BYTES:
        parsed = buffer[:17]
    elif data_type == TYPE_TEXT:
        if version > 3:
            # Looks like if BOM is present text is already decoded
            if buffer.startswith((b"\xfe\xff", b"\xff\xfe")):
                parsed = get_decoded_text(buffer[2:])
            else:
                parsed = buffer.decode("utf-16", errors='ignore')
        else:
            parsed = get_decoded_text(buffer)
        if "\x00" in parsed:
            LOGGER.debug(f"Parsed string contains NUL (0x00) characters: {parsed}")
            parsed = parsed.replace("\x00", "")
    else:
        LOGGER.debug(f"parse_type - unsupported data type: {data_type}")
    return parsed
This especially affects datetime, which is further processed by utils.mdb_date_to_readable():
access_parser/access_parser/utils.py
Lines 59 to 73 in 7b73391
| # https://stackoverflow.com/questions/45560782 | |
def mdb_date_to_readable(double_time):
    """Convert a raw 64-bit Access date value into a readable string.

    Args:
        double_time: Integer carrying the bit pattern of a double. The
            double is a (possibly fractional) number of days that is added
            to ``ACCESS_EPOCH``.

    Returns:
        ``str()`` of the resulting date/time, ``"(Empty Date)"`` when the
        result equals ``ACCESS_EPOCH``, or ``"(Invalid Date)"`` when the
        value cannot be converted: it does not fit an unsigned 64-bit
        integer (including any negative integer), it encodes NaN or
        infinity, or the resulting date is out of range.
    """
    try:
        # Reinterpret the integer's bytes as a double. struct.pack("Q", ...)
        # raises struct.error for negative or over-wide integers.
        dtime_bytes = struct.pack("Q", double_time)
        dtime_double = struct.unpack('<d', dtime_bytes)[0]
        dtime_frac, dtime_whole = math.modf(dtime_double)
        # timedelta raises ValueError for NaN and OverflowError for infinity
        # or oversized day counts, so non-finite payloads are rejected here.
        offset = timedelta(days=dtime_whole) + timedelta(days=dtime_frac)
        dtime = ACCESS_EPOCH + offset
    except (OverflowError, ValueError, struct.error):
        return "(Invalid Date)"
    if dtime == ACCESS_EPOCH:
        return "(Empty Date)"
    return str(dtime)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels