Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2077,6 +2077,9 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
offset = False
addr_imm = None

if prev == "add\t%al,(%eax)" and "dir32" in row:
return f".dword {row.split()[-1]}", repl

# Calls

# Example lcall $0x0, $0x00
Expand Down Expand Up @@ -2201,6 +2204,65 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
def is_end_of_function(self, mnemonic: str, args: str) -> bool:
return mnemonic == "ret"

def _post_process_jump_tables(self, lines: List["Line"]) -> None:

# Handle jump tables produced by MSVC.
# MSVC jump table labels are in the format of "$L{NUM}"

# Removes extra "add\t%al,(%eax)" which correspond to an existing jumptable entry
# and adds branch arrows to the jump table entries

# This will have errors for multiple text sections as objdump (at least 2.38) doesn't emit
# the correct labels for sections that are not the first

jump_table_targets: List[str] = [] # target_label
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems to be unused

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it. I have the targets stored to it but they are never read from it.


labels: Dict[str, int] = {} # line number for each label

was_previous_jumptable_entry = False

for index, line in enumerate(lines[:]):
if line.original.startswith(".dword"):

orig_jump_table_target: str = line.original.split(".dword")[1].strip()

# Remove "-0x..." from older objdump relocations
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does older mean, are we talking about different compiler or objdump versions, or is it older in the sense of us not yet having processed them?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Older objdump versions such as 2.39 emit -0x... in relocations relative to the section start
image
image

The binutils that decomp.me uses is GNU objdump (GNU Binutils) 2.41.90.20240122 which does not emit the -0x....

jump_table_target = re.sub(r"-0x[0-9a-f]+", "", orig_jump_table_target)

jump_table_targets.append(jump_table_target)

# If a line begins with ".dword", it is a jumptable
was_previous_jumptable_entry = True

target_line_num: Union[int, None]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
target_line_num: Union[int, None]
target_line_num: Optional[int]

try:
target_line_num = lines[
labels[jump_table_target + ":"] + 1
].line_num
except:
target_line_num = None

if target_line_num is not None:
line.original = line.original.replace(
orig_jump_table_target, hex(target_line_num).replace("0x", "")
)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we set line.original = ".dword\t" + hex(...) instead? Search-replace always feels fragile to me


line.branch_target = target_line_num
elif was_previous_jumptable_entry:
# if the previous line was a jumptable and the current line is "add\t%al,(%eax)"
# this is the 2nd half of the previous jumptable entry
if line.original == "add\t%al,(%eax)":
lines.remove(line)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.remove(...) loops over the entire lines list and checks dataclass equality, which is slow and can have false positives. We could instead e.g. do

indices_to_delete = []
for index, ... in enumerate(lines):
    if ...:
        indices_to_delete.append(index)
for index in reversed(indices_to_delete):
    del lines[index]


was_previous_jumptable_entry = False
elif line.mnemonic == "<label>" and line.original.startswith("$L"):
# If this is a label that is an MSVC jumptable label "$L..."
# add it to the labels list
labels[line.original] = lines.index(line)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... and then this would let us use index instead of finding the line anew because lines has not yet been re-indexed


def post_process(self, lines: List["Line"]) -> None:
self._post_process_jump_tables(lines)


class AsmProcessorSH2(AsmProcessor):
def __init__(self, config: Config) -> None:
Expand Down