diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b07c819 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,42 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file LICENSE.rst or https://cmake.org/licensing for details. + +cmake_minimum_required(VERSION 4.2.0) + +# Reject any attempt to use a toolchain file. We must not use one because +# we could be downloading it here. If the CMAKE_TOOLCHAIN_FILE environment +# variable is set, the cache variable will have been initialized from it. +unset(CMAKE_TOOLCHAIN_FILE CACHE) +unset(ENV{CMAKE_TOOLCHAIN_FILE}) + +# We name the project and the target for the ExternalProject_Add() call +# to something that will highlight to the user what we are working on if +# something goes wrong and an error message is produced. + +project(libelfin-populate NONE) + + +# Pass through things we've already detected in the main project to avoid +# paying the cost of redetecting them again in ExternalProject_Add() +set(GIT_EXECUTABLE [==[/opt/homebrew/bin/git]==]) +set(Git_VERSION [==[2.52.0]==]) +set_property(GLOBAL PROPERTY _CMAKE_FindGit_GIT_EXECUTABLE_VERSION + [==[/opt/homebrew/bin/git;2.52.0]==] +) + + +include(ExternalProject) +ExternalProject_Add(libelfin-populate + "UPDATE_DISCONNECTED" "False" "GIT_REPOSITORY" "https://github.com/plasma-umass/libelfin.git" "EXTERNALPROJECT_INTERNAL_ARGUMENT_SEPARATOR" "GIT_TAG" "8bd19d1e4bb19ec1046d44e1ac1b3bb72b91d0c5" "GIT_SHALLOW" "TRUE" + SOURCE_DIR "/Users/emery/git/coz-portage/_deps/libelfin-src" + BINARY_DIR "/Users/emery/git/coz-portage/_deps/libelfin-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + USES_TERMINAL_DOWNLOAD YES + USES_TERMINAL_UPDATE YES + USES_TERMINAL_PATCH YES +) + + diff --git a/Makefile b/Makefile deleted file mode 100644 index 30c8a14..0000000 --- a/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -all: - $(MAKE) -C elf - $(MAKE) -C dwarf - -install: - $(MAKE) -C elf install - $(MAKE) -C dwarf install - -clean: - $(MAKE) -C elf clean - $(MAKE) -C dwarf clean - -check: - cd test && ./test.sh diff --git a/dwarf/.gitignore b/dwarf/.gitignore index 1049e91..3154398 100644 --- a/dwarf/.gitignore +++ b/dwarf/.gitignore @@ -1,5 +1,4 @@ *.o -to_string.cc libdwarf++.a libdwarf++.so libdwarf++.so.* diff --git a/dwarf/Makefile b/dwarf/Makefile deleted file mode 100644 index 2d47003..0000000 --- a/dwarf/Makefile +++ /dev/null @@ -1,75 +0,0 @@ -# Changed when ABI backwards compatibility is broken. -# Typically uses the major version. -SONAME = 0 - -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall -fPIC - -all: libdwarf++.a libdwarf++.so.$(SONAME) libdwarf++.so libdwarf++.pc - -SRCS := dwarf.cc cursor.cc die.cc value.cc abbrev.cc \ - expr.cc rangelist.cc line.cc attrs.cc \ - die_str_map.cc elf.cc to_string.cc -HDRS := dwarf++.hh data.hh internal.hh small_vector.hh ../elf/to_hex.hh -CLEAN := - -libdwarf++.a: $(SRCS:.cc=.o) - ar rcs $@ $^ -CLEAN += libdwarf++.a $(SRCS:.cc=.o) - -$(SRCS:.cc=.o): $(HDRS) - -to_string.cc: ../elf/enum-print.py dwarf++.hh data.hh Makefile - @echo "// Automatically generated by make at $$(date)" > to_string.cc - @echo "// DO NOT EDIT" >> to_string.cc - @echo >> to_string.cc - @echo '#include "internal.hh"' >> to_string.cc - @echo >> to_string.cc - @echo 'DWARFPP_BEGIN_NAMESPACE' >> to_string.cc - @echo >> to_string.cc - python3 ../elf/enum-print.py < dwarf++.hh >> to_string.cc - python3 ../elf/enum-print.py -s _ -u --hex -x hi_user -x lo_user < data.hh >> to_string.cc - @echo 'DWARFPP_END_NAMESPACE' >> to_string.cc -CLEAN += to_string.cc - -libdwarf++.so.$(SONAME): $(SRCS:.cc=.o) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ -CLEAN += libdwarf++.so.* - -libdwarf++.so: - ln -s $@.$(SONAME) $@ -CLEAN += libdwarf++.so - -# Create pkg-config for local library and headers. This will be -# transformed in to the correct global pkg-config by install. -libdwarf++.pc: always - @(VER=$$(git describe --match 'v*' | sed -e s/^v//); \ - echo "libdir=$$PWD"; \ - echo "includedir=$$PWD"; \ - echo ""; \ - echo "Name: libdwarf++"; \ - echo "Description: C++11 DWARF library"; \ - echo "Version: $$VER"; \ - echo "Requires: libelf++ = $$VER"; \ - echo "Libs: -L\$${libdir} -ldwarf++"; \ - echo "Cflags: -I\$${includedir}") > $@ -CLEAN += libdwarf++.pc - -.PHONY: always - -PREFIX?=/usr/local - -install: libdwarf++.a libdwarf++.so.$(SONAME) libdwarf++.so libdwarf++.pc - install -d $(DESTDIR)$(PREFIX)/lib/pkgconfig - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.a - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.so.$(SONAME) - install -t $(DESTDIR)$(PREFIX)/lib libdwarf++.so - install -d $(DESTDIR)$(PREFIX)/include/libelfin/dwarf - install -t $(DESTDIR)$(PREFIX)/include/libelfin/dwarf data.hh dwarf++.hh small_vector.hh - sed 's,^libdir=.*,libdir=$(PREFIX)/lib,;s,^includedir=.*,includedir=$(PREFIX)/include,' libdwarf++.pc \ - > $(DESTDIR)$(PREFIX)/lib/pkgconfig/libdwarf++.pc - -clean: - rm -f $(CLEAN) - -.DELETE_ON_ERROR: diff --git a/dwarf/abbrev.cc b/dwarf/abbrev.cc index f77dc0c..25c69aa 100644 --- a/dwarf/abbrev.cc +++ b/dwarf/abbrev.cc @@ -98,12 +98,37 @@ resolve_type(DW_AT name, DW_FORM form) case DW_FORM::ref_addr: case DW_FORM::ref_sig8: case DW_FORM::ref_udata: + case DW_FORM::ref_sup4: + case DW_FORM::ref_sup8: return value::type::reference; case DW_FORM::string: case DW_FORM::strp: + case DW_FORM::line_strp: + case DW_FORM::strp_sup: + case DW_FORM::strx: + case DW_FORM::strx1: + case DW_FORM::strx2: + case DW_FORM::strx3: + case DW_FORM::strx4: return value::type::string; + case DW_FORM::addrx: + case DW_FORM::addrx1: + case DW_FORM::addrx2: + case DW_FORM::addrx3: + case DW_FORM::addrx4: + return value::type::address; + + case DW_FORM::implicit_const: + return value::type::constant; + + case DW_FORM::loclistx: + return value::type::loclist; + + case DW_FORM::rnglistx: + return value::type::rangelist; + case DW_FORM::indirect: // There's nothing meaningful we can do return value::type::invalid; @@ -138,15 +163,18 @@ resolve_type(DW_AT name, DW_FORM form) return value::type::invalid; default: - throw format_error("DW_FORM_sec_offset not expected for attribute " + - to_string(name)); + // DWARF 5 has many new attributes using sec_offset + // (str_offsets_base, addr_base, rnglists_base, loclists_base, etc.) + // Just treat them as invalid for now to allow skipping + return value::type::invalid; } } throw format_error("unknown attribute form " + to_string(form)); } -attribute_spec::attribute_spec(DW_AT name, DW_FORM form) - : name(name), form(form), type(resolve_type(name, form)) +attribute_spec::attribute_spec(DW_AT name, DW_FORM form, int64_t implicit_const) + : name(name), form(form), type(resolve_type(name, form)), + implicit_const(implicit_const) { } @@ -167,7 +195,10 @@ abbrev_entry::read(cursor *cur) DW_FORM form = (DW_FORM)cur->uleb128(); if (name == (DW_AT)0 && form == (DW_FORM)0) break; - attributes.push_back(attribute_spec(name, form)); + int64_t implicit_const = 0; + if (form == DW_FORM::implicit_const) + implicit_const = cur->sleb128(); + attributes.push_back(attribute_spec(name, form, implicit_const)); } attributes.shrink_to_fit(); return true; diff --git a/dwarf/cursor.cc b/dwarf/cursor.cc index 22b28b1..ea59641 100644 --- a/dwarf/cursor.cc +++ b/dwarf/cursor.cc @@ -92,7 +92,9 @@ cursor::string(std::string &out) size_t size; const char *p = this->cstr(&size); out.resize(size); - memmove(&out.front(), p, size); + if (size > 0) { + memmove(&out.front(), p, size); + } } const char * @@ -123,6 +125,8 @@ cursor::skip_form(DW_FORM form) case DW_FORM::sec_offset: case DW_FORM::ref_addr: case DW_FORM::strp: + case DW_FORM::line_strp: + case DW_FORM::strp_sup: switch (sec->fmt) { case format::dwarf32: pos += 4; @@ -156,29 +160,49 @@ cursor::skip_form(DW_FORM form) // fixed-length forms case DW_FORM::flag_present: + case DW_FORM::implicit_const: break; case DW_FORM::flag: case DW_FORM::data1: case DW_FORM::ref1: + case DW_FORM::strx1: + case DW_FORM::addrx1: pos += 1; break; case DW_FORM::data2: case DW_FORM::ref2: + case DW_FORM::strx2: + case DW_FORM::addrx2: pos += 2; break; + case DW_FORM::strx3: + case DW_FORM::addrx3: + pos += 3; + break; case DW_FORM::data4: case DW_FORM::ref4: + case DW_FORM::ref_sup4: + case DW_FORM::strx4: + case DW_FORM::addrx4: pos += 4; break; case DW_FORM::data8: case DW_FORM::ref_sig8: + case DW_FORM::ref_sup8: pos += 8; break; + case DW_FORM::data16: + pos += 16; + break; // variable-length forms case DW_FORM::sdata: case DW_FORM::udata: case DW_FORM::ref_udata: + case DW_FORM::strx: + case DW_FORM::addrx: + case DW_FORM::loclistx: + case DW_FORM::rnglistx: while (pos < sec->end && (*(uint8_t*)pos & 0x80)) pos++; pos++; diff --git a/dwarf/data.hh b/dwarf/data.hh index e6002b7..cd32770 100644 --- a/dwarf/data.hh +++ b/dwarf/data.hh @@ -250,7 +250,27 @@ enum class DW_FORM sec_offset = 0x17, // lineptr, loclistptr, macptr, rangelistptr exprloc = 0x18, // exprloc flag_present = 0x19, // flag + + // DWARF 5 + strx = 0x1a, // string index in .debug_str_offsets + addrx = 0x1b, // address index in .debug_addr + ref_sup4 = 0x1c, // reference + strp_sup = 0x1d, // string + data16 = 0x1e, // constant + line_strp = 0x1f, // string ref_sig8 = 0x20, // reference + implicit_const = 0x21, // constant encoded in abbrev + loclistx = 0x22, // location list index + rnglistx = 0x23, // range list index + ref_sup8 = 0x24, // reference + strx1 = 0x25, // 1-byte string index + strx2 = 0x26, // 2-byte string index + strx3 = 0x27, // 3-byte string index + strx4 = 0x28, // 4-byte string index + addrx1 = 0x29, // 1-byte address index + addrx2 = 0x2a, // 2-byte address index + addrx3 = 0x2b, // 3-byte address index + addrx4 = 0x2c, // 4-byte address index }; std::string @@ -560,6 +580,37 @@ enum class DW_LNE std::string to_string(DW_LNE v); +// Line number content types (DWARF5 section 7.22 table 7.30) +enum class DW_LNCT +{ + path = 0x01, + directory_index = 0x02, + timestamp = 0x03, + size = 0x04, + MD5 = 0x05, + lo_user = 0x2000, + hi_user = 0x3fff, +}; + +std::string +to_string(DW_LNCT v); + +// Range list entry encodings (DWARF5 section 7.25) +enum class DW_RLE : ubyte +{ + end_of_list = 0x00, + base_addressx = 0x01, + startx_endx = 0x02, + startx_length = 0x03, + offset_pair = 0x04, + base_address = 0x05, + start_end = 0x06, + start_length = 0x07, +}; + +std::string +to_string(DW_RLE v); + DWARFPP_END_NAMESPACE #endif diff --git a/dwarf/die.cc b/dwarf/die.cc index a87c018..cd3f390 100644 --- a/dwarf/die.cc +++ b/dwarf/die.cc @@ -74,7 +74,7 @@ die::operator[](DW_AT attr) const int i = 0; for (auto &a : abbrev->attributes) { if (a.name == attr) - return value(cu, a.name, a.form, a.type, attrs[i]); + return value(cu, a, attrs[i]); i++; } } @@ -174,7 +174,7 @@ die::attributes() const // custom iterator. int i = 0; for (auto &a : abbrev->attributes) { - res.push_back(make_pair(a.name, value(cu, a.name, a.form, a.type, attrs[i]))); + res.push_back(make_pair(a.name, value(cu, a, attrs[i]))); i++; } return res; diff --git a/dwarf/dwarf++.hh b/dwarf/dwarf++.hh index a53f87e..0d282ef 100644 --- a/dwarf/dwarf++.hh +++ b/dwarf/dwarf++.hh @@ -38,6 +38,7 @@ class line_table; // Internal type forward-declarations struct section; struct abbrev_entry; +struct attribute_spec; struct cursor; // XXX Audit for binary-compatibility @@ -81,16 +82,20 @@ public: enum class section_type { abbrev, + addr, // DWARF 5 .debug_addr aranges, frame, info, line, + line_str, loc, macinfo, pubnames, pubtypes, ranges, + rnglists, // DWARF 5 .debug_rnglists str, + str_offsets, types, }; @@ -567,7 +572,8 @@ public: /** * Construct a value with type `type::invalid`. */ - value() : cu(nullptr), typ(type::invalid) { } + value() : cu(nullptr), form(DW_FORM::addr), typ(type::invalid), + offset(0), has_implicit_const(false), implicit_const(0) { } value(const value &o) = default; value(value &&o) = default; @@ -705,7 +711,7 @@ private: friend class die; value(const unit *cu, - DW_AT name, DW_FORM form, type typ, section_offset offset); + const attribute_spec &spec, section_offset offset); void resolve_indirect(DW_AT name); @@ -713,6 +719,8 @@ private: DW_FORM form; type typ; section_offset offset; + bool has_implicit_const; + int64_t implicit_const; }; std::string @@ -914,10 +922,11 @@ public: * the associated compilation unit. cu_low_pc is the * DW_AT::low_pc attribute of the compilation unit containing * the referring DIE or 0 (this is used as the base address of - * the range list). + * the range list). is_dwarf5 indicates whether this uses + * DWARF 5 format (DW_RLE_* encodings). */ rangelist(const std::shared_ptr
&sec, section_offset off, - unsigned cu_addr_size, taddr cu_low_pc); + unsigned cu_addr_size, taddr cu_low_pc, bool is_dwarf5 = false); /** * Construct a range list from a sequence of {low, high} @@ -966,6 +975,7 @@ private: std::vector synthetic; std::shared_ptr
sec; taddr base_addr; + bool is_dwarf5; }; /** @@ -994,14 +1004,15 @@ public: /** * \internal Construct an end iterator. */ - iterator() : sec(nullptr), base_addr(0), pos(0) { } + iterator() : sec(nullptr), base_addr(0), pos(0), is_dwarf5(false) { } /** * \internal Construct an iterator that reads rangelist data * from the beginning of the given section and starts with the - * given base address. + * given base address. is_dwarf5 indicates whether to use + * DWARF 5 format parsing (DW_RLE_* encodings). */ - iterator(const std::shared_ptr
&sec, taddr base_addr); + iterator(const std::shared_ptr
&sec, taddr base_addr, bool is_dwarf5 = false); /** Copy constructor */ iterator(const iterator &o) = default; @@ -1050,6 +1061,7 @@ private: taddr base_addr; section_offset pos; rangelist::entry entry; + bool is_dwarf5; }; ////////////////////////////////////////////////////////////////// @@ -1079,7 +1091,7 @@ public: */ line_table(const std::shared_ptr
&sec, section_offset offset, unsigned cu_addr_size, const std::string &cu_comp_dir, - const std::string &cu_name); + const std::string &cu_name, const dwarf *dw = nullptr); /** * Construct an invalid, empty line table. @@ -1122,6 +1134,7 @@ public: */ iterator end() const; + /** * Return an iterator to the line table entry containing addr * (roughly, the entry with the highest address less than or @@ -1264,7 +1277,7 @@ public: * for all fields. is_stmt has no default value, so the * caller must provide it. */ - void reset(bool is_stmt); + void reset(bool is_stmt, unsigned default_file_index); /** * Return a descriptive string of the form diff --git a/dwarf/dwarf.cc b/dwarf/dwarf.cc index 09d6fb0..2ca56c6 100644 --- a/dwarf/dwarf.cc +++ b/dwarf/dwarf.cc @@ -122,7 +122,38 @@ dwarf::get_section(section_type type) const if (!data) throw format_error(std::string(elf::section_type_to_name(type)) + " section missing"); - m->sections[type] = std::make_shared
(section_type::str, data, size, m->sec_info->ord); + + // Determine format for auxiliary sections. + // DWARF 5 sections like str_offsets and addr have headers with initial length. + // Simple sections like str and line_str are just raw string data. + format fmt = format::unknown; + if ((type == section_type::str_offsets || type == section_type::addr) && size >= 4) { + // .debug_str_offsets and .debug_addr have headers starting with initial length + uint32_t initial_length = *reinterpret_cast(data); + if (initial_length == 0xffffffff) { + fmt = format::dwarf64; + } else { + fmt = format::dwarf32; + } + } else if (type == section_type::str || type == section_type::line_str) { + // String sections don't need format - they're just null-terminated strings. + // Use dwarf32 as default since cursor operations don't use format for strings. + fmt = format::dwarf32; + } else { + // For other sections, try to detect format from initial length if present + if (size >= 4) { + uint32_t initial_length = *reinterpret_cast(data); + if (initial_length == 0xffffffff) { + fmt = format::dwarf64; + } else if (initial_length < 0xfffffff0) { + fmt = format::dwarf32; + } + // If initial_length is a reserved value, leave format unknown + } + } + + m->sections[type] = std::make_shared
(type, data, size, + m->sec_info->ord, fmt); return m->sections[type]; } @@ -314,7 +345,7 @@ compilation_unit::get_line_table() const m->lt = line_table(sec, d[DW_AT::stmt_list].as_sec_offset(), m->subsec->addr_size, comp_dir, - at_name(d)); + at_name(d), &m->file); } done: return m->lt; diff --git a/dwarf/elf.cc b/dwarf/elf.cc index baf8e67..1ed0134 100644 --- a/dwarf/elf.cc +++ b/dwarf/elf.cc @@ -15,18 +15,22 @@ static const struct const char *name; section_type type; } sections[] = { - {".debug_abbrev", section_type::abbrev}, - {".debug_aranges", section_type::aranges}, - {".debug_frame", section_type::frame}, - {".debug_info", section_type::info}, - {".debug_line", section_type::line}, - {".debug_loc", section_type::loc}, - {".debug_macinfo", section_type::macinfo}, - {".debug_pubnames", section_type::pubnames}, - {".debug_pubtypes", section_type::pubtypes}, - {".debug_ranges", section_type::ranges}, - {".debug_str", section_type::str}, - {".debug_types", section_type::types}, + {".debug_abbrev", section_type::abbrev}, + {".debug_addr", section_type::addr}, + {".debug_aranges", section_type::aranges}, + {".debug_frame", section_type::frame}, + {".debug_info", section_type::info}, + {".debug_line", section_type::line}, + {".debug_line_str", section_type::line_str}, + {".debug_loc", section_type::loc}, + {".debug_macinfo", section_type::macinfo}, + {".debug_pubnames", section_type::pubnames}, + {".debug_pubtypes", section_type::pubtypes}, + {".debug_ranges", section_type::ranges}, + {".debug_rnglists", section_type::rnglists}, + {".debug_str", section_type::str}, + {".debug_str_offsets", section_type::str_offsets}, + {".debug_types", section_type::types}, }; bool diff --git a/dwarf/internal.hh b/dwarf/internal.hh index 42679ca..ac30c34 100644 --- a/dwarf/internal.hh +++ b/dwarf/internal.hh @@ -230,7 +230,10 @@ struct attribute_spec // Computed information value::type type; - attribute_spec(DW_AT name, DW_FORM form); + // For DW_FORM_implicit_const, stores the SLEB128 constant. + int64_t implicit_const; + + attribute_spec(DW_AT name, DW_FORM form, int64_t implicit_const = 0); }; typedef std::uint64_t abbrev_code; diff --git a/dwarf/line.cc b/dwarf/line.cc index d3cbc4e..e66cf1c 100644 --- a/dwarf/line.cc +++ b/dwarf/line.cc @@ -24,9 +24,19 @@ static const int opcode_lengths[] = { struct line_table::impl { + struct entry_format { + DW_LNCT content; + DW_FORM form; + }; + shared_ptr
sec; + const dwarf *dw; + shared_ptr
line_str_sec; + shared_ptr
str_sec; + string comp_dir; // Header information + uhalf version; section_offset program_offset; ubyte minimum_instruction_length; ubyte maximum_operations_per_instruction; @@ -34,9 +44,11 @@ struct line_table::impl sbyte line_base; ubyte line_range; ubyte opcode_base; + unsigned file_index_base; vector standard_opcode_lengths; vector include_directories; vector file_names; + vector file_entry_formats; // The offset in sec following the last read file name entry. // File name entries can appear both in the line table header @@ -49,42 +61,63 @@ struct line_table::impl // know we've gathered all file names. bool file_names_complete; - impl() : last_file_name_end(0), file_names_complete(false) {}; + impl() : dw(nullptr), version(0), file_index_base(1), + last_file_name_end(0), + file_names_complete(false) {}; bool read_file_entry(cursor *cur, bool in_header); + void add_include_directory(const string &dir); + void add_file_entry(string file_name, uint64_t dir_index, + uint64_t mtime, uint64_t length); + vector read_entry_formats(cursor *cur); + void read_v5_directory_table(cursor *cur); + void read_v5_file_table(cursor *cur); + void read_file_entry_v5(cursor *cur); + string read_form_string(cursor *cur, DW_FORM form); + uint64_t read_form_unsigned(cursor *cur, DW_FORM form); + string read_string_from_section(section_type type, section_offset off); }; line_table::line_table(const shared_ptr
&sec, section_offset offset, unsigned cu_addr_size, const string &cu_comp_dir, - const string &cu_name) + const string &cu_name, const dwarf *dw) : m(make_shared()) { - // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir + m->dw = dw; + // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir string comp_dir, abs_path; if (cu_comp_dir.empty() || cu_comp_dir.back() == '/') comp_dir = cu_comp_dir; else comp_dir = cu_comp_dir + '/'; + m->comp_dir = comp_dir; // Read the line table header (DWARF2 section 6.2.4, DWARF3 - // section 6.2.4, DWARF4 section 6.2.3) + // section 6.2.4, DWARF4 section 6.2.3, DWARF5 section 6.2.4) cursor cur(sec, offset); m->sec = cur.subsection(); cur = cursor(m->sec); cur.skip_initial_length(); - m->sec->addr_size = cu_addr_size; // Basic header information - uhalf version = cur.fixed(); - if (version < 2 || version > 4) + m->version = cur.fixed(); + if (m->version < 2 || m->version > 5) throw format_error("unknown line number table version " + - std::to_string(version)); + std::to_string(m->version)); + if (m->version >= 5) { + m->sec->addr_size = cur.fixed(); + ubyte segment_selector_size = cur.fixed(); + (void)segment_selector_size; + } else { + m->sec->addr_size = cu_addr_size; + } + m->file_index_base = (m->version >= 5) ? 0 : 1; section_length header_length = cur.offset(); m->program_offset = cur.get_section_offset() + header_length; m->minimum_instruction_length = cur.fixed(); m->maximum_operations_per_instruction = 1; - if (version >= 4) + if (m->version >= 4) m->maximum_operations_per_instruction = cur.fixed(); if (m->maximum_operations_per_instruction == 0) throw format_error("maximum_operations_per_instruction cannot" @@ -95,7 +128,7 @@ line_table::line_table(const shared_ptr
&sec, section_offset offset, if (m->line_range == 0) throw format_error("line_range cannot be 0 in line number table"); m->opcode_base = cur.fixed(); - + static_assert(sizeof(opcode_lengths) / sizeof(opcode_lengths[0]) == 13, "opcode_lengths table has wrong length"); @@ -118,31 +151,45 @@ line_table::line_table(const shared_ptr
&sec, section_offset offset, } // Include directories list - string incdir; - // Include directory 0 is implicitly the compilation unit - // current directory - m->include_directories.push_back(comp_dir); - while (true) { - cur.string(incdir); - if (incdir.empty()) - break; - if (incdir.back() != '/') - incdir += '/'; - if (incdir[0] == '/') - m->include_directories.push_back(move(incdir)); - else - m->include_directories.push_back(comp_dir + incdir); + m->include_directories.clear(); + if (m->version < 5) + m->include_directories.push_back(m->comp_dir); + if (m->version >= 5) { + m->read_v5_directory_table(&cur); + } else { + string incdir; + while (true) { + cur.string(incdir); + if (incdir.empty()) + break; + if (incdir.back() != '/') + incdir += '/'; + if (incdir[0] == '/') + m->include_directories.push_back(move(incdir)); + else + m->include_directories.push_back(comp_dir + incdir); + } } // File name list string file_name; - // File name 0 is implicitly the compilation unit file name. - // cu_name can be relative to comp_dir or absolute. - if (!cu_name.empty() && cu_name[0] == '/') - m->file_names.emplace_back(cu_name); - else - m->file_names.emplace_back(comp_dir + cu_name); - while (m->read_file_entry(&cur, true)); + if (m->version >= 5) { + m->read_v5_file_table(&cur); + if (m->file_names.empty()) { + if (!cu_name.empty() && cu_name[0] == '/') + m->file_names.emplace_back(cu_name); + else + m->file_names.emplace_back(comp_dir + cu_name); + } + } else { + // File name 0 is implicitly the compilation unit file name. + // cu_name can be relative to comp_dir or absolute. + if (!cu_name.empty() && cu_name[0] == '/') + m->file_names.emplace_back(cu_name); + else + m->file_names.emplace_back(comp_dir + cu_name); + while (m->read_file_entry(&cur, true)); + } } line_table::iterator @@ -204,6 +251,11 @@ line_table::impl::read_file_entry(cursor *cur, bool in_header) { assert(cur->sec == sec); + if (version >= 5) { + read_file_entry_v5(cur); + return true; + } + string file_name; cur->string(file_name); if (in_header && file_name.empty()) @@ -217,17 +269,227 @@ line_table::impl::read_file_entry(cursor *cur, bool in_header) return true; last_file_name_end = cur->get_section_offset(); - if (file_name[0] == '/') + if (file_name.empty()) + return false; + + add_file_entry(move(file_name), dir_index, mtime, length); + + return true; +} + +void +line_table::impl::add_include_directory(const string &dir) +{ + string resolved = dir; + if (!resolved.empty() && resolved.back() != '/') + resolved += '/'; + if (!resolved.empty() && resolved[0] != '/' && !comp_dir.empty()) + resolved = comp_dir + resolved; + if (resolved.empty()) + resolved = comp_dir; + include_directories.push_back(move(resolved)); +} + +void +line_table::impl::add_file_entry(string file_name, uint64_t dir_index, + uint64_t mtime, uint64_t length) +{ + if (file_name.empty()) + throw format_error("file entry missing file name"); + if (file_name[0] == '/') { file_names.emplace_back(move(file_name), mtime, length); - else if (dir_index < include_directories.size()) - file_names.emplace_back( - include_directories[dir_index] + file_name, - mtime, length); - else + return; + } + + const string *base = nullptr; + if (dir_index < include_directories.size()) + base = &include_directories[dir_index]; + else if (dir_index == 0 && version < 5 && !comp_dir.empty()) + base = &comp_dir; + if (!base) throw format_error("file name directory index out of range: " + std::to_string(dir_index)); + file_names.emplace_back(*base + file_name, mtime, length); +} - return true; +vector +line_table::impl::read_entry_formats(cursor *cur) +{ + vector formats; + uint64_t count = cur->uleb128(); + formats.reserve(count); + for (uint64_t i = 0; i < count; ++i) { + entry_format fmt; + fmt.content = (DW_LNCT)cur->uleb128(); + fmt.form = (DW_FORM)cur->uleb128(); + formats.push_back(fmt); + } + return formats; +} + +void +line_table::impl::read_v5_directory_table(cursor *cur) +{ + auto formats = read_entry_formats(cur); + uint64_t count = cur->uleb128(); + for (uint64_t i = 0; i < count; ++i) { + string path; + for (auto &fmt : formats) { + switch (fmt.content) { + case DW_LNCT::path: + path = read_form_string(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + add_include_directory(path); + } +} + +void +line_table::impl::read_v5_file_table(cursor *cur) +{ + file_entry_formats = read_entry_formats(cur); + uint64_t count = cur->uleb128(); + for (uint64_t i = 0; i < count; ++i) { + string file_name; + uint64_t dir_index = 0; + uint64_t mtime = 0; + uint64_t length = 0; + for (auto &fmt : file_entry_formats) { + switch (fmt.content) { + case DW_LNCT::path: + file_name = read_form_string(cur, fmt.form); + break; + case DW_LNCT::directory_index: + dir_index = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::timestamp: + mtime = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::size: + length = read_form_unsigned(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + if (!file_name.empty()) + add_file_entry(move(file_name), dir_index, mtime, length); + } +} + +void +line_table::impl::read_file_entry_v5(cursor *cur) +{ + if (file_entry_formats.empty()) + throw format_error("line table missing file name entry formats"); + + string file_name; + uint64_t dir_index = 0; + uint64_t mtime = 0; + uint64_t length = 0; + for (auto &fmt : file_entry_formats) { + switch (fmt.content) { + case DW_LNCT::path: + file_name = read_form_string(cur, fmt.form); + break; + case DW_LNCT::directory_index: + dir_index = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::timestamp: + mtime = read_form_unsigned(cur, fmt.form); + break; + case DW_LNCT::size: + length = read_form_unsigned(cur, fmt.form); + break; + default: + cur->skip_form(fmt.form); + break; + } + } + + section_offset entry_end = cur->get_section_offset(); + if (entry_end <= last_file_name_end) + return; + last_file_name_end = entry_end; + + if (!file_name.empty()) + add_file_entry(move(file_name), dir_index, mtime, length); +} + +string +line_table::impl::read_form_string(cursor *cur, DW_FORM form) +{ + switch (form) { + case DW_FORM::string: { + string res; + cur->string(res); + return res; + } + case DW_FORM::line_strp: + return read_string_from_section(section_type::line_str, + cur->offset()); + case DW_FORM::strp: + return read_string_from_section(section_type::str, + cur->offset()); + default: + throw format_error("unsupported string form in line table: " + + to_string(form)); + } +} + +uint64_t +line_table::impl::read_form_unsigned(cursor *cur, DW_FORM form) +{ + switch (form) { + case DW_FORM::data1: + return cur->fixed(); + case DW_FORM::data2: + return cur->fixed(); + case DW_FORM::data4: + return cur->fixed(); + case DW_FORM::data8: + return cur->fixed(); + case DW_FORM::udata: + return cur->uleb128(); + case DW_FORM::sdata: + return (uint64_t)cur->sleb128(); + default: + throw format_error("unsupported numeric form in line table: " + + to_string(form)); + } +} + +string +line_table::impl::read_string_from_section(section_type type, + section_offset off) +{ + shared_ptr
*cache = nullptr; + switch (type) { + case section_type::line_str: + cache = &line_str_sec; + break; + case section_type::str: + cache = &str_sec; + break; + default: + throw format_error("unsupported string section"); + } + + if (!cache->get()) { + if (!dw) + throw format_error("line table requires DWARF context to read strings"); + *cache = dw->get_section(type); + } + + cursor scur(*cache, off); + string res; + scur.string(res); + return res; } line_table::file::file(string path, uint64_t mtime, uint64_t length) @@ -236,11 +498,12 @@ line_table::file::file(string path, uint64_t mtime, uint64_t length) } void -line_table::entry::reset(bool is_stmt) +line_table::entry::reset(bool is_stmt, unsigned default_file_index) { address = op_index = 0; file = nullptr; - file_index = line = 1; + file_index = default_file_index; + line = 1; column = 0; this->is_stmt = is_stmt; basic_block = end_sequence = prologue_end = epilogue_begin = false; @@ -263,7 +526,7 @@ line_table::iterator::iterator(const line_table *table, section_offset pos) : table(table), pos(pos) { if (table) { - regs.reset(table->m->default_is_stmt); + regs.reset(table->m->default_is_stmt, table->m->file_index_base); ++(*this); } } @@ -403,8 +666,8 @@ line_table::iterator::step(cursor *cur) case DW_LNE::end_sequence: regs.end_sequence = true; entry = regs; - regs.reset(m->default_is_stmt); - break; + regs.reset(m->default_is_stmt, m->file_index_base); + break; case DW_LNE::set_address: regs.address = cur->address(); regs.op_index = 0; diff --git a/dwarf/rangelist.cc b/dwarf/rangelist.cc index f9eb8e9..9853b1f 100644 --- a/dwarf/rangelist.cc +++ b/dwarf/rangelist.cc @@ -9,13 +9,15 @@ using namespace std; DWARFPP_BEGIN_NAMESPACE rangelist::rangelist(const std::shared_ptr
&sec, section_offset off, - unsigned cu_addr_size, taddr cu_low_pc) + unsigned cu_addr_size, taddr cu_low_pc, bool is_dwarf5) : sec(sec->slice(off, ~0, format::unknown, cu_addr_size)), - base_addr(cu_low_pc) + base_addr(cu_low_pc), + is_dwarf5(is_dwarf5) { } rangelist::rangelist(const initializer_list > &ranges) + : is_dwarf5(false) { synthetic.reserve(ranges.size() * 2 + 2); for (auto &range : ranges) { @@ -37,7 +39,7 @@ rangelist::iterator rangelist::begin() const { if (sec) - return iterator(sec, base_addr); + return iterator(sec, base_addr, is_dwarf5); return end(); } @@ -56,8 +58,8 @@ rangelist::contains(taddr addr) const return false; } -rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_addr) - : sec(sec), base_addr(base_addr), pos(0) +rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_addr, bool is_dwarf5) + : sec(sec), base_addr(base_addr), pos(0), is_dwarf5(is_dwarf5) { // Read in the first entry ++(*this); @@ -66,34 +68,103 @@ rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_ad rangelist::iterator & rangelist::iterator::operator++() { - // DWARF4 section 2.17.3 - taddr largest_offset = ~(taddr)0; - if (sec->addr_size < sizeof(taddr)) - largest_offset += 1 << (8 * sec->addr_size); - - // Read in entries until we reach a regular entry of an - // end-of-list. Note that pos points to the beginning of the - // entry *following* the current entry, so that's where we - // start. cursor cur(sec, pos); - while (true) { - entry.low = cur.address(); - entry.high = cur.address(); - - if (entry.low == 0 && entry.high == 0) { - // End of list - sec.reset(); - pos = 0; - break; - } else if (entry.low == largest_offset) { - // Base address change - base_addr = entry.high; - } else { - // Regular entry. Adjust by base address. - entry.low += base_addr; - entry.high += base_addr; - pos = cur.get_section_offset(); - break; + + if (is_dwarf5) { + // DWARF 5 range list entries (Section 2.17.3) + while (true) { + if (cur.end()) { + sec.reset(); + pos = 0; + return *this; + } + + DW_RLE rle = (DW_RLE)cur.fixed(); + + switch (rle) { + case DW_RLE::end_of_list: + sec.reset(); + pos = 0; + return *this; + + case DW_RLE::base_addressx: + // Index into .debug_addr - for now, skip this + cur.uleb128(); + break; + + case DW_RLE::startx_endx: + // Both start and end are indices into .debug_addr + cur.uleb128(); + cur.uleb128(); + // Skip for now - would need .debug_addr lookup + break; + + case DW_RLE::startx_length: + // Start is index, length is ULEB128 + cur.uleb128(); + cur.uleb128(); + // Skip for now - would need .debug_addr lookup + break; + + case DW_RLE::offset_pair: + // Two ULEB128 offsets from base address + entry.low = base_addr + cur.uleb128(); + entry.high = base_addr + cur.uleb128(); + pos = cur.get_section_offset(); + return *this; + + case DW_RLE::base_address: + // New base address (full address) + base_addr = cur.address(); + break; + + case DW_RLE::start_end: + // Two full addresses + entry.low = cur.address(); + entry.high = cur.address(); + pos = cur.get_section_offset(); + return *this; + + case DW_RLE::start_length: + // Full address + ULEB128 length + entry.low = cur.address(); + entry.high = entry.low + cur.uleb128(); + pos = cur.get_section_offset(); + return *this; + + default: + throw format_error("unknown DW_RLE encoding " + to_string(rle)); + } + } + } else { + // DWARF 4 section 2.17.3 + taddr largest_offset = ~(taddr)0; + if (sec->addr_size < sizeof(taddr)) + largest_offset += 1 << (8 * sec->addr_size); + + // Read in entries until we reach a regular entry or an + // end-of-list. Note that pos points to the beginning of the + // entry *following* the current entry, so that's where we + // start. + while (true) { + entry.low = cur.address(); + entry.high = cur.address(); + + if (entry.low == 0 && entry.high == 0) { + // End of list + sec.reset(); + pos = 0; + break; + } else if (entry.low == largest_offset) { + // Base address change + base_addr = entry.high; + } else { + // Regular entry. Adjust by base address. + entry.low += base_addr; + entry.high += base_addr; + pos = cur.get_section_offset(); + break; + } } } diff --git a/dwarf/to_string.cc b/dwarf/to_string.cc new file mode 100644 index 0000000..cd9c6c3 --- /dev/null +++ b/dwarf/to_string.cc @@ -0,0 +1,594 @@ +// Automatically generated by make at Sat Nov 29 03:46:33 PM EST 2025 +// DO NOT EDIT + +#include "internal.hh" + +DWARFPP_BEGIN_NAMESPACE + +std::string +to_string(section_type v) +{ + switch (v) { + case section_type::abbrev: return "section_type::abbrev"; + case section_type::addr: return "section_type::addr"; + case section_type::aranges: return "section_type::aranges"; + case section_type::frame: return "section_type::frame"; + case section_type::info: return "section_type::info"; + case section_type::line: return "section_type::line"; + case section_type::line_str: return "section_type::line_str"; + case section_type::loc: return "section_type::loc"; + case section_type::macinfo: return "section_type::macinfo"; + case section_type::pubnames: return "section_type::pubnames"; + case section_type::pubtypes: return "section_type::pubtypes"; + case section_type::ranges: return "section_type::ranges"; + case section_type::str: return "section_type::str"; + case section_type::str_offsets: return "section_type::str_offsets"; + case section_type::types: return "section_type::types"; + } + return "(section_type)" + std::to_string((int)v); +} + +std::string +to_string(value::type v) +{ + switch (v) { + case value::type::invalid: return "value::type::invalid"; + case value::type::address: return "value::type::address"; + case value::type::block: return "value::type::block"; + case value::type::constant: return "value::type::constant"; + case value::type::uconstant: return "value::type::uconstant"; + case value::type::sconstant: return "value::type::sconstant"; + case value::type::exprloc: return "value::type::exprloc"; + case value::type::flag: return "value::type::flag"; + case value::type::line: return "value::type::line"; + case value::type::loclist: return "value::type::loclist"; + case value::type::mac: return "value::type::mac"; + case value::type::rangelist: return "value::type::rangelist"; + case value::type::reference: return "value::type::reference"; + case value::type::string: return "value::type::string"; + } + return "(value::type)" + std::to_string((int)v); +} + +std::string +to_string(expr_result::type v) +{ + switch (v) { + case expr_result::type::address: return "expr_result::type::address"; + case expr_result::type::reg: return "expr_result::type::reg"; + case expr_result::type::literal: return "expr_result::type::literal"; + case expr_result::type::implicit: return "expr_result::type::implicit"; + case expr_result::type::empty: return "expr_result::type::empty"; + } + return "(expr_result::type)" + std::to_string((int)v); +} + +std::string +to_string(DW_TAG v) +{ + switch (v) { + case DW_TAG::array_type: return "DW_TAG_array_type"; + case DW_TAG::class_type: return "DW_TAG_class_type"; + case DW_TAG::entry_point: return "DW_TAG_entry_point"; + case DW_TAG::enumeration_type: return "DW_TAG_enumeration_type"; + case DW_TAG::formal_parameter: return "DW_TAG_formal_parameter"; + case DW_TAG::imported_declaration: return "DW_TAG_imported_declaration"; + case DW_TAG::label: return "DW_TAG_label"; + case DW_TAG::lexical_block: return "DW_TAG_lexical_block"; + case DW_TAG::member: return "DW_TAG_member"; + case DW_TAG::pointer_type: return "DW_TAG_pointer_type"; + case DW_TAG::reference_type: return "DW_TAG_reference_type"; + case DW_TAG::compile_unit: return "DW_TAG_compile_unit"; + case DW_TAG::string_type: return "DW_TAG_string_type"; + case DW_TAG::structure_type: return "DW_TAG_structure_type"; + case DW_TAG::subroutine_type: return "DW_TAG_subroutine_type"; + case DW_TAG::typedef_: return "DW_TAG_typedef"; + case DW_TAG::union_type: return "DW_TAG_union_type"; + case DW_TAG::unspecified_parameters: return "DW_TAG_unspecified_parameters"; + case DW_TAG::variant: return "DW_TAG_variant"; + case DW_TAG::common_block: return "DW_TAG_common_block"; + case DW_TAG::common_inclusion: return "DW_TAG_common_inclusion"; + case DW_TAG::inheritance: return "DW_TAG_inheritance"; + case DW_TAG::inlined_subroutine: return "DW_TAG_inlined_subroutine"; + case DW_TAG::module: return "DW_TAG_module"; + case DW_TAG::ptr_to_member_type: return "DW_TAG_ptr_to_member_type"; + case DW_TAG::set_type: return "DW_TAG_set_type"; + case DW_TAG::subrange_type: return "DW_TAG_subrange_type"; + case DW_TAG::with_stmt: return "DW_TAG_with_stmt"; + case DW_TAG::access_declaration: return "DW_TAG_access_declaration"; + case DW_TAG::base_type: return "DW_TAG_base_type"; + case DW_TAG::catch_block: return "DW_TAG_catch_block"; + case DW_TAG::const_type: return "DW_TAG_const_type"; + case DW_TAG::constant: return "DW_TAG_constant"; + case DW_TAG::enumerator: return "DW_TAG_enumerator"; + case DW_TAG::file_type: return "DW_TAG_file_type"; + case DW_TAG::friend_: return "DW_TAG_friend"; + case DW_TAG::namelist: return "DW_TAG_namelist"; + case DW_TAG::namelist_item: return "DW_TAG_namelist_item"; + case DW_TAG::packed_type: return "DW_TAG_packed_type"; + case DW_TAG::subprogram: return "DW_TAG_subprogram"; + case DW_TAG::template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG::template_value_parameter: return "DW_TAG_template_value_parameter"; + case DW_TAG::thrown_type: return "DW_TAG_thrown_type"; + case DW_TAG::try_block: return "DW_TAG_try_block"; + case DW_TAG::variant_part: return "DW_TAG_variant_part"; + case DW_TAG::variable: return "DW_TAG_variable"; + case DW_TAG::volatile_type: return "DW_TAG_volatile_type"; + case DW_TAG::dwarf_procedure: return "DW_TAG_dwarf_procedure"; + case DW_TAG::restrict_type: return "DW_TAG_restrict_type"; + case DW_TAG::interface_type: return "DW_TAG_interface_type"; + case DW_TAG::namespace_: return "DW_TAG_namespace"; + case DW_TAG::imported_module: return "DW_TAG_imported_module"; + case DW_TAG::unspecified_type: return "DW_TAG_unspecified_type"; + case DW_TAG::partial_unit: return "DW_TAG_partial_unit"; + case DW_TAG::imported_unit: return "DW_TAG_imported_unit"; + case DW_TAG::condition: return "DW_TAG_condition"; + case DW_TAG::shared_type: return "DW_TAG_shared_type"; + case DW_TAG::type_unit: return "DW_TAG_type_unit"; + case DW_TAG::rvalue_reference_type: return "DW_TAG_rvalue_reference_type"; + case DW_TAG::template_alias: return "DW_TAG_template_alias"; + case DW_TAG::lo_user: break; + case DW_TAG::hi_user: break; + } + return "(DW_TAG)0x" + to_hex((int)v); +} + +std::string +to_string(DW_CHILDREN v) +{ + switch (v) { + case DW_CHILDREN::no: return "DW_CHILDREN_no"; + case DW_CHILDREN::yes: return "DW_CHILDREN_yes"; + } + return "(DW_CHILDREN)0x" + to_hex((int)v); +} + +std::string +to_string(DW_AT v) +{ + switch (v) { + case DW_AT::sibling: return "DW_AT_sibling"; + case DW_AT::location: return "DW_AT_location"; + case DW_AT::name: return "DW_AT_name"; + case DW_AT::ordering: return "DW_AT_ordering"; + case DW_AT::byte_size: return "DW_AT_byte_size"; + case DW_AT::bit_offset: return "DW_AT_bit_offset"; + case DW_AT::bit_size: return "DW_AT_bit_size"; + case DW_AT::stmt_list: return "DW_AT_stmt_list"; + case DW_AT::low_pc: return "DW_AT_low_pc"; + case DW_AT::high_pc: return "DW_AT_high_pc"; + case DW_AT::language: return "DW_AT_language"; + case DW_AT::discr: return "DW_AT_discr"; + case DW_AT::discr_value: return "DW_AT_discr_value"; + case DW_AT::visibility: return "DW_AT_visibility"; + case DW_AT::import: return "DW_AT_import"; + case DW_AT::string_length: return "DW_AT_string_length"; + case DW_AT::common_reference: return "DW_AT_common_reference"; + case DW_AT::comp_dir: return "DW_AT_comp_dir"; + case DW_AT::const_value: return "DW_AT_const_value"; + case DW_AT::containing_type: return "DW_AT_containing_type"; + case DW_AT::default_value: return "DW_AT_default_value"; + case DW_AT::inline_: return "DW_AT_inline"; + case DW_AT::is_optional: return "DW_AT_is_optional"; + case DW_AT::lower_bound: return "DW_AT_lower_bound"; + case DW_AT::producer: return "DW_AT_producer"; + case DW_AT::prototyped: return "DW_AT_prototyped"; + case DW_AT::return_addr: return "DW_AT_return_addr"; + case DW_AT::start_scope: return "DW_AT_start_scope"; + case DW_AT::bit_stride: return "DW_AT_bit_stride"; + case DW_AT::upper_bound: return "DW_AT_upper_bound"; + case DW_AT::abstract_origin: return "DW_AT_abstract_origin"; + case DW_AT::accessibility: return "DW_AT_accessibility"; + case DW_AT::address_class: return "DW_AT_address_class"; + case DW_AT::artificial: return "DW_AT_artificial"; + case DW_AT::base_types: return "DW_AT_base_types"; + case DW_AT::calling_convention: return "DW_AT_calling_convention"; + case DW_AT::count: return "DW_AT_count"; + case DW_AT::data_member_location: return "DW_AT_data_member_location"; + case DW_AT::decl_column: return "DW_AT_decl_column"; + case DW_AT::decl_file: return "DW_AT_decl_file"; + case DW_AT::decl_line: return "DW_AT_decl_line"; + case DW_AT::declaration: return "DW_AT_declaration"; + case DW_AT::discr_list: return "DW_AT_discr_list"; + case DW_AT::encoding: return "DW_AT_encoding"; + case DW_AT::external: return "DW_AT_external"; + case DW_AT::frame_base: return "DW_AT_frame_base"; + case DW_AT::friend_: return "DW_AT_friend"; + case DW_AT::identifier_case: return "DW_AT_identifier_case"; + case DW_AT::macro_info: return "DW_AT_macro_info"; + case DW_AT::namelist_item: return "DW_AT_namelist_item"; + case DW_AT::priority: return "DW_AT_priority"; + case DW_AT::segment: return "DW_AT_segment"; + case DW_AT::specification: return "DW_AT_specification"; + case DW_AT::static_link: return "DW_AT_static_link"; + case DW_AT::type: return "DW_AT_type"; + case DW_AT::use_location: return "DW_AT_use_location"; + case DW_AT::variable_parameter: return "DW_AT_variable_parameter"; + case DW_AT::virtuality: return "DW_AT_virtuality"; + case DW_AT::vtable_elem_location: return "DW_AT_vtable_elem_location"; + case DW_AT::allocated: return "DW_AT_allocated"; + case DW_AT::associated: return "DW_AT_associated"; + case DW_AT::data_location: return "DW_AT_data_location"; + case DW_AT::byte_stride: return "DW_AT_byte_stride"; + case DW_AT::entry_pc: return "DW_AT_entry_pc"; + case DW_AT::use_UTF8: return "DW_AT_use_UTF8"; + case DW_AT::extension: return "DW_AT_extension"; + case DW_AT::ranges: return "DW_AT_ranges"; + case DW_AT::trampoline: return "DW_AT_trampoline"; + case DW_AT::call_column: return "DW_AT_call_column"; + case DW_AT::call_file: return "DW_AT_call_file"; + case DW_AT::call_line: return "DW_AT_call_line"; + case DW_AT::description: return "DW_AT_description"; + case DW_AT::binary_scale: return "DW_AT_binary_scale"; + case DW_AT::decimal_scale: return "DW_AT_decimal_scale"; + case DW_AT::small: return "DW_AT_small"; + case DW_AT::decimal_sign: return "DW_AT_decimal_sign"; + case DW_AT::digit_count: return "DW_AT_digit_count"; + case DW_AT::picture_string: return "DW_AT_picture_string"; + case DW_AT::mutable_: return "DW_AT_mutable"; + case DW_AT::threads_scaled: return "DW_AT_threads_scaled"; + case DW_AT::explicit_: return "DW_AT_explicit"; + case DW_AT::object_pointer: return "DW_AT_object_pointer"; + case DW_AT::endianity: return "DW_AT_endianity"; + case DW_AT::elemental: return "DW_AT_elemental"; + case DW_AT::pure: return "DW_AT_pure"; + case DW_AT::recursive: return "DW_AT_recursive"; + case DW_AT::signature: return "DW_AT_signature"; + case DW_AT::main_subprogram: return "DW_AT_main_subprogram"; + case DW_AT::data_bit_offset: return "DW_AT_data_bit_offset"; + case DW_AT::const_expr: return "DW_AT_const_expr"; + case DW_AT::enum_class: return "DW_AT_enum_class"; + case DW_AT::linkage_name: return "DW_AT_linkage_name"; + case DW_AT::lo_user: break; + case DW_AT::hi_user: break; + } + return "(DW_AT)0x" + to_hex((int)v); +} + +std::string +to_string(DW_FORM v) +{ + switch (v) { + case DW_FORM::addr: return "DW_FORM_addr"; + case DW_FORM::block2: return "DW_FORM_block2"; + case DW_FORM::block4: return "DW_FORM_block4"; + case DW_FORM::data2: return "DW_FORM_data2"; + case DW_FORM::data4: return "DW_FORM_data4"; + case DW_FORM::data8: return "DW_FORM_data8"; + case DW_FORM::string: return "DW_FORM_string"; + case DW_FORM::block: return "DW_FORM_block"; + case DW_FORM::block1: return "DW_FORM_block1"; + case DW_FORM::data1: return "DW_FORM_data1"; + case DW_FORM::flag: return "DW_FORM_flag"; + case DW_FORM::sdata: return "DW_FORM_sdata"; + case DW_FORM::strp: return "DW_FORM_strp"; + case DW_FORM::udata: return "DW_FORM_udata"; + case DW_FORM::ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM::ref1: return "DW_FORM_ref1"; + case DW_FORM::ref2: return "DW_FORM_ref2"; + case DW_FORM::ref4: return "DW_FORM_ref4"; + case DW_FORM::ref8: return "DW_FORM_ref8"; + case DW_FORM::ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM::indirect: return "DW_FORM_indirect"; + case DW_FORM::sec_offset: return "DW_FORM_sec_offset"; + case DW_FORM::exprloc: return "DW_FORM_exprloc"; + case DW_FORM::flag_present: return "DW_FORM_flag_present"; + case DW_FORM::line_strp: return "DW_FORM_line_strp"; + case DW_FORM::implicit_const: return "DW_FORM_implicit_const"; + case DW_FORM::ref_sig8: return "DW_FORM_ref_sig8"; + } + return "(DW_FORM)0x" + to_hex((int)v); +} + +std::string +to_string(DW_OP v) +{ + switch (v) { + case DW_OP::addr: return "DW_OP_addr"; + case DW_OP::deref: return "DW_OP_deref"; + case DW_OP::const1u: return "DW_OP_const1u"; + case DW_OP::const1s: return "DW_OP_const1s"; + case DW_OP::const2u: return "DW_OP_const2u"; + case DW_OP::const2s: return "DW_OP_const2s"; + case DW_OP::const4u: return "DW_OP_const4u"; + case DW_OP::const4s: return "DW_OP_const4s"; + case DW_OP::const8u: return "DW_OP_const8u"; + case DW_OP::const8s: return "DW_OP_const8s"; + case DW_OP::constu: return "DW_OP_constu"; + case DW_OP::consts: return "DW_OP_consts"; + case DW_OP::dup: return "DW_OP_dup"; + case DW_OP::drop: return "DW_OP_drop"; + case DW_OP::over: return "DW_OP_over"; + case DW_OP::pick: return "DW_OP_pick"; + case DW_OP::swap: return "DW_OP_swap"; + case DW_OP::rot: return "DW_OP_rot"; + case DW_OP::xderef: return "DW_OP_xderef"; + case DW_OP::abs: return "DW_OP_abs"; + case DW_OP::and_: return "DW_OP_and"; + case DW_OP::div: return "DW_OP_div"; + case DW_OP::minus: return "DW_OP_minus"; + case DW_OP::mod: return "DW_OP_mod"; + case DW_OP::mul: return "DW_OP_mul"; + case DW_OP::neg: return "DW_OP_neg"; + case DW_OP::not_: return "DW_OP_not"; + case DW_OP::or_: return "DW_OP_or"; + case DW_OP::plus: return "DW_OP_plus"; + case DW_OP::plus_uconst: return "DW_OP_plus_uconst"; + case DW_OP::shl: return "DW_OP_shl"; + case DW_OP::shr: return "DW_OP_shr"; + case DW_OP::shra: return "DW_OP_shra"; + case DW_OP::xor_: return "DW_OP_xor"; + case DW_OP::skip: return "DW_OP_skip"; + case DW_OP::bra: return "DW_OP_bra"; + case DW_OP::eq: return "DW_OP_eq"; + case DW_OP::ge: return "DW_OP_ge"; + case DW_OP::gt: return "DW_OP_gt"; + case DW_OP::le: return "DW_OP_le"; + case DW_OP::lt: return "DW_OP_lt"; + case DW_OP::ne: return "DW_OP_ne"; + case DW_OP::lit0: return "DW_OP_lit0"; + case DW_OP::lit31: return "DW_OP_lit31"; + case DW_OP::reg0: return "DW_OP_reg0"; + case DW_OP::reg31: return "DW_OP_reg31"; + case DW_OP::breg0: return "DW_OP_breg0"; + case DW_OP::breg31: return "DW_OP_breg31"; + case DW_OP::regx: return "DW_OP_regx"; + case DW_OP::fbreg: return "DW_OP_fbreg"; + case DW_OP::bregx: return "DW_OP_bregx"; + case DW_OP::piece: return "DW_OP_piece"; + case DW_OP::deref_size: return "DW_OP_deref_size"; + case DW_OP::xderef_size: return "DW_OP_xderef_size"; + case DW_OP::nop: return "DW_OP_nop"; + case DW_OP::push_object_address: return "DW_OP_push_object_address"; + case DW_OP::call2: return "DW_OP_call2"; + case DW_OP::call4: return "DW_OP_call4"; + case DW_OP::call_ref: return "DW_OP_call_ref"; + case DW_OP::form_tls_address: return "DW_OP_form_tls_address"; + case DW_OP::call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP::bit_piece: return "DW_OP_bit_piece"; + case DW_OP::implicit_value: return "DW_OP_implicit_value"; + case DW_OP::stack_value: return "DW_OP_stack_value"; + case DW_OP::lo_user: break; + case DW_OP::hi_user: break; + } + return "(DW_OP)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ATE v) +{ + switch (v) { + case DW_ATE::address: return "DW_ATE_address"; + case DW_ATE::boolean: return "DW_ATE_boolean"; + case DW_ATE::complex_float: return "DW_ATE_complex_float"; + case DW_ATE::float_: return "DW_ATE_float"; + case DW_ATE::signed_: return "DW_ATE_signed"; + case DW_ATE::signed_char: return "DW_ATE_signed_char"; + case DW_ATE::unsigned_: return "DW_ATE_unsigned"; + case DW_ATE::unsigned_char: return "DW_ATE_unsigned_char"; + case DW_ATE::imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE::packed_decimal: return "DW_ATE_packed_decimal"; + case DW_ATE::numeric_string: return "DW_ATE_numeric_string"; + case DW_ATE::edited: return "DW_ATE_edited"; + case DW_ATE::signed_fixed: return "DW_ATE_signed_fixed"; + case DW_ATE::unsigned_fixed: return "DW_ATE_unsigned_fixed"; + case DW_ATE::decimal_float: return "DW_ATE_decimal_float"; + case DW_ATE::UTF: return "DW_ATE_UTF"; + case DW_ATE::lo_user: break; + case DW_ATE::hi_user: break; + } + return "(DW_ATE)0x" + to_hex((int)v); +} + +std::string +to_string(DW_DS v) +{ + switch (v) { + case DW_DS::unsigned_: return "DW_DS_unsigned"; + case DW_DS::leading_overpunch: return "DW_DS_leading_overpunch"; + case DW_DS::trailing_overpunch: return "DW_DS_trailing_overpunch"; + case DW_DS::leading_separate: return "DW_DS_leading_separate"; + case DW_DS::trailing_separate: return "DW_DS_trailing_separate"; + } + return "(DW_DS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_END v) +{ + switch (v) { + case DW_END::default_: return "DW_END_default"; + case DW_END::big: return "DW_END_big"; + case DW_END::little: return "DW_END_little"; + case DW_END::lo_user: break; + case DW_END::hi_user: break; + } + return "(DW_END)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ACCESS v) +{ + switch (v) { + case DW_ACCESS::public_: return "DW_ACCESS_public"; + case DW_ACCESS::protected_: return "DW_ACCESS_protected"; + case DW_ACCESS::private_: return "DW_ACCESS_private"; + } + return "(DW_ACCESS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_VIS v) +{ + switch (v) { + case DW_VIS::local: return "DW_VIS_local"; + case DW_VIS::exported: return "DW_VIS_exported"; + case DW_VIS::qualified: return "DW_VIS_qualified"; + } + return "(DW_VIS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_VIRTUALITY v) +{ + switch (v) { + case DW_VIRTUALITY::none: return "DW_VIRTUALITY_none"; + case DW_VIRTUALITY::virtual_: return "DW_VIRTUALITY_virtual"; + case DW_VIRTUALITY::pure_virtual: return "DW_VIRTUALITY_pure_virtual"; + } + return "(DW_VIRTUALITY)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LANG v) +{ + switch (v) { + case DW_LANG::C89: return "DW_LANG_C89"; + case DW_LANG::C: return "DW_LANG_C"; + case DW_LANG::Ada83: return "DW_LANG_Ada83"; + case DW_LANG::C_plus_plus: return "DW_LANG_C_plus_plus"; + case DW_LANG::Cobol74: return "DW_LANG_Cobol74"; + case DW_LANG::Cobol85: return "DW_LANG_Cobol85"; + case DW_LANG::Fortran77: return "DW_LANG_Fortran77"; + case DW_LANG::Fortran90: return "DW_LANG_Fortran90"; + case DW_LANG::Pascal83: return "DW_LANG_Pascal83"; + case DW_LANG::Modula2: return "DW_LANG_Modula2"; + case DW_LANG::Java: return "DW_LANG_Java"; + case DW_LANG::C99: return "DW_LANG_C99"; + case DW_LANG::Ada95: return "DW_LANG_Ada95"; + case DW_LANG::Fortran95: return "DW_LANG_Fortran95"; + case DW_LANG::PLI: return "DW_LANG_PLI"; + case DW_LANG::ObjC: return "DW_LANG_ObjC"; + case DW_LANG::ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; + case DW_LANG::UPC: return "DW_LANG_UPC"; + case DW_LANG::D: return "DW_LANG_D"; + case DW_LANG::Python: return "DW_LANG_Python"; + case DW_LANG::lo_user: break; + case DW_LANG::hi_user: break; + } + return "(DW_LANG)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ID v) +{ + switch (v) { + case DW_ID::case_sensitive: return "DW_ID_case_sensitive"; + case DW_ID::up_case: return "DW_ID_up_case"; + case DW_ID::down_case: return "DW_ID_down_case"; + case DW_ID::case_insensitive: return "DW_ID_case_insensitive"; + } + return "(DW_ID)0x" + to_hex((int)v); +} + +std::string +to_string(DW_CC v) +{ + switch (v) { + case DW_CC::normal: return "DW_CC_normal"; + case DW_CC::program: return "DW_CC_program"; + case DW_CC::nocall: return "DW_CC_nocall"; + case DW_CC::lo_user: break; + case DW_CC::hi_user: break; + } + return "(DW_CC)0x" + to_hex((int)v); +} + +std::string +to_string(DW_INL v) +{ + switch (v) { + case DW_INL::not_inlined: return "DW_INL_not_inlined"; + case DW_INL::inlined: return "DW_INL_inlined"; + case DW_INL::declared_not_inlined: return "DW_INL_declared_not_inlined"; + case DW_INL::declared_inlined: return "DW_INL_declared_inlined"; + } + return "(DW_INL)0x" + to_hex((int)v); +} + +std::string +to_string(DW_ORD v) +{ + switch (v) { + case DW_ORD::row_major: return "DW_ORD_row_major"; + case DW_ORD::col_major: return "DW_ORD_col_major"; + } + return "(DW_ORD)0x" + to_hex((int)v); +} + +std::string +to_string(DW_DSC v) +{ + switch (v) { + case DW_DSC::label: return "DW_DSC_label"; + case DW_DSC::range: return "DW_DSC_range"; + } + return "(DW_DSC)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNS v) +{ + switch (v) { + case DW_LNS::copy: return "DW_LNS_copy"; + case DW_LNS::advance_pc: return "DW_LNS_advance_pc"; + case DW_LNS::advance_line: return "DW_LNS_advance_line"; + case DW_LNS::set_file: return "DW_LNS_set_file"; + case DW_LNS::set_column: return "DW_LNS_set_column"; + case DW_LNS::negate_stmt: return "DW_LNS_negate_stmt"; + case DW_LNS::set_basic_block: return "DW_LNS_set_basic_block"; + case DW_LNS::const_add_pc: return "DW_LNS_const_add_pc"; + case DW_LNS::fixed_advance_pc: return "DW_LNS_fixed_advance_pc"; + case DW_LNS::set_prologue_end: return "DW_LNS_set_prologue_end"; + case DW_LNS::set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; + case DW_LNS::set_isa: return "DW_LNS_set_isa"; + } + return "(DW_LNS)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNE v) +{ + switch (v) { + case DW_LNE::end_sequence: return "DW_LNE_end_sequence"; + case DW_LNE::set_address: return "DW_LNE_set_address"; + case DW_LNE::define_file: return "DW_LNE_define_file"; + case DW_LNE::set_discriminator: return "DW_LNE_set_discriminator"; + case DW_LNE::lo_user: break; + case DW_LNE::hi_user: break; + } + return "(DW_LNE)0x" + to_hex((int)v); +} + +std::string +to_string(DW_LNCT v) +{ + switch (v) { + case DW_LNCT::path: return "DW_LNCT_path"; + case DW_LNCT::directory_index: return "DW_LNCT_directory_index"; + case DW_LNCT::timestamp: return "DW_LNCT_timestamp"; + case DW_LNCT::size: return "DW_LNCT_size"; + case DW_LNCT::MD5: return "DW_LNCT_MD5"; + case DW_LNCT::lo_user: break; + case DW_LNCT::hi_user: break; + } + return "(DW_LNCT)0x" + to_hex((int)v); +} + +std::string +to_string(DW_RLE v) +{ + switch (v) { + case DW_RLE::end_of_list: return "DW_RLE_end_of_list"; + case DW_RLE::base_addressx: return "DW_RLE_base_addressx"; + case DW_RLE::startx_endx: return "DW_RLE_startx_endx"; + case DW_RLE::startx_length: return "DW_RLE_startx_length"; + case DW_RLE::offset_pair: return "DW_RLE_offset_pair"; + case DW_RLE::base_address: return "DW_RLE_base_address"; + case DW_RLE::start_end: return "DW_RLE_start_end"; + case DW_RLE::start_length: return "DW_RLE_start_length"; + } + return "(DW_RLE)0x" + to_hex((int)v); +} + +DWARFPP_END_NAMESPACE diff --git a/dwarf/value.cc b/dwarf/value.cc index 2ab6431..5693026 100644 --- a/dwarf/value.cc +++ b/dwarf/value.cc @@ -11,10 +11,15 @@ using namespace std; DWARFPP_BEGIN_NAMESPACE value::value(const unit *cu, - DW_AT name, DW_FORM form, type typ, section_offset offset) - : cu(cu), form(form), typ(typ), offset(offset) { + const attribute_spec &spec, section_offset offset) + : cu(cu), + form(spec.form), + typ(spec.type), + offset(offset), + has_implicit_const(spec.form == DW_FORM::implicit_const), + implicit_const(spec.implicit_const) { if (form == DW_FORM::indirect) - resolve_indirect(name); + resolve_indirect(spec.name); } section_offset @@ -26,11 +31,48 @@ value::get_section_offset() const taddr value::as_address() const { - if (form != DW_FORM::addr) + cursor cur(cu->data(), offset); + + if (form == DW_FORM::addr) { + return cur.address(); + } + + // DWARF 5 address index forms + uint64_t index; + switch (form) { + case DW_FORM::addrx: + index = cur.uleb128(); + break; + case DW_FORM::addrx1: + index = cur.fixed(); + break; + case DW_FORM::addrx2: + index = cur.fixed(); + break; + case DW_FORM::addrx3: + index = cur.fixed() | (cur.fixed() << 8); + break; + case DW_FORM::addrx4: + index = cur.fixed(); + break; + default: throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); + } - cursor cur(cu->data(), offset); - return cur.address(); + // Look up address in .debug_addr section + // DWARF 5 .debug_addr has a header: length (4 or 12 bytes), version (2), addr_size (1), segment_selector_size (1) + auto addr_sec = cu->get_dwarf().get_section(section_type::addr); + section_offset header_size = 8; // Simplified: assume 32-bit DWARF (4 + 2 + 1 + 1) + unsigned addr_size = cu->data()->addr_size; + cursor addr_cur(addr_sec, header_size + index * addr_size); + // Read address directly using the CU's addr_size (not the section's) + if (addr_size == 4) { + return addr_cur.fixed(); + } else if (addr_size == 8) { + return addr_cur.fixed(); + } else { + throw format_error("unsupported address size " + std::to_string(addr_size)); + } } const void * @@ -76,6 +118,8 @@ value::as_uconstant() const return cur.fixed(); case DW_FORM::udata: return cur.uleb128(); + case DW_FORM::implicit_const: + return static_cast(implicit_const); default: throw value_type_mismatch("cannot read " + to_string(typ) + " as uconstant"); } @@ -96,6 +140,8 @@ value::as_sconstant() const return cur.fixed(); case DW_FORM::sdata: return cur.sleb128(); + case DW_FORM::implicit_const: + return implicit_const; default: throw value_type_mismatch("cannot read " + to_string(typ) + " as sconstant"); } @@ -145,17 +191,74 @@ value::as_flag() const rangelist value::as_rangelist() const { - section_offset off = as_sec_offset(); - // The compilation unit may not have a base address. In this // case, the first entry in the range list must be a base // address entry, but we'll just assume 0 for the initial base // address. die cudie = cu->root(); taddr cu_low_pc = cudie.has(DW_AT::low_pc) ? at_low_pc(cudie) : 0; - auto sec = cu->get_dwarf().get_section(section_type::ranges); auto cusec = cu->data(); - return rangelist(sec, off, cusec->addr_size, cu_low_pc); + + // DWARF 5 uses rnglistx form with .debug_rnglists section + if (form == DW_FORM::rnglistx) { + cursor cur(cu->data(), offset); + uint64_t index = cur.uleb128(); + + // Get .debug_rnglists section + auto rnglists_sec = cu->get_dwarf().get_section(section_type::rnglists); + + // Parse the rnglists header to find the offsets table + // Header format: unit_length (4/12), version (2), addr_size (1), + // segment_selector_size (1), offset_entry_count (4) + cursor hdr(rnglists_sec, (section_offset)0); + + // Read unit length to determine format + uint32_t unit_length32 = hdr.fixed(); + format fmt; + section_offset header_size; + if (unit_length32 == 0xffffffff) { + // 64-bit DWARF + hdr.fixed(); // actual length + fmt = format::dwarf64; + header_size = 20; // 12 + 2 + 1 + 1 + 4 + } else { + fmt = format::dwarf32; + header_size = 12; // 4 + 2 + 1 + 1 + 4 + } + + uint16_t version = hdr.fixed(); + (void)version; // Should be 5 + uint8_t addr_size = hdr.fixed(); + (void)addr_size; + uint8_t segment_selector_size = hdr.fixed(); + (void)segment_selector_size; + uint32_t offset_entry_count = hdr.fixed(); + + if (index >= offset_entry_count) { + throw format_error("rnglistx index out of bounds"); + } + + // Read the offset from the offsets table + section_offset offset_size = (fmt == format::dwarf64) ? 8 : 4; + cursor offsets_cur(rnglists_sec, header_size + index * offset_size); + section_offset range_offset; + if (fmt == format::dwarf64) { + range_offset = offsets_cur.fixed(); + } else { + range_offset = offsets_cur.fixed(); + } + + // The offset is relative to the first range list entry (after offsets table) + section_offset base_offset = header_size + offset_entry_count * offset_size; + section_offset absolute_offset = base_offset + range_offset; + + return rangelist(rnglists_sec, absolute_offset, cusec->addr_size, cu_low_pc, true); + } + + // DWARF 4 and earlier: direct offset into .debug_ranges + section_offset off = as_sec_offset(); + auto sec = cu->get_dwarf().get_section(section_type::ranges); + return rangelist(sec, off, cusec->addr_size, cu_low_pc, false); } die @@ -245,6 +348,51 @@ value::as_cstr(size_t *size_out) const cursor scur(cu->get_dwarf().get_section(section_type::str), off); return scur.cstr(size_out); } + case DW_FORM::line_strp: { + section_offset off = cur.offset(); + cursor scur(cu->get_dwarf().get_section(section_type::line_str), off); + return scur.cstr(size_out); + } + case DW_FORM::strx: + case DW_FORM::strx1: + case DW_FORM::strx2: + case DW_FORM::strx3: + case DW_FORM::strx4: { + // DWARF 5: Read string index, look up in .debug_str_offsets, then read from .debug_str + uint64_t index; + switch (form) { + case DW_FORM::strx: + index = cur.uleb128(); + break; + case DW_FORM::strx1: + index = cur.fixed(); + break; + case DW_FORM::strx2: + index = cur.fixed(); + break; + case DW_FORM::strx3: + index = cur.fixed() | (cur.fixed() << 8); + break; + case DW_FORM::strx4: + index = cur.fixed(); + break; + default: + index = 0; + break; + } + // Get str_offsets_base from CU root DIE's DW_AT_str_offsets_base + // For now, we use a simplified approach: read from start of section + header + // DWARF 5 .debug_str_offsets has a header (length + version + padding) + // We skip the 8-byte header (4-byte length + 2-byte version + 2-byte padding for 32-bit DWARF) + auto str_offsets_sec = cu->get_dwarf().get_section(section_type::str_offsets); + section_offset header_size = 8; // Simplified: assume 32-bit DWARF + unsigned offset_size = (str_offsets_sec->addr_size == 8) ? 8 : 4; + cursor offsets_cur(str_offsets_sec, + header_size + index * offset_size); + section_offset str_off = offsets_cur.offset(); + cursor scur(cu->get_dwarf().get_section(section_type::str), str_off); + return scur.cstr(size_out); + } default: throw value_type_mismatch("cannot read " + to_string(typ) + " as string"); } @@ -279,7 +427,10 @@ value::resolve_indirect(DW_AT name) do { form = (DW_FORM)c.uleb128(); } while (form == DW_FORM::indirect); - typ = attribute_spec(name, form).type; + attribute_spec spec(name, form); + typ = spec.type; + has_implicit_const = (form == DW_FORM::implicit_const); + implicit_const = spec.implicit_const; offset = c.get_section_offset(); } diff --git a/elf/.gitignore b/elf/.gitignore index 0166efc..5120c10 100644 --- a/elf/.gitignore +++ b/elf/.gitignore @@ -1,5 +1,4 @@ *.o -to_string.cc libelf++.a libelf++.so libelf++.so.* diff --git a/elf/Makefile b/elf/Makefile deleted file mode 100644 index f598328..0000000 --- a/elf/Makefile +++ /dev/null @@ -1,73 +0,0 @@ -# Changed when ABI backwards compatibility is broken. -# Typically uses the major version. -SONAME = 0 - -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall -fPIC - -all: libelf++.a libelf++.so libelf++.so.$(SONAME) libelf++.pc - -SRCS := elf.cc mmap_loader.cc to_string.cc -HDRS := elf++.hh data.hh common.hh to_hex.hh -CLEAN := - -libelf++.a: $(SRCS:.cc=.o) - ar rcs $@ $^ -CLEAN += libelf++.a $(SRCS:.cc=.o) - -$(SRCS:.cc=.o): $(HDRS) - -to_string.cc: enum-print.py data.hh Makefile - @echo "// Automatically generated by make at $$(date)" > to_string.cc - @echo "// DO NOT EDIT" >> to_string.cc - @echo >> to_string.cc - @echo '#include "data.hh"' >> to_string.cc - @echo '#include "to_hex.hh"' >> to_string.cc - @echo >> to_string.cc - @echo 'ELFPP_BEGIN_NAMESPACE' >> to_string.cc - @echo >> to_string.cc - python3 enum-print.py -u --hex --no-type --mask shf --mask pf \ - -x loos -x hios -x loproc -x hiproc < data.hh >> to_string.cc - @echo 'ELFPP_END_NAMESPACE' >> to_string.cc -CLEAN += to_string.cc - -libelf++.so.$(SONAME): $(SRCS:.cc=.o) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ -CLEAN += libelf++.so.* - -libelf++.so: - ln -s $@.$(SONAME) $@ -CLEAN += libelf++.so - -# Create pkg-config for local library and headers. This will be -# transformed in to the correct global pkg-config by install. -libelf++.pc: always - @(VER=$$(git describe --match 'v*' | sed -e s/^v//); \ - echo "libdir=$$PWD"; \ - echo "includedir=$$PWD"; \ - echo ""; \ - echo "Name: libelf++"; \ - echo "Description: C++11 ELF library"; \ - echo "Version: $$VER"; \ - echo "Libs: -L\$${libdir} -lelf++"; \ - echo "Cflags: -I\$${includedir}") > $@ -CLEAN += libelf++.pc - -.PHONY: always - -PREFIX?=/usr/local - -install: libelf++.a libelf++.so libelf++.so.$(SONAME) libelf++.pc - install -d $(DESTDIR)$(PREFIX)/lib/pkgconfig - install -t $(DESTDIR)$(PREFIX)/lib libelf++.a - install -t $(DESTDIR)$(PREFIX)/lib libelf++.so.$(SONAME) - install -t $(DESTDIR)$(PREFIX)/lib libelf++.so - install -d $(DESTDIR)$(PREFIX)/include/libelfin/elf - install -t $(DESTDIR)$(PREFIX)/include/libelfin/elf common.hh data.hh elf++.hh - sed 's,^libdir=.*,libdir=$(PREFIX)/lib,;s,^includedir=.*,includedir=$(PREFIX)/include,' libelf++.pc \ - > $(DESTDIR)$(PREFIX)/lib/pkgconfig/libelf++.pc - -clean: - rm -f $(CLEAN) - -.DELETE_ON_ERROR: diff --git a/elf/to_string.cc b/elf/to_string.cc new file mode 100644 index 0000000..3293a5b --- /dev/null +++ b/elf/to_string.cc @@ -0,0 +1,160 @@ +// Automatically generated by make at Sat Nov 29 03:46:27 PM EST 2025 +// DO NOT EDIT + +#include "data.hh" +#include "to_hex.hh" + +ELFPP_BEGIN_NAMESPACE + +std::string +to_string(elfclass v) +{ + switch (v) { + case elfclass::_32: return "32"; + case elfclass::_64: return "64"; + } + return "(elfclass)0x" + to_hex((int)v); +} + +std::string +to_string(elfdata v) +{ + switch (v) { + case elfdata::lsb: return "lsb"; + case elfdata::msb: return "msb"; + } + return "(elfdata)0x" + to_hex((int)v); +} + +std::string +to_string(elfosabi v) +{ + switch (v) { + case elfosabi::sysv: return "sysv"; + case elfosabi::hpux: return "hpux"; + case elfosabi::standalone: return "standalone"; + } + return "(elfosabi)0x" + to_hex((int)v); +} + +std::string +to_string(et v) +{ + switch (v) { + case et::none: return "none"; + case et::rel: return "rel"; + case et::exec: return "exec"; + case et::dyn: return "dyn"; + case et::core: return "core"; + case et::loos: break; + case et::hios: break; + case et::loproc: break; + case et::hiproc: break; + } + return "(et)0x" + to_hex((int)v); +} + +std::string +to_string(sht v) +{ + switch (v) { + case sht::null: return "null"; + case sht::progbits: return "progbits"; + case sht::symtab: return "symtab"; + case sht::strtab: return "strtab"; + case sht::rela: return "rela"; + case sht::hash: return "hash"; + case sht::dynamic: return "dynamic"; + case sht::note: return "note"; + case sht::nobits: return "nobits"; + case sht::rel: return "rel"; + case sht::shlib: return "shlib"; + case sht::dynsym: return "dynsym"; + case sht::loos: break; + case sht::hios: break; + case sht::loproc: break; + case sht::hiproc: break; + } + return "(sht)0x" + to_hex((int)v); +} + +std::string +to_string(shf v) +{ + std::string res; + if ((v & shf::write) == shf::write) { res += "write|"; v &= ~shf::write; } + if ((v & shf::alloc) == shf::alloc) { res += "alloc|"; v &= ~shf::alloc; } + if ((v & shf::execinstr) == shf::execinstr) { res += "execinstr|"; v &= ~shf::execinstr; } + if ((v & shf::maskos) == shf::maskos) { res += "maskos|"; v &= ~shf::maskos; } + if ((v & shf::maskproc) == shf::maskproc) { res += "maskproc|"; v &= ~shf::maskproc; } + if (res.empty() || v != (shf)0) res += "(shf)0x" + to_hex((int)v); + else res.pop_back(); + return res; +} + +std::string +to_string(pt v) +{ + switch (v) { + case pt::null: return "null"; + case pt::load: return "load"; + case pt::dynamic: return "dynamic"; + case pt::interp: return "interp"; + case pt::note: return "note"; + case pt::shlib: return "shlib"; + case pt::phdr: return "phdr"; + case pt::loos: break; + case pt::hios: break; + case pt::loproc: break; + case pt::hiproc: break; + } + return "(pt)0x" + to_hex((int)v); +} + +std::string +to_string(pf v) +{ + std::string res; + if ((v & pf::x) == pf::x) { res += "x|"; v &= ~pf::x; } + if ((v & pf::w) == pf::w) { res += "w|"; v &= ~pf::w; } + if ((v & pf::r) == pf::r) { res += "r|"; v &= ~pf::r; } + if ((v & pf::maskos) == pf::maskos) { res += "maskos|"; v &= ~pf::maskos; } + if ((v & pf::maskproc) == pf::maskproc) { res += "maskproc|"; v &= ~pf::maskproc; } + if (res.empty() || v != (pf)0) res += "(pf)0x" + to_hex((int)v); + else res.pop_back(); + return res; +} + +std::string +to_string(stb v) +{ + switch (v) { + case stb::local: return "local"; + case stb::global: return "global"; + case stb::weak: return "weak"; + case stb::loos: break; + case stb::hios: break; + case stb::loproc: break; + case stb::hiproc: break; + } + return "(stb)0x" + to_hex((int)v); +} + +std::string +to_string(stt v) +{ + switch (v) { + case stt::notype: return "notype"; + case stt::object: return "object"; + case stt::func: return "func"; + case stt::section: return "section"; + case stt::file: return "file"; + case stt::loos: break; + case stt::hios: break; + case stt::loproc: break; + case stt::hiproc: break; + } + return "(stt)0x" + to_hex((int)v); +} + +ELFPP_END_NAMESPACE diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 4fa66af..0000000 --- a/examples/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -CXXFLAGS+=-g -O2 -Werror -override CXXFLAGS+=-std=c++0x -Wall - -CLEAN := - -all: dump-sections dump-segments dump-syms dump-tree dump-lines find-pc - -# Find libs -export PKG_CONFIG_PATH=../elf:../dwarf -CPPFLAGS+=$$(pkg-config --cflags libelf++ libdwarf++) -# Statically link against our libs to keep the example binaries simple -# and dependencies correct. -LIBS=../dwarf/libdwarf++.a ../elf/libelf++.a - -# Dependencies -CPPFLAGS+=-MD -MP -MF .$@.d --include .*.d - -dump-sections: dump-sections.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-sections dump-sections.o - -dump-segments: dump-segments.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-segments dump-segments.o - -dump-syms: dump-syms.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-syms dump-syms.o - -dump-tree: dump-tree.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-tree dump-tree.o - -dump-lines: dump-lines.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += dump-lines dump-lines.o - -find-pc: find-pc.o $(LIBS) - $(LINK.cc) $^ $(LOADLIBES) $(LDLIBS) -o $@ -CLEAN += find-pc find-pc.o - -clean: - rm -f $(CLEAN) .*.d