Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty_package.bloaty
DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

add_library(libbloaty STATIC
src/arfile.cc
src/bloaty.cc
src/bloaty.h
src/disassemble.cc
Expand Down
136 changes: 136 additions & 0 deletions src/arfile.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <algorithm>
#include <string>
#include <iostream>
#include "absl/numeric/int128.h"
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "arfile.h"
#include "bloaty.h"
#include "util.h"

#include <assert.h>
#include <limits.h>
#include <stdlib.h>

using absl::string_view;

namespace bloaty {

// Parses a decimal AR header field into a size_t.
//
// AR numeric fields are fixed-width and padded on the right, so trailing
// spaces and tabs are dropped before the digits are handed to
// absl::SimpleAtoi.  Throws (via THROWF) when the remaining text is not a
// valid integer; the message shows the text after trimming.
size_t StringViewToSize(string_view str) {
  // Index of the last character that is not padding, or npos when the field
  // consists of padding only (in which case nothing is kept).
  const size_t last_kept = str.find_last_not_of(" \t");
  const size_t keep = (last_kept == string_view::npos) ? 0 : last_kept + 1;
  str.remove_suffix(str.size() - keep);

  size_t value;
  if (absl::SimpleAtoi(str, &value)) {
    return value;
  }
  THROWF("couldn't convert string '$0' to integer.", str);
}

// Reads the next archive member from the unread part of the archive.
//
// Returns false, leaving |file| untouched, when no bytes remain.  Otherwise
// fills in every field of |file| that applies to the member and returns true.
//
// Throws (via THROW/THROWF) when:
//   - fewer bytes remain than one member header,
//   - the size field or a filename offset/length is not a valid integer,
//   - the member body runs past the end of the data (raised by Consume),
//   - a GNU long-filename reference has no terminating '/',
//   - a Darwin "#1/N" name length exceeds the declared member size,
//   - the name uses an unknown "/x" form, or has no '/' at all (treated as
//     BSD style, which is not implemented).
//
// Note that Consume() rounds the requested length up to an even number, so
// |file->contents| can be one byte longer than |file->size|.
bool ArFile::MemberReader::ReadMember(MemberFile* file) {
  // On-disk layout of the fixed-size header that precedes each member.  All
  // fields are plain character arrays, so the struct has no padding and can be
  // overlaid directly on the input bytes.
  struct Header {
    char file_id[16];
    char modified_timestamp[12];
    char owner_id[6];
    char group_id[6];
    char mode[8];
    char size[10];
    char end[2];
  };

  if (remaining_.size() == 0) {
    return false;
  } else if (remaining_.size() < sizeof(Header)) {
    THROW("Premature EOF in AR data");
  }

  // Grab the header pointer before Consume() advances remaining_.
  const Header* header = reinterpret_cast<const Header*>(remaining_.data());
  file->header = Consume(sizeof(Header));

  string_view file_id(&header->file_id[0], sizeof(header->file_id));
  string_view size_str(&header->size[0], sizeof(header->size));
  file->size = StringViewToSize(size_str);
  file->contents = Consume(file->size);
  file->file_type = MemberFile::kNormal;
  file->format = MemberFile::GNU;

  if (file_id[0] == '/') {
    // Special filename, internal to the format.
    const char kind = file_id[1];
    // Explicit range check instead of isdigit(): passing a plain char that
    // happens to be negative to isdigit() is undefined behavior, and these
    // bytes come straight from the input file.
    const bool kind_is_digit = (kind >= '0' && kind <= '9');

    if (kind == ' ') {
      file->file_type = MemberFile::kSymbolTable;
    } else if (kind == '/') {
      file->file_type = MemberFile::kLongFilenameTable;
      // Remembered so that later "/N" members can look their names up.
      long_filenames_ = file->contents;
    } else if (kind_is_digit) {
      // "/N": the name starts at byte N of the long filename table and runs
      // up to the next '/'.
      size_t offset = StringViewToSize(file_id.substr(1));
      size_t end = long_filenames_.find('/', offset);

      if (end == std::string::npos) {
        THROW("Unterminated long filename");
      }

      file->filename = long_filenames_.substr(offset, end - offset);
    } else {
      THROW("Unexpected special filename in AR archive");
    }
  } else if (file_id[0] == '#' && file_id[1] == '1' && file_id[2] == '/') {
    // Darwin-style long filename: "#1/N", where the first N bytes of the
    // member data hold the name and the real payload follows.
    file->format = MemberFile::Darwin;
    size_t offset = StringViewToSize(file_id.substr(3));

    // Validate against the declared member size rather than contents.size():
    // the latter may include the alignment byte added by Consume(), which is
    // not part of the member and must not be read as part of the name.
    if (offset > file->size) {
      THROWF("Darwin long filename offset ($0) exceeds member size ($1)",
             offset, file->size);
    }

    // The embedded name may be NUL-padded; keep only the part before the
    // first NUL.
    string_view filename_data = file->contents.substr(0, offset);
    size_t null_pos = filename_data.find('\0');
    if (null_pos != string_view::npos) {
      file->filename = filename_data.substr(0, null_pos);
    } else {
      file->filename = filename_data;
    }

    // Darwin archives use "__.SYMDEF" or "__.SYMDEF SORTED" for symbol tables
    // (GNU uses "/" for the same purpose).
    if (file->filename == "__.SYMDEF" ||
        file->filename == "__.SYMDEF SORTED") {
      // Symbol table contents are left as read, embedded name included.
      file->file_type = MemberFile::kSymbolTable;
    } else {
      // Regular member: drop the embedded name so contents is the payload.
      file->contents = file->contents.substr(offset);
    }
  } else {
    // Normal filename, slash-terminated.
    size_t slash = file_id.find('/');

    if (slash == std::string::npos) {
      file->format = MemberFile::BSD;
      THROW("BSD-style AR not yet implemented");
    }

    file->filename = file_id.substr(0, slash);
  }

  return true;
}

} // bloaty namespace

135 changes: 135 additions & 0 deletions src/arfile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BLOATY_ARFILE_H_
#define BLOATY_ARFILE_H_

#include <algorithm>
#include <string>
#include <iostream>
#include "absl/numeric/int128.h"
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "bloaty.h"
#include "util.h"

#include <assert.h>
#include <limits.h>
#include <stdlib.h>

using absl::string_view;

namespace bloaty {

// ArFile //////////////////////////////////////////////////////////////////////

// For parsing .a files (static libraries).
//
// AR archives are used for static libraries and can contain multiple object
// files. The format is ancient but still widely used. There are three main
// variants:
//
// 1. GNU format:
// - Symbol table: member named "/"
// - Long filename table: member named "//"
// - Long filename references: members named "/N" where N is offset into table
// - Short filenames: slash-terminated in header (e.g., "foo.o/")
//
// 2. Darwin format:
// - Symbol table: member named "__.SYMDEF" or "__.SYMDEF SORTED"
// - Long filenames: embedded in member data, indicated by "#1/N" where N is length
// - Short filenames: same as GNU (slash-terminated)
//
// 3. BSD format:
// - Uses space-padded filenames instead of slash-terminated
// - Currently not implemented (throws error if detected)
//
// Archive structure:
// Magic: "!<arch>\n" (8 bytes)
// For each member:
// Header: 60 bytes (file_id, timestamp, owner, group, mode, size, end marker)
// Data: size bytes (padded to even boundary for alignment)
//
// The best documentation for this file format is Wikipedia:
// https://en.wikipedia.org/wiki/Ar_(Unix)

// Non-owning view over the bytes of an AR archive.
//
// The object only stores string_views into |data|; the caller must keep the
// underlying buffer alive for as long as the ArFile, any MemberReader created
// from it, and any MemberFile it produced are in use.
class ArFile {
 public:
  // Splits |data| into the leading magic and everything after it.
  // NOTE(review): StrictSubstr is defined outside this file; it presumably
  // rejects inputs shorter than kMagicSize -- confirm before relying on
  // IsOpen() returning false for very short inputs.
  ArFile(string_view data)
      : magic_(StrictSubstr(data, 0, kMagicSize)),
        contents_(data.substr(std::min<size_t>(data.size(), kMagicSize))) {}

  // True when the leading bytes match the AR magic string.
  bool IsOpen() const { return magic() == string_view(kMagic); }

  string_view magic() const { return magic_; }
  // Everything after the magic: the sequence of member headers and bodies.
  string_view contents() const { return contents_; }

  // One archive member, as filled in by MemberReader::ReadMember().
  struct MemberFile {
    enum {
      kSymbolTable,        // Stores a symbol table.
      kLongFilenameTable,  // Stores long filenames, users should ignore.
      kNormal,             // Regular data file.
    } file_type;

    enum {
      GNU,
      Darwin,
      BSD
    } format;

    // Member name.  Set for kNormal members; also set for Darwin-style symbol
    // tables, which are recognized by their name.
    string_view filename;
    size_t size;           // Value of the header's size field.
    string_view header;    // Raw bytes of the member header.
    string_view contents;  // Member data as read (may include a padding byte).
  };

  // Sequentially walks the members of an archive.
  class MemberReader {
   public:
    MemberReader(const ArFile& ar) : remaining_(ar.contents()) {}

    // Defined in arfile.cc.
    bool ReadMember(MemberFile* file);
    bool IsEof() const { return remaining_.size() == 0; }

   private:
    // Removes |n| bytes, rounded up to an even count, from the front of the
    // unread data and returns them.  Throws if that many bytes are not
    // available.
    string_view Consume(size_t n) {
      // One padding byte follows an odd-sized region.
      const size_t pad = n % 2;
      // Bounds are checked with a subtraction so that an |n| close to the
      // maximum size_t cannot wrap around when the padding is added; a
      // wrapped length would pass the check and silently consume nothing.
      if (remaining_.size() < n || remaining_.size() - n < pad) {
        THROW("premature end of file");
      }
      const size_t padded = n + pad;
      string_view ret = remaining_.substr(0, padded);
      remaining_.remove_prefix(padded);
      return ret;
    }

    // Contents of the most recent GNU long filename table ("//") member.
    string_view long_filenames_;
    // Archive bytes that have not been consumed yet.
    string_view remaining_;
  };

 private:
  const string_view magic_;
  const string_view contents_;

  static constexpr const char* kMagic = "!<arch>\n";
  static constexpr int kMagicSize = 8;
};

// Reports whether |data| begins with the AR archive magic, by wrapping it in
// a temporary ArFile and asking whether it opened.
inline bool IsArchiveFile(string_view data) {
  return ArFile(data).IsOpen();
}

} // namespace bloaty

#endif // BLOATY_ARFILE_H_

1 change: 1 addition & 0 deletions src/bloaty.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct DataSourceDefinition {

constexpr DataSourceDefinition data_sources[] = {
{DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"},
{DataSource::kArchs, "archs", "architecture slices in universal binaries"},
{DataSource::kCompileUnits, "compileunits",
"source file for the .o file (translation unit). requires debug info."},
{DataSource::kInputFiles, "inputfiles",
Expand Down
1 change: 1 addition & 0 deletions src/bloaty.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum class DataSource {
kRawRanges,
kSections,
kSegments,
kArchs,

// We always set this to one of the concrete symbol types below before
// setting it on a sink.
Expand Down
Loading