Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/stirling/obj_tools/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ pl_cc_test(
srcs = ["go_syms_test.cc"],
data = [
"//src/stirling/obj_tools/testdata/go:test_binaries",
"//src/stirling/obj_tools/testdata/go:test_buildinfo_with_mods",
"//src/stirling/obj_tools/testdata/go:test_go_1_17_binary",
"//src/stirling/obj_tools/testdata/go:test_go_1_19_binary",
"//src/stirling/obj_tools/testdata/go:test_go_1_21_binary",
Expand Down
2 changes: 1 addition & 1 deletion src/stirling/obj_tools/elf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ class ElfReader {
* ElfAddressConverter::VirtualAddrToBinaryAddr is a more appropriate utility to use.
*
* Certain use cases may require this function, such as cases where the Go toolchain
* embeds virtual addresses within a binary and must be parsed (See ReadGoBuildVersion and
* embeds virtual addresses within a binary and must be parsed (See ReadGoBuildInfo and
* ReadGoString in go_syms.cc).
*/
StatusOr<uint64_t> VirtualAddrToBinaryAddr(uint64_t virtual_addr);
Expand Down
210 changes: 162 additions & 48 deletions src/stirling/obj_tools/go_syms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#include "src/stirling/obj_tools/go_syms.h"
#include "src/stirling/utils/binary_decoder.h"

#include <utility>

namespace px {
namespace stirling {
namespace obj_tools {
Expand Down Expand Up @@ -70,10 +68,99 @@ StatusOr<std::string> ReadGoString(ElfReader* elf_reader, uint64_t ptr_size, uin
return std::string(go_version_bytecode);
}

// Extracts the semantic version from a Go version string (e.g., "go1.20.3").
// This is how the version is formatted in the buildinfo header.
StatusOr<std::string> ExtractSemVer(const std::string& input) {
Comment thread
ddelnano marked this conversation as resolved.
size_t go_pos = input.find("go"); // Find "go"
if (go_pos == std::string::npos) {
LOG(ERROR) << "Prefix 'go' not found in input.";
return error::NotFound("Prefix 'go' not found in input.");
}

size_t start = go_pos + 2; // Move past "go"
size_t end = input.find(" ", start); // Find space delimiter after version
if (end == std::string::npos) {
end = input.size(); // If no space, take the rest of the string
}

return input.substr(start, end - start);
}

// This is modeled after go's runtime/debug package
// https://github.com/golang/go/blob/93fb2c90740aef00553c9ce6a7cd4578c2469675/src/runtime/debug/mod.go#L158
StatusOr<BuildInfo> ReadModInfo(const std::string& mod) {
BuildInfo build_info;
Module* last_module = nullptr;

for (std::string_view line : absl::StrSplit(mod, '\n')) {
if (absl::StartsWith(line, "path\t")) {
build_info.path = line.substr(5);
} else if (absl::StartsWith(line, "mod\t")) {
std::vector<std::string_view> mod_parts = absl::StrSplit(line.substr(4), '\t');

// The sum is optional, so each line must have either 2 or 3 parts.
auto size = mod_parts.size();
if (size != 2 && size != 3) {
return error::InvalidArgument(absl::Substitute("Invalid mod line format: $0", line));
}
build_info.main.path = mod_parts[0];
build_info.main.version = mod_parts[1];
if (size == 3) {
build_info.main.sum = mod_parts[2];
}
VLOG(2) << absl::Substitute("mod.path=$0, mod.version=$1, mod.sum=$2", build_info.main.path,
build_info.main.version, build_info.main.sum);
last_module = &build_info.main;
} else if (absl::StartsWith(line, "dep\t")) {
std::vector<std::string_view> dep_parts = absl::StrSplit(line.substr(4), '\t');

// The sum is optional, so each line must have either 2 or 3 parts.
auto size = dep_parts.size();
if (size != 2 && size != 3) {
return error::InvalidArgument(absl::Substitute("Invalid dep line format: $0", line));
}
Module dep;
dep.path = dep_parts[0];
dep.version = dep_parts[1];
if (size == 3) {
dep.sum = dep_parts[2];
}

build_info.deps.push_back(std::move(dep));
last_module = &build_info.deps.back();

VLOG(2) << absl::Substitute("dep.path=$0, dep.version=$1, dep.sum=$2", dep.path, dep.version,
dep.sum);

} else if (absl::StartsWith(line, "=>\t")) {
if (last_module == nullptr) {
return error::InvalidArgument(
"Unexpected module replacement line with no preceding module.");
}
std::vector<std::string_view> replace_parts = absl::StrSplit(line.substr(3), '\t');

if (replace_parts.size() != 3) {
return error::InvalidArgument(
absl::Substitute("Invalid module replacement line format: $0", line));
}

std::unique_ptr<Module> replacement = std::make_unique<Module>();
replacement->path = replace_parts[0];
replacement->version = replace_parts[1];
replacement->sum = replace_parts[2];
last_module->replace = std::move(replacement);
}
// TODO(ddelnano): Handle the build flags line in the future
// (https://github.com/golang/go/blob/93fb2c90740aef00553c9ce6a7cd4578c2469675/src/runtime/debug/mod.go#L171).
// This is omitted for now since it doesn't help with Go uprobes.
}
return build_info;
}

// Reads the buildinfo header embedded in the .go.buildinfo ELF section in order to determine the go
// toolchain version. This function emulates what the go version cli performs as seen
// https://github.com/golang/go/blob/cb7a091d729eab75ccfdaeba5a0605f05addf422/src/debug/buildinfo/buildinfo.go#L151-L221
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader) {
StatusOr<std::pair<std::string, BuildInfo>> ReadGoBuildInfo(ElfReader* elf_reader) {
PX_ASSIGN_OR_RETURN(ELFIO::section * section, elf_reader->SectionWithName(kGoBuildInfoSection));
int offset = section->get_offset();
PX_ASSIGN_OR_RETURN(std::string_view buildInfoByteCode,
Expand All @@ -85,63 +172,90 @@ StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader) {
PX_ASSIGN_OR_RETURN(uint8_t ptr_size, binary_decoder.ExtractBEInt<uint8_t>());
PX_ASSIGN_OR_RETURN(uint8_t endianness, binary_decoder.ExtractBEInt<uint8_t>());

BuildInfo build_info;
std::string go_version;
std::string mod_info;
// If the endianness has its second bit set, then the go version immediately follows the 32 bit
// header specified by the varint encoded string data
if ((endianness & 0x2) != 0) {
// Skip the remaining 16 bytes of buildinfo header
PX_CHECK_OK(binary_decoder.ExtractBufIgnore(16));

PX_ASSIGN_OR_RETURN(uint64_t size, binary_decoder.ExtractUVarInt());
PX_ASSIGN_OR_RETURN(std::string_view go_version, binary_decoder.ExtractString(size));
return std::string(go_version);
}

read_ptr_func_t read_ptr;
switch (endianness) {
case 0x0: {
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint32_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
PX_ASSIGN_OR_RETURN(go_version, binary_decoder.ExtractString(size));

PX_ASSIGN_OR_RETURN(uint64_t mod_size, binary_decoder.ExtractUVarInt());
PX_ASSIGN_OR_RETURN(mod_info, binary_decoder.ExtractString(mod_size));
} else {
read_ptr_func_t read_ptr;
switch (endianness) {
case 0x0: {
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint32_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
}
break;
}
break;
}
case 0x1:
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
case 0x1:
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
}
break;
default: {
auto msg =
absl::Substitute("Invalid endianness=$0, refusing to parse non go binary", endianness);
DCHECK(false) << msg;
return error::NotFound(msg);
}
break;
default: {
auto msg =
absl::Substitute("Invalid endianness=$0, refusing to parse non go binary", endianness);
DCHECK(false) << msg;
return error::NotFound(msg);
}
}

// Reads the virtual address location of the runtime.buildVersion symbol.
PX_ASSIGN_OR_RETURN(auto runtime_version_vaddr,
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
PX_ASSIGN_OR_RETURN(uint64_t ptr_addr,
elf_reader->VirtualAddrToBinaryAddr(read_ptr(runtime_version_vaddr)));
// Reads the virtual address location of the runtime.buildVersion symbol.
PX_ASSIGN_OR_RETURN(auto runtime_version_vaddr,
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
PX_ASSIGN_OR_RETURN(auto mod_info_vaddr,
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
PX_ASSIGN_OR_RETURN(uint64_t ptr_addr,
elf_reader->VirtualAddrToBinaryAddr(read_ptr(runtime_version_vaddr)));

PX_ASSIGN_OR_RETURN(go_version, ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr));

auto mod_ptr_addr_s = elf_reader->VirtualAddrToBinaryAddr(read_ptr(mod_info_vaddr));
if (mod_ptr_addr_s.ok()) {
PX_ASSIGN_OR_RETURN(mod_info, ReadGoString(elf_reader, ptr_size,
mod_ptr_addr_s.ConsumeValueOrDie(), read_ptr));
}
}

return ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr);
auto mod_size = mod_info.size();
if (mod_size > 0) {
// The module info string is delimited by the sentinel strings cmd/go/internal/modload.infoStart
// and infoEnd. These strings are 16 characters long, so first check that the module info
// contains more than the sentinel strings. This check reflects upstream's implementation
// https://github.com/golang/go/blob/cb7a091d729eab75ccfdaeba5a0605f05addf422/src/debug/buildinfo/buildinfo.go#L214-L215
if (mod_size >= 33 && mod_info.at(mod_size - 17) == '\n') {
mod_info.erase(0, 16);
PX_ASSIGN_OR_RETURN(build_info, ReadModInfo(mod_info));
}
}
PX_ASSIGN_OR_RETURN(auto s, ExtractSemVer(go_version));
return std::make_pair(s, std::move(build_info));
}

// Prefixes used to search for itable symbols in the binary. Follows the format:
Expand Down
25 changes: 20 additions & 5 deletions src/stirling/obj_tools/go_syms.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@

#pragma once

#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include <absl/container/flat_hash_map.h>
Expand All @@ -33,11 +35,24 @@ namespace obj_tools {
// Returns true if the executable is built by Golang.
bool IsGoExecutable(ElfReader* elf_reader);

// Returns the build version of a Golang executable. The executable is read through the input
// elf_reader.
// TODO(yzhao): We'll use this to determine the corresponding Golang executable's TLS data
// structures and their offsets.
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader);
struct Module {
std::string path;
std::string version;
std::string sum;
std::unique_ptr<Module> replace = nullptr;
};

struct BuildInfo {
std::string path;
Module main;
std::vector<Module> deps;
std::vector<std::pair<std::string, std::string>> settings;
};

StatusOr<BuildInfo> ReadModInfo(const std::string& mod);
// Returns the build version and buildinfo of a Golang executable. The executable is read through
// the input elf_reader.
StatusOr<std::pair<std::string, BuildInfo>> ReadGoBuildInfo(ElfReader* elf_reader);

// Describes a Golang type that implement an interface.
struct IntfImplTypeInfo {
Expand Down
Loading