Skip to content

Commit 18bc7f0

Browse files
committed
Remove DetectSourceLanguage API in favor of per compile unit DIE APIs.
Signed-off-by: Dom Del Nano <ddelnano@gmail.com>
1 parent 17a0121 commit 18bc7f0

8 files changed

Lines changed: 109 additions & 103 deletions

File tree

src/stirling/obj_tools/dwarf_reader.cc

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,6 @@ StatusOr<std::unique_ptr<DwarfReader>> DwarfReader::CreateWithoutIndexing(
9292
auto dwarf_reader = std::unique_ptr<DwarfReader>(
9393
new DwarfReader(std::move(buffer), DWARFContext::create(*obj_file)));
9494

95-
PX_RETURN_IF_ERROR(dwarf_reader->DetectSourceLanguage());
96-
9795
return dwarf_reader;
9896
}
9997

@@ -154,36 +152,29 @@ bool IsNamespace(llvm::dwarf::Tag tag) { return tag == llvm::dwarf::DW_TAG_names
154152

155153
} // namespace
156154

157-
Status DwarfReader::DetectSourceLanguage() {
158-
for (size_t i = 0; i < dwarf_context_->getNumCompileUnits(); ++i) {
159-
const auto& unit_die = dwarf_context_->getUnitAtIndex(i)->getUnitDIE();
160-
if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) {
161-
// Skip over DW_TAG_partial_unit, and potentially other tags.
162-
continue;
163-
}
164-
165-
PX_ASSIGN_OR(const DWARFFormValue& lang_attr,
166-
GetAttribute(unit_die, llvm::dwarf::DW_AT_language), continue);
167-
source_language_ =
168-
static_cast<llvm::dwarf::SourceLanguage>(lang_attr.getAsUnsignedConstant().getValue());
169-
170-
const DWARFFormValue& producer_attr =
171-
GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({});
172-
173-
auto s = producer_attr.getAsCString();
155+
StatusOr<std::pair<llvm::dwarf::SourceLanguage, std::string>>
156+
DwarfReader::DetectSourceLanguageFromCUDIE(const llvm::DWARFDie& unit_die) {
157+
if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) {
158+
// DW_TAG_partial_unit (and potentially other tags) are not handled here; report an error.
159+
return error::NotFound("Expected DW_TAG_compile_unit, but got DW_TAG=$0 for unit DIE: $1",
160+
magic_enum::enum_name(unit_die.getTag()), Dump(unit_die));
161+
}
162+
const DWARFFormValue& producer_attr =
163+
GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({});
164+
auto s = producer_attr.getAsCString();
165+
std::string compiler;
174166
#if LLVM_VERSION_MAJOR >= 14
175-
if (!s.takeError()) {
176-
compiler_ = s.get();
177-
}
167+
if (!s.takeError()) {
168+
compiler = s.get();
169+
}
178170
#else
179-
compiler_ = s.getValueOr("");
171+
compiler = s.getValueOr("");
180172
#endif
181-
182-
return Status::OK();
183-
}
184-
return error::Internal(
185-
"Could not determine the source language of the DWARF info. DW_AT_language not found on "
186-
"any compilation unit.");
173+
PX_ASSIGN_OR_RETURN(const DWARFFormValue& lang_attr,
174+
GetAttribute(unit_die, llvm::dwarf::DW_AT_language));
175+
auto source_language =
176+
static_cast<llvm::dwarf::SourceLanguage>(lang_attr.getAsUnsignedConstant().getValue());
177+
return std::make_pair(source_language, compiler);
187178
}
188179

189180
void DwarfReader::IndexDIEs(
@@ -923,16 +914,20 @@ StatusOr<std::map<std::string, ArgInfo>> DwarfReader::GetFunctionArgInfo(
923914
// but DW_AT_location has been found to be blank in some cases, making it unreliable.
924915
// Instead, we use a FunctionArgTracker that tries to reverse engineer the calling convention.
925916

926-
ABI abi = LanguageToABI(source_language_, compiler_);
917+
PX_ASSIGN_OR_RETURN(const DWARFDie& function_die,
918+
GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram));
919+
// Certain binaries can have DW_TAG_compile_units with different source languages. When compiling
920+
// programs with ASAN/TSAN enabled this is common.
921+
llvm::DWARFUnit* cu = function_die.getDwarfUnit();
922+
llvm::DWARFDie unit_die = cu->getUnitDIE();
923+
PX_ASSIGN_OR_RETURN(auto p, DetectSourceLanguageFromCUDIE(unit_die));
924+
ABI abi = LanguageToABI(p.first, p.second);
927925
if (abi == ABI::kUnknown) {
928926
return error::Unimplemented("Unable to determine ABI from language: $0",
929-
magic_enum::enum_name(source_language_));
927+
magic_enum::enum_name(p.first));
930928
}
931929
std::unique_ptr<ABICallingConventionModel> arg_tracker = ABICallingConventionModel::Create(abi);
932930

933-
PX_ASSIGN_OR_RETURN(const DWARFDie& function_die,
934-
GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram));
935-
936931
// If function has a return value, process that first.
937932
// This is important, because in some ABIs (e.g. SystemV ABI),
938933
// if the return value is not able to be passed back in the available registers,
@@ -968,7 +963,7 @@ StatusOr<std::map<std::string, ArgInfo>> DwarfReader::GetFunctionArgInfo(
968963
PX_ASSIGN_OR_RETURN(const DWARFDie type_die, GetTypeDie(die));
969964
PX_ASSIGN_OR_RETURN(arg.type_info, GetTypeInfo(die, type_die));
970965

971-
if (source_language_ == llvm::dwarf::DW_LANG_Go) {
966+
if (p.first == llvm::dwarf::DW_LANG_Go) {
972967
arg.retarg = IsGolangRetArg(die).ValueOr(false);
973968
}
974969

src/stirling/obj_tools/dwarf_reader.h

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,12 @@ class DwarfReader {
178178
StatusOr<llvm::DWARFDie> GetMatchingDIE(std::string_view name,
179179
std::optional<llvm::dwarf::Tag> type = {});
180180

181+
/**
182+
* Detects the source language and compiler from a DW_TAG_compile_unit's attributes.
183+
*/
184+
StatusOr<std::pair<llvm::dwarf::SourceLanguage, std::string>> DetectSourceLanguageFromCUDIE(
185+
const llvm::DWARFDie& die);
186+
181187
/**
182188
* Return the size of a struct.
183189
*/
@@ -291,16 +297,10 @@ class DwarfReader {
291297

292298
bool IsValid() const { return dwarf_context_->getNumCompileUnits() != 0; }
293299

294-
const llvm::dwarf::SourceLanguage& source_language() const { return source_language_; }
295-
const std::string& compiler() const { return compiler_; }
296-
297300
private:
298301
DwarfReader(std::unique_ptr<llvm::MemoryBuffer> buffer,
299302
std::unique_ptr<llvm::DWARFContext> dwarf_context);
300303

301-
// Detects the source language of the dwarf content being read.
302-
Status DetectSourceLanguage();
303-
304304
// Builds an index for certain commonly used DIE types (e.g. structs and functions).
305305
// When making multiple DwarfReader calls, this speeds up the process at the cost of some memory.
306306
//
@@ -317,12 +317,6 @@ class DwarfReader {
317317
void InsertToDIEMap(std::string name, llvm::dwarf::Tag tag, llvm::DWARFDie die);
318318
std::optional<llvm::DWARFDie> FindInDIEMap(const std::string& name, llvm::dwarf::Tag tag) const;
319319

320-
// Records the source language of the DWARF information.
321-
llvm::dwarf::SourceLanguage source_language_;
322-
323-
// Records the name of the compiler that produces this file.
324-
std::string compiler_;
325-
326320
std::unique_ptr<llvm::MemoryBuffer> memory_buffer_;
327321
std::unique_ptr<llvm::DWARFContext> dwarf_context_;
328322

src/stirling/obj_tools/dwarf_reader_test.cc

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,23 +125,32 @@ TEST_P(CppDwarfReaderTest, NonExistentPath) {
125125

126126
TEST_P(CppDwarfReaderTest, SourceLanguage) {
127127
{
128+
// Check that source language detection for individual DIEs works.
129+
ASSERT_OK_AND_ASSIGN(
130+
auto die, dwarf_reader->GetMatchingDIE("CanYouFindThis", llvm::dwarf::DW_TAG_subprogram));
131+
llvm::DWARFUnit* cu = die.getDwarfUnit();
132+
llvm::DWARFDie unit_die = cu->getUnitDIE();
133+
ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die));
128134
// We use C++17, but the dwarf shows 14.
129-
EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_C_plus_plus_14);
130-
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("clang"));
135+
EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_C_plus_plus_14);
136+
EXPECT_THAT(p.second, ::testing::HasSubstr("clang"));
131137
}
132138
}
133139

134140
TEST_P(GolangDwarfReaderTest, SourceLanguage) {
135141
{
136-
EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_Go);
137-
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("go"));
138-
142+
// Check that source language detection for individual DIEs works.
139143
ASSERT_OK_AND_ASSIGN(const bool uses_regabi, UsesRegABI());
140-
144+
ASSERT_OK_AND_ASSIGN(auto die, dwarf_reader->GetMatchingDIE("main.(*Vertex).Scale",
145+
llvm::dwarf::DW_TAG_subprogram));
146+
llvm::DWARFUnit* cu = die.getDwarfUnit();
147+
llvm::DWARFDie unit_die = cu->getUnitDIE();
148+
ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die));
149+
EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_Go);
141150
if (uses_regabi) {
142-
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("regabi"));
151+
EXPECT_THAT(p.second, ::testing::HasSubstr("regabi"));
143152
} else {
144-
EXPECT_THAT(dwarf_reader->compiler(), ::testing::Not(::testing::HasSubstr("regabi")));
153+
EXPECT_THAT(p.second, ::testing::Not(::testing::HasSubstr("regabi")));
145154
}
146155
}
147156
}

src/stirling/obj_tools/elf_reader.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class ElfReader {
6262
const std::string& binary_path, const std::filesystem::path& debug_file_dir = kDebugFileDir);
6363

6464
std::filesystem::path& debug_symbols_path() { return debug_symbols_path_; }
65+
const std::string& binary_path() const { return binary_path_; }
6566

6667
struct SymbolInfo {
6768
std::string name;

src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.cc

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ StatusOr<ir::shared::Language> TransformSourceLanguage(
4242
return ir::shared::Language::GOLANG;
4343
case llvm::dwarf::DW_LANG_C:
4444
case llvm::dwarf::DW_LANG_C99:
45+
case llvm::dwarf::DW_LANG_C11:
4546
case llvm::dwarf::DW_LANG_C_plus_plus:
4647
case llvm::dwarf::DW_LANG_C_plus_plus_03:
4748
case llvm::dwarf::DW_LANG_C_plus_plus_11:
@@ -55,41 +56,38 @@ StatusOr<ir::shared::Language> TransformSourceLanguage(
5556

5657
} // namespace
5758

58-
void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
59-
ir::logical::TracepointDeployment* input_program) {
60-
ir::shared::Language detected_language = ir::shared::Language::LANG_UNKNOWN;
61-
59+
Status DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
60+
ir::logical::TracepointSpec* program, const std::string& symbol_name) {
6261
// Primary detection mechanism is DWARF info, when available.
6362
if (dwarf_reader != nullptr) {
64-
detected_language = TransformSourceLanguage(dwarf_reader->source_language())
65-
.ConsumeValueOr(ir::shared::Language::LANG_UNKNOWN);
63+
PX_ASSIGN_OR_RETURN(const auto& function_die,
64+
dwarf_reader->GetMatchingDIE(symbol_name, llvm::dwarf::DW_TAG_subprogram));
65+
llvm::DWARFUnit* cu = function_die.getDwarfUnit();
66+
llvm::DWARFDie unit_die = cu->getUnitDIE();
67+
68+
PX_ASSIGN_OR_RETURN(auto lang_pair, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die));
69+
llvm::dwarf::SourceLanguage source_lang = lang_pair.first;
70+
PX_ASSIGN_OR_RETURN(auto detected_language, TransformSourceLanguage(source_lang));
71+
72+
LOG(INFO) << absl::Substitute("Using language $0 for object $1 and others",
73+
magic_enum::enum_name(source_lang), elf_reader->binary_path());
74+
75+
program->set_language(detected_language);
76+
return Status::OK();
6677
} else {
6778
// Back-up detection policy looks for certain language-specific symbols
6879
if (IsGoExecutable(elf_reader)) {
69-
detected_language = ir::shared::Language::GOLANG;
80+
LOG(INFO) << absl::Substitute("Using language GOLANG for object $0 and others",
81+
elf_reader->binary_path());
82+
program->set_language(ir::shared::Language::GOLANG);
83+
return Status::OK();
7084
}
7185

7286
// TODO(oazizi): Make this stronger by adding more elf-based tests.
7387
}
7488

75-
if (detected_language != ir::shared::Language::LANG_UNKNOWN) {
76-
LOG(INFO) << absl::Substitute("Using language $0 for object $1 and others",
77-
magic_enum::enum_name(dwarf_reader->source_language()),
78-
input_program->deployment_spec().path_list().paths(0));
79-
80-
// Since we only support tracing of a single object, all tracepoints have the same language.
81-
for (auto& tracepoint : *input_program->mutable_tracepoints()) {
82-
tracepoint.mutable_program()->set_language(detected_language);
83-
}
84-
} else {
85-
// For now, just print a warning, and let the probe proceed.
86-
// This is so we can use things like function argument tracing even when other features may not
87-
// work.
88-
LOG(WARNING) << absl::Substitute(
89-
"Language for object $0 and others is unknown or unsupported, so assuming C/C++ ABI. "
90-
"Some dynamic tracing features may not work, or may produce unexpected results.",
91-
input_program->deployment_spec().path_list().paths(0));
92-
}
89+
return error::InvalidArgument("Unable to detect source language for object $0.",
90+
elf_reader->binary_path());
9391
}
9492
namespace {
9593

@@ -110,8 +108,9 @@ bool IsWholeWordSuffix(std::string_view name, std::string_view suffix) {
110108

111109
} // namespace
112110

113-
Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
114-
ir::logical::TracepointDeployment* input_program) {
111+
Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader,
112+
obj_tools::DwarfReader* dwarf_reader,
113+
ir::logical::TracepointDeployment* input_program) {
115114
// Expand symbol
116115
for (auto& t : *input_program->mutable_tracepoints()) {
117116
for (auto& probe : *t.mutable_program()->mutable_probes()) {
@@ -156,7 +155,10 @@ Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
156155
return error::Internal("Could not find valid symbol match");
157156
}
158157

159-
*probe.mutable_tracepoint()->mutable_symbol() = *symbol_name;
158+
auto tracepoint = probe.mutable_tracepoint();
159+
*tracepoint->mutable_symbol() = *symbol_name;
160+
PX_RETURN_IF_ERROR(
161+
DetectSourceLanguage(elf_reader, dwarf_reader, t.mutable_program(), *symbol_name));
160162
}
161163
}
162164

src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
#pragma once
2020

21+
#include <string>
22+
2123
#include "src/common/base/base.h"
2224
#include "src/stirling/obj_tools/dwarf_reader.h"
2325
#include "src/stirling/obj_tools/elf_reader.h"
@@ -32,17 +34,20 @@ namespace dynamic_tracing {
3234
* Uses ELF or DWARF information to detect the source language.
3335
* Populates the language field of the given tracepoint program, based on the given symbol.
3436
*/
35-
void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
36-
ir::logical::TracepointDeployment* input_program);
37+
Status DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
38+
ir::logical::TracepointSpec* program, const std::string& symbol_name);
3739

3840
/**
3941
* Uses ELF information to check if the provided symbol exists.
4042
* If it does not exist, it checks whether it is a short-hand (suffix) of a full symbol.
4143
* If it is a short-hand reference to a symbol, the symbol is replaced with the full-form.
42-
* Potentially modifies each tracepoint's symbol field in input_program.
44+
* Also detects the source language for each resolved symbol using DWARF or ELF information.
45+
* Potentially modifies each tracepoint's symbol field and program's language field in
46+
* input_program.
4347
*/
44-
Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
45-
ir::logical::TracepointDeployment* input_program);
48+
Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader,
49+
obj_tools::DwarfReader* dwarf_reader,
50+
ir::logical::TracepointDeployment* input_program);
4651

4752
/**
4853
* If any tracepoint in input_program contains no fields to trace, this function uses DWARF info

0 commit comments

Comments
 (0)