-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm-gsymutil] Add support for merged functions lookup differentiation #122409
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-debuginfo Author: None (alx32) Changes: This update introduces the ability to filter merged functions during lookups based on regex patterns derived from call site information in a previous call to `llvm-gsymutil`. Full diff: https://github.com/llvm/llvm-project/pull/122409.diff 7 Files Affected:
diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 55f7322029d0fa..b09364c74db043 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -49,6 +49,15 @@ struct CallSiteInfo {
/// Bitwise OR of CallSiteInfo::Flags values
uint8_t Flags = CallSiteInfo::Flags::None;
+ /// Compare this call site against another one, field by field.
+ ///
+ /// \param RHS The call site info to compare with.
+ /// \returns True when the return offset, the match regex list and the
+ /// flags of both objects are equal.
+ bool operator==(const CallSiteInfo &RHS) const {
+ if (!(ReturnOffset == RHS.ReturnOffset))
+ return false;
+ if (!(MatchRegex == RHS.MatchRegex))
+ return false;
+ return Flags == RHS.Flags;
+ }
+
+ /// Logical negation of the equality operator.
+ ///
+ /// \param RHS The call site info to compare with.
+ /// \returns True when the two objects do not compare equal.
+ bool operator!=(const CallSiteInfo &RHS) const {
+ const bool Same = (*this == RHS);
+ return !Same;
+ }
+
/// Decode a CallSiteInfo object from a binary data stream.
///
/// \param Data The binary stream to read the data from.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c6..c4d8a8cc1795eb 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -49,6 +49,9 @@ struct LookupResult {
/// deepest inline function will appear at index zero in the source locations
/// array, and the concrete function will appear at the end of the array.
SourceLocations Locations;
+ /// Function name regex patterns for the call site.
+ std::vector<StringRef> CallSiteFuncRegex;
+
std::string getSourceFile(uint32_t Index) const;
};
@@ -59,6 +62,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
return false;
if (LHS.FuncName != RHS.FuncName)
return false;
+ if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex)
+ return false;
return LHS.Locations == RHS.Locations;
}
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 785a8da64abe4c..41cf5f926cce75 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -301,6 +301,23 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
InlineInfoData = InfoData;
break;
+ case InfoType::CallSiteInfo:
+ // Decode the call site records for this function and, if one of them has
+ // a return offset equal to the lookup address relative to the function
+ // start, copy its regex pattern strings into the lookup result.
+ if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) {
+ // Find matching call site based on relative offset
+ for (const auto &CS : CSIC->CallSites) {
+ // Check if the call site matches the lookup address
+ if (CS.ReturnOffset == Addr - FuncAddr) {
+ // Resolve each regex string-table offset to its string.
+ for (uint32_t RegexOffset : CS.MatchRegex) {
+ LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset));
+ }
+ // Only the first matching call site is used.
+ break;
+ }
+ }
+ } else {
+ // Propagate the decoding failure to the caller.
+ return CSIC.takeError();
+ }
+ // Terminate the case explicitly instead of silently falling through into
+ // the default label below.
+ break;
+
default:
break;
}
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
index 0ac0be6fda8f6a..e3f13cd7abdb8a 100644
--- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
if (IsInlined)
OS << " [inlined]";
}
+
+ if (!LR.CallSiteFuncRegex.empty()) {
+ OS << "\n +CallSites:";
+ for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) {
+ if (i > 0)
+ OS << ",";
+ OS << LR.CallSiteFuncRegex[i];
+ }
+ }
+
OS << '\n';
return OS;
}
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index 5001ffdeab9e20..dd09fa936d1990 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -42,6 +42,50 @@
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
+
+### Check that we can correctly resolve merged functions using callstacks:
+### Resolve two callstacks containing merged functions.
+### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`.
+### The callstacks resolve differently based on the merged functions filter.
+### 0x00000001000003d0 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340
+### 0x00000001000003e8 =========================> 0x000000010000035c => 0x0000000100000340
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s
+# CHECK-C1: 0x00000001000003d0: main + 32 @ /tmp/tst/out/merged_funcs_test.cpp:63
+# CHECK-C1-NEXT: +CallSites:function2_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s
+# CHECK-C2: 0x000000010000037c: function_inlined + 8 @ /tmp/tst/out/merged_funcs_test.cpp:35 [inlined]
+# CHECK-C2-NEXT: function2_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:48
+# CHECK-C2-NEXT: +CallSites:function3_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s
+# CHECK-C3: Found 1 function at address 0x000000010000035c:
+# CHECK-C3-NEXT: 0x000000010000035c: function3_copy1 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C3-NEXT: +CallSites:function4_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s
+# CHECK-C4: Found 1 function at address 0x0000000100000340:
+# CHECK-C4-NEXT: 0x0000000100000340: function4_copy1 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+### ----------------------------------------------------------------------------------------------------------------------------------
+### Resolve the 2nd call stack - the 2nd and 3rd addresses are the same but they resolve to a different function because of the filter
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003e8 | FileCheck --check-prefix=CHECK-C5 %s
+# CHECK-C5: Found 1 function at address 0x00000001000003e8:
+# CHECK-C5-NEXT: 0x00000001000003e8: main + 56 @ /tmp/tst/out/merged_funcs_test.cpp:64
+# CHECK-C5-NEXT: +CallSites:function3_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s
+# CHECK-C6: Found 1 function at address 0x000000010000035c:
+# CHECK-C6-NEXT: 0x000000010000035c: function3_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C6-NEXT: +CallSites:function4_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy2" | FileCheck --check-prefix=CHECK-C7 %s
+# CHECK-C7: Found 1 function at address 0x0000000100000340:
+# CHECK-C7-NEXT: 0x0000000100000340: function4_copy2 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+
#--- merged_funcs_test.cpp
#define ATTRIB extern "C" __attribute__((noinline))
volatile int global_result = 0;
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 89cd3ce6fc4138..15bc064ba6f2cc 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,3 +46,8 @@ def addresses_from_stdin :
defm json_summary_file :
Eq<"json-summary-file",
"Output a categorized summary of errors into the JSON file specified.">;
+// --merged-functions-filter=<regex>: per the help text below, restricts the
+// merged-functions lookup output to functions matching any supplied pattern;
+// the option may be repeated.
+defm merged_functions_filter :
+ Eq<"merged-functions-filter",
+ "When used with --address/--addresses-from-stdin and --merged-functions,\n"
+ "filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
+ "Can be specified multiple times.">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 654da68bb69600..84934976be2c89 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin;
static bool UseMergedFunctions = false;
static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;
+static std::vector<std::string> MergedFunctionsFilters;
static void parseArgs(int argc, char **argv) {
GSYMUtilOptTable Tbl;
@@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) {
}
LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
+
+ for (const llvm::opt::Arg *A :
+ Args.filtered(OPT_merged_functions_filter_EQ)) {
+ MergedFunctionsFilters.push_back(A->getValue());
+ // Validate the filter is only used with correct flags
+ if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
+ llvm::errs() << ToolName
+ << ": --merged-functions-filter can only be used with "
+ "--address/--addresses-from-stdin\n";
+ std::exit(1);
+ }
+ if (!UseMergedFunctions) {
+ llvm::errs()
+ << ToolName
+ << ": --merged-functions-filter requires --merged-functions\n";
+ std::exit(1);
+ }
+ }
}
/// @}
@@ -510,9 +529,43 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
- OS << "Found " << Results->size() << " functions at address "
- << HEX64(Addr) << ":\n";
+ // If we have filters, count matching results first
+ size_t NumMatching = Results->size();
+ if (!MergedFunctionsFilters.empty()) {
+ NumMatching = 0;
+ for (const auto &Result : *Results) {
+ bool Matches = false;
+ for (const auto &Filter : MergedFunctionsFilters) {
+ Regex Pattern(Filter);
+ if (Pattern.match(Result.FuncName)) {
+ Matches = true;
+ break;
+ }
+ }
+ if (Matches)
+ NumMatching++;
+ }
+ }
+
+ OS << "Found " << NumMatching << " function"
+ << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
+ << ":\n";
+
for (size_t i = 0; i < Results->size(); ++i) {
+ // Skip if doesn't match any filter
+ if (!MergedFunctionsFilters.empty()) {
+ bool Matches = false;
+ for (const auto &Filter : MergedFunctionsFilters) {
+ Regex Pattern(Filter);
+ if (Pattern.match(Results->at(i).FuncName)) {
+ Matches = true;
+ break;
+ }
+ }
+ if (!Matches)
+ continue;
+ }
+
OS << " " << Results->at(i);
if (i != Results->size() - 1)
@@ -529,6 +582,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
}
}
+ // Don't print call site info if --merged-functions is not specified.
+ Result->CallSiteFuncRegex.clear();
OS << Result.get();
} else {
if (Verbose)
|
This update introduces the ability to filter merged functions during lookups based on regex patterns derived from call site information in a previous call to `llvm-gsymutil`. The regex patterns, extracted from call sites, can then be passed to subsequent calls using the `--merged-functions-filter` option along with `--merged-functions` and `--address` (or `--addresses-from-stdin`). This allows for precise filtering of functions during lookups, giving accurate results for call stacks that contain merged functions.