diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index 55f7322029d0fa..b09364c74db043 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -49,6 +49,15 @@ struct CallSiteInfo { /// Bitwise OR of CallSiteInfo::Flags values uint8_t Flags = CallSiteInfo::Flags::None; + /// Returns true when ReturnOffset, MatchRegex and Flags all compare equal. + bool operator==(const CallSiteInfo &RHS) const { + return ReturnOffset == RHS.ReturnOffset && MatchRegex == RHS.MatchRegex && + Flags == RHS.Flags; + } + + /// Inequality comparison for CallSiteInfo; the negation of operator==. + bool operator!=(const CallSiteInfo &RHS) const { return !(*this == RHS); } + /// Decode a CallSiteInfo object from a binary data stream. /// /// \param Data The binary stream to read the data from. diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h index 9ccc96fbb4d5c6..98483805d066c3 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h @@ -49,6 +49,33 @@ struct LookupResult { /// deepest inline function will appear at index zero in the source locations /// array, and the concrete function will appear at the end of the array. SourceLocations Locations; + + /// Function name regex patterns associated with a call site at the lookup + /// address. This vector will be populated when: + /// 1. The lookup address matches a call site's return address in a function + /// 2. The call site has associated regex patterns that describe what + /// functions can be called from that location + /// + /// The regex patterns can be used to validate function calls during runtime + /// checking or symbolication. 
For example: + /// - Patterns like "^foo$" indicate the call site can only call function + /// "foo" + /// - Patterns like "^std::" indicate the call site can call any function in + /// the std namespace + /// - Multiple patterns allow matching against a set of allowed functions + /// + /// The patterns are stored as string references into the GSYM string table. + /// This information is typically loaded from: + /// - DWARF debug info call site entries + /// - External YAML files specifying call site patterns + /// - Other debug info formats that encode call site constraints + /// + /// The patterns will be empty if: + /// - The lookup address is not at the return address of a call site + /// - The call site has no associated function name constraints + /// - Call site info was not included when creating the GSYM file + std::vector<StringRef> CallSiteFuncRegex; + std::string getSourceFile(uint32_t Index) const; }; @@ -59,6 +86,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) { return false; if (LHS.FuncName != RHS.FuncName) return false; + if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex) + return false; return LHS.Locations == RHS.Locations; } diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index 785a8da64abe4c..41cf5f926cce75 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -301,6 +301,24 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR, InlineInfoData = InfoData; break; + case InfoType::CallSiteInfo: + if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) { + // Find matching call site based on relative offset + for (const auto &CS : CSIC->CallSites) { + // Check if the call site matches the lookup address + if (CS.ReturnOffset == Addr - FuncAddr) { + // Get regex patterns + for (uint32_t RegexOffset : CS.MatchRegex) { + LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset)); + } + break; + } + } + } else { + return 
CSIC.takeError(); + } + break; + default: break; } diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp index 0ac0be6fda8f6a..f906284455ebde 100644 --- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp +++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp @@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) { if (IsInlined) OS << " [inlined]"; } + + if (!LR.CallSiteFuncRegex.empty()) { + OS << "\n CallSites: "; + for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) { + if (i > 0) + OS << ", "; + OS << LR.CallSiteFuncRegex[i]; + } + } + OS << '\n'; return OS; } diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml index 5001ffdeab9e20..b9e18dee3238f6 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml @@ -42,6 +42,50 @@ # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2] # CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1] + +### Check that we can correctly resolve merged functions using callstacks: +### Resolve two callstacks containing merged functions. +### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`. +### The callstacks resolve differently based on the merged functions filter. 
+### 0x00000001000003d0 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340 +### 0x00000001000003e8 =========================> 0x000000010000035c => 0x0000000100000340 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s +# CHECK-C1: 0x00000001000003d0: main + 32 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:63 +# CHECK-C1-NEXT: CallSites: function2_copy2 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s +# CHECK-C2: 0x000000010000037c: function_inlined + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:35 [inlined] +# CHECK-C2-NEXT: function2_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:48 +# CHECK-C2-NEXT: CallSites: function3_copy1 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s +# CHECK-C3: Found 1 function at address 0x000000010000035c: +# CHECK-C3-NEXT: 0x000000010000035c: function3_copy1 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:28 +# CHECK-C3-NEXT: CallSites: function4_copy1 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s +# CHECK-C4: Found 1 function at address 0x0000000100000340: +# CHECK-C4-NEXT: 0x0000000100000340: function4_copy1 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:14 + +### ---------------------------------------------------------------------------------------------------------------------------------- +### Resolve the 2nd call stack - the 2nd and 3rd addresses are the same but they resolve to a different function because of the filter + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003e8 
--merged-functions | FileCheck --check-prefix=CHECK-C5 %s +# CHECK-C5: Found 1 function at address 0x00000001000003e8: +# CHECK-C5-NEXT: 0x00000001000003e8: main + 56 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:64 +# CHECK-C5-NEXT: CallSites: function3_copy2 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s +# CHECK-C6: Found 1 function at address 0x000000010000035c: +# CHECK-C6-NEXT: 0x000000010000035c: function3_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:28 +# CHECK-C6-NEXT: CallSites: function4_copy2 + +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --merged-functions-filter="function4_copy2" --address=0x0000000100000340 | FileCheck --check-prefix=CHECK-C7 %s +# CHECK-C7: Found 1 function at address 0x0000000100000340: +# CHECK-C7-NEXT: 0x0000000100000340: function4_copy2 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:14 + + #--- merged_funcs_test.cpp #define ATTRIB extern "C" __attribute__((noinline)) volatile int global_result = 0; diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index 89cd3ce6fc4138..15bc064ba6f2cc 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -46,3 +46,8 @@ def addresses_from_stdin : defm json_summary_file : Eq<"json-summary-file", "Output a categorized summary of errors into the JSON file specified.">; +defm merged_functions_filter : + Eq<"merged-functions-filter", + "When used with --address/--addresses-from-stdin and --merged-functions,\n" + "filters the merged functions output to only show functions matching any of the specified regex patterns.\n" + "Can be specified multiple times.">; diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 654da68bb69600..84934976be2c89 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ 
b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin; static bool UseMergedFunctions = false; static bool LoadDwarfCallSites = false; static std::string CallSiteYamlPath; +static std::vector<std::string> MergedFunctionsFilters; static void parseArgs(int argc, char **argv) { GSYMUtilOptTable Tbl; @@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) { } LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites); + + for (const llvm::opt::Arg *A : + Args.filtered(OPT_merged_functions_filter_EQ)) { + MergedFunctionsFilters.push_back(A->getValue()); + // Validate the filter is only used with correct flags + if (LookupAddresses.empty() && !LookupAddressesFromStdin) { + llvm::errs() << ToolName + << ": --merged-functions-filter can only be used with " + "--address/--addresses-from-stdin\n"; + std::exit(1); + } + if (!UseMergedFunctions) { + llvm::errs() + << ToolName + << ": --merged-functions-filter requires --merged-functions\n"; + std::exit(1); + } + } } /// @} @@ -510,9 +529,39 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) { static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) { if (UseMergedFunctions) { if (auto Results = Gsym.lookupAll(Addr)) { - OS << "Found " << Results->size() << " functions at address " - << HEX64(Addr) << ":\n"; + // Compile each filter once up front rather than once per result. + std::vector<Regex> Patterns; + Patterns.reserve(MergedFunctionsFilters.size()); + for (const auto &Filter : MergedFunctionsFilters) + Patterns.emplace_back(Filter); + + // A result is kept when no filter was given or any pattern matches it. + auto MatchesFilter = [&Patterns](const auto &Result) { + if (Patterns.empty()) + return true; + for (const Regex &Pattern : Patterns) { + if (Pattern.match(Result.FuncName)) + return true; + } + return false; + }; + + // Count the matching results first so the header reflects the output. + size_t NumMatching = 0; + for (const auto &Result : *Results) { + if (MatchesFilter(Result)) + NumMatching++; + } + + OS << "Found " << NumMatching << " function" + << (NumMatching != 1 ? 
"s" : "") << " at address " << HEX64(Addr) + << ":\n"; + for (size_t i = 0; i < Results->size(); ++i) { + // Skip results that match none of the filters. + if (!MatchesFilter(Results->at(i))) + continue; + OS << " " << Results->at(i); if (i != Results->size() - 1) @@ -529,6 +578,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) { OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; } } + // Don't print call site info if --merged-functions is not specified. + Result->CallSiteFuncRegex.clear(); OS << Result.get(); } else { if (Verbose)