aboutsummaryrefslogtreecommitdiff
path: root/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
blob: 2a4eac77d94652a27837f761d9b93f3d3aac4f32 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H


#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"

#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <string>
#include <vector>

namespace llvm {
class MemoryBuffer;
class raw_ostream;

namespace gsym {

/// GsymReader is used to read GSYM data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table are designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
///
/// GsymReader objects must use one of the static functions to create an
/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).

class GsymReader {
  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
  llvm::Error parse();

  /// The memory buffer handed to the constructor.
  std::unique_ptr<MemoryBuffer> MemBuffer;
  StringRef GsymBytes;
  llvm::support::endianness Endian;
  /// The header that accessors such as getNumAddresses() and
  /// addressForIndex() read from.
  const Header *Hdr = nullptr;
  /// The address table as raw bytes. It is reinterpreted as an array of 1, 2,
  /// 4 or 8 byte offsets by getAddrOffsets<T>().
  ArrayRef<uint8_t> AddrOffsets;
  ArrayRef<uint32_t> AddrInfoOffsets;
  ArrayRef<FileEntry> Files;
  StringTable StrTab;
  /// When the GSYM file's endianness doesn't match the host system then
  /// we must decode all data structures that need to be swapped into
  /// local storage and point the ArrayRef objects above to these swapped
  /// copies.
  struct SwappedData {
    Header Hdr;
    std::vector<uint8_t> AddrOffsets;
    std::vector<uint32_t> AddrInfoOffsets;
    std::vector<FileEntry> Files;
  };
  std::unique_ptr<SwappedData> Swap;

public:
  GsymReader(GsymReader &&RHS);
  ~GsymReader();

  /// Construct a GsymReader from a file on disk.
  ///
  /// \param Path The file path of the GSYM file to read.
  /// \returns An expected GsymReader that contains the object or an error
  /// object that indicates reason for failing to read the GSYM.
  static llvm::Expected<GsymReader> openFile(StringRef Path);

  /// Construct a GsymReader from a buffer.
  ///
  /// \param Bytes A set of bytes that will be copied and owned by the
  /// returned object on success.
  /// \returns An expected GsymReader that contains the object or an error
  /// object that indicates reason for failing to read the GSYM.
  static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);

  /// Access the GSYM header.
  /// \returns A native endian version of the GSYM header.
  const Header &getHeader() const;

  /// Get the full function info for an address.
  ///
  /// This should be called when a client will store a copy of the complete
  /// FunctionInfo for a given address. For one off lookups, use the lookup()
  /// function below.
  ///
  /// Symbolication server processes might want to parse the entire function
  /// info for a given address and cache it if the process stays around to
  /// service many symbolication addresses, like for parsing profiling
  /// information.
  ///
  /// \param Addr A virtual address from the original object file to lookup.
  ///
  /// \returns An expected FunctionInfo that contains the function info object
  /// or an error object that indicates reason for failing to lookup the
  /// address.
  llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;

  /// Lookup an address in a GSYM.
  ///
  /// Lookup just the information needed for a specific address \a Addr. This
  /// function is faster than calling getFunctionInfo() as it will only return
  /// information that pertains to \a Addr and allows the parsing to skip any
  /// extra information encoded for other addresses. For example the line table
  /// parsing can stop when a matching LineEntry has been found, and the
  /// InlineInfo can stop parsing early once a match has been found and also
  /// skip information that doesn't match. This avoids memory allocations and
  /// is much faster for lookups.
  ///
  /// \param Addr A virtual address from the original object file to lookup.
  /// \returns An expected LookupResult that contains only the information
  /// needed for the current address, or an error object that indicates reason
  /// for failing to lookup the address.
  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;

  /// Get a string from the string table.
  ///
  /// \param Offset The string table offset for the string to retrieve.
  /// \returns The string from the string table.
  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }

  /// Get a file entry for the supplied file index.
  ///
  /// Used to convert any file indexes in the FunctionInfo data back into
  /// files. This function can be used for iteration, but is more commonly used
  /// for random access when doing lookups.
  ///
  /// \param Index An index into the file table.
  /// \returns An optional FileEntry that will be valid if the file index is
  /// valid, or llvm::None if the file index is out of bounds.
  Optional<FileEntry> getFile(uint32_t Index) const {
    if (Index < Files.size())
      return Files[Index];
    return llvm::None;
  }

  /// Dump the entire Gsym data contained in this object.
  ///
  /// \param OS The output stream to dump to.
  void dump(raw_ostream &OS);

  /// Dump a FunctionInfo object.
  ///
  /// This function will convert any string table indexes and file indexes
  /// into human readable format.
  ///
  /// \param OS The output stream to dump to.
  ///
  /// \param FI The object to dump.
  void dump(raw_ostream &OS, const FunctionInfo &FI);

  /// Dump a LineTable object.
  ///
  /// This function will convert any string table indexes and file indexes
  /// into human readable format.
  ///
  /// \param OS The output stream to dump to.
  ///
  /// \param LT The object to dump.
  void dump(raw_ostream &OS, const LineTable &LT);

  /// Dump an InlineInfo object.
  ///
  /// This function will convert any string table indexes and file indexes
  /// into human readable format.
  ///
  /// \param OS The output stream to dump to.
  ///
  /// \param II The object to dump.
  ///
  /// \param Indent The indentation as number of spaces. Used for recursive
  /// dumping.
  void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);

  /// Dump a FileEntry object.
  ///
  /// This function will convert any string table indexes into human readable
  /// format.
  ///
  /// \param OS The output stream to dump to.
  ///
  /// \param FE The object to dump.
  void dump(raw_ostream &OS, Optional<FileEntry> FE);

  /// Get the number of addresses in this Gsym file.
  ///
  /// The header pointer is dereferenced unconditionally, so this must only be
  /// called on a reader whose header has been set.
  uint32_t getNumAddresses() const {
    return Hdr->NumAddresses;
  }

  /// Gets an address from the address table.
  ///
  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
  ///
  /// \param Index An index into the address table.
  /// \returns A resolved virtual address for the address in the address table
  /// or llvm::None if Index is out of bounds.
  Optional<uint64_t> getAddress(size_t Index) const;

protected:

  /// Get an appropriately typed view of the address offsets array.
  ///
  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
  /// byte offsets from the gsym::Header::BaseAddress. The table is stored
  /// internally as an array of bytes that are in the correct endianness. When
  /// we access this table we must get an array that matches those sizes. This
  /// templatized helper function is used when accessing address offsets in the
  /// AddrOffsets member variable.
  ///
  /// \returns An ArrayRef of an appropriate address offset size. Any trailing
  /// bytes that do not make up a whole \a T are not part of the returned
  /// array.
  template <class T>
  ArrayRef<T> getAddrOffsets() const {
    // The element count is the byte count divided (rounding down) by the
    // element size.
    return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
                       AddrOffsets.size() / sizeof(T));
  }

  /// Get an appropriate address from the address table.
  ///
  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
  /// byte address offsets from the gsym::Header::BaseAddress. The table is
  /// stored internally as an array of bytes that are in the correct
  /// endianness. In order to extract an address from the address table we must
  /// access the address offset using the correct size and then add it to the
  /// BaseAddress in the header.
  ///
  /// \param Index An index into the AddrOffsets array.
  /// \returns A virtual address that matches the original object file for the
  /// address at the specified index, or llvm::None if Index is out of bounds.
  template <class T>
  Optional<uint64_t> addressForIndex(size_t Index) const {
    ArrayRef<T> AIO = getAddrOffsets<T>();
    if (Index < AIO.size())
      return AIO[Index] + Hdr->BaseAddress;
    return llvm::None;
  }

  /// Lookup an address offset in the AddrOffsets table.
  ///
  /// Given an address offset, look it up using a binary search of the
  /// AddrOffsets table.
  ///
  /// \param AddrOffset An address offset, that has already been computed by
  /// subtracting the gsym::Header::BaseAddress.
  /// \returns The matching address offset index: the first entry equal to
  /// \a AddrOffset if there is one, otherwise the entry immediately preceding
  /// where \a AddrOffset would be inserted. This index will be used to
  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  /// Returns llvm::None if the table is empty or if \a AddrOffset is smaller
  /// than the first entry.
  template <class T>
  llvm::Optional<uint64_t>
  getAddressOffsetIndex(const uint64_t AddrOffset) const {
    ArrayRef<T> AIO = getAddrOffsets<T>();
    const auto Begin = AIO.begin();
    const auto End = AIO.end();
    // An empty table can't contain any address. Bail out before the checks
    // below, which would otherwise dereference the end iterator and step
    // before the start of the array.
    if (Begin == End)
      return llvm::None;
    auto Iter = std::lower_bound(Begin, End, AddrOffset);
    // Watch for addresses that fall between the gsym::Header::BaseAddress and
    // the first address offset.
    if (Iter == Begin && AddrOffset < *Begin)
      return llvm::None;
    // lower_bound yields the first entry that is >= AddrOffset. Unless that
    // is an exact match, the entry we want is the one just before it.
    if (Iter == End || AddrOffset < *Iter)
      --Iter;
    return std::distance(Begin, Iter);
  }

  /// Create a GSYM from a memory buffer.
  ///
  /// Called by both openFile() and copyBuffer(), this function does all of the
  /// work of parsing the GSYM file and returning an error.
  ///
  /// \param MemBuffer A memory buffer that will transfer ownership into the
  /// GsymReader.
  /// \returns An expected GsymReader that contains the object or an error
  /// object that indicates reason for failing to read the GSYM.
  static llvm::Expected<llvm::gsym::GsymReader>
  create(std::unique_ptr<MemoryBuffer> &MemBuffer);


  /// Given an address, find the address index.
  ///
  /// Binary search the address table and find the matching address index.
  ///
  /// \param Addr A virtual address that matches the original object file
  /// to lookup.
  /// \returns An index into the address table. This index can be used to
  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  /// Returns an error if the address isn't in the GSYM with details of why.
  Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;

  /// Given an address index, get the offset for the FunctionInfo.
  ///
  /// Looking up an address is done by finding the corresponding address
  /// index for the address. This index is then used to get the offset of the
  /// FunctionInfo data that we will decode using this function.
  ///
  /// \param Index An index into the address table.
  /// \returns An optional GSYM data offset for the offset of the FunctionInfo
  /// that needs to be decoded.
  Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
};

} // namespace gsym
} // namespace llvm

#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H