symbolize.cc 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. // Copyright (c) 2006, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. //
  30. // Author: Satoru Takabayashi
  31. // Stack-footprint reduction work done by Raksit Ashok
  32. //
  33. // Implementation note:
  34. //
  35. // We don't use heaps but only use stacks. We want to reduce the
  36. // stack consumption so that the symbolizer can run on small stacks.
  37. //
  38. // Here are some numbers collected with GCC 4.1.0 on x86:
  39. // - sizeof(Elf32_Sym) = 16
  40. // - sizeof(Elf32_Shdr) = 40
  41. // - sizeof(Elf64_Sym) = 24
  42. // - sizeof(Elf64_Shdr) = 64
  43. //
  44. // This implementation is intended to be async-signal-safe but uses
  45. // some functions which are not guaranteed to be so, such as memchr()
  46. // and memmove(). We assume they are async-signal-safe.
  47. //
  48. // Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE
  49. // macro to add platform specific defines (e.g. OS_OPENBSD).
  50. #ifdef GLOG_BUILD_CONFIG_INCLUDE
  51. #include GLOG_BUILD_CONFIG_INCLUDE
  52. #endif // GLOG_BUILD_CONFIG_INCLUDE
  53. #include "utilities.h"
  54. #if defined(HAVE_SYMBOLIZE)
  55. #include <limits>
  56. #include "symbolize.h"
  57. #include "demangle.h"
  58. _START_GOOGLE_NAMESPACE_
  59. // We don't use assert() since it's not guaranteed to be
  60. // async-signal-safe. Instead we define a minimal assertion
  61. // macro. So far, we don't need pretty printing for __FILE__, etc.
  62. // A wrapper for abort() to make it callable in ? :.
  63. static int AssertFail() {
  64. abort();
  65. return 0; // Should not reach.
  66. }
  67. #define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
  68. static SymbolizeCallback g_symbolize_callback = NULL;
  69. void InstallSymbolizeCallback(SymbolizeCallback callback) {
  70. g_symbolize_callback = callback;
  71. }
  72. static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback =
  73. NULL;
  74. void InstallSymbolizeOpenObjectFileCallback(
  75. SymbolizeOpenObjectFileCallback callback) {
  76. g_symbolize_open_object_file_callback = callback;
  77. }
  78. // This function wraps the Demangle function to provide an interface
  79. // where the input symbol is demangled in-place.
  80. // To keep stack consumption low, we would like this function to not
  81. // get inlined.
  82. static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) {
  83. char demangled[256]; // Big enough for sane demangled symbols.
  84. if (Demangle(out, demangled, sizeof(demangled))) {
  85. // Demangling succeeded. Copy to out if the space allows.
  86. size_t len = strlen(demangled);
  87. if (len + 1 <= (size_t)out_size) { // +1 for '\0'.
  88. SAFE_ASSERT(len < sizeof(demangled));
  89. memmove(out, demangled, len + 1);
  90. }
  91. }
  92. }
  93. _END_GOOGLE_NAMESPACE_
  94. #if defined(__ELF__)
  95. #include <dlfcn.h>
  96. #if defined(OS_OPENBSD)
  97. #include <sys/exec_elf.h>
  98. #else
  99. #include <elf.h>
  100. #endif
  101. #include <errno.h>
  102. #include <fcntl.h>
  103. #include <limits.h>
  104. #include <stdint.h>
  105. #include <stdio.h>
  106. #include <stdlib.h>
  107. #include <stddef.h>
  108. #include <string.h>
  109. #include <sys/stat.h>
  110. #include <sys/types.h>
  111. #include <unistd.h>
  112. #include "symbolize.h"
  113. #include "config.h"
  114. #include "glog/raw_logging.h"
  115. // Re-runs fn until it doesn't cause EINTR.
  116. #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
  117. _START_GOOGLE_NAMESPACE_
  118. // Read up to "count" bytes from file descriptor "fd" into the buffer
  119. // starting at "buf" while handling short reads and EINTR. On
  120. // success, return the number of bytes read. Otherwise, return -1.
  121. static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) {
  122. SAFE_ASSERT(fd >= 0);
  123. SAFE_ASSERT(count <= std::numeric_limits<ssize_t>::max());
  124. char *buf0 = reinterpret_cast<char *>(buf);
  125. ssize_t num_bytes = 0;
  126. while (num_bytes < count) {
  127. ssize_t len;
  128. NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes));
  129. if (len < 0) { // There was an error other than EINTR.
  130. return -1;
  131. }
  132. if (len == 0) { // Reached EOF.
  133. break;
  134. }
  135. num_bytes += len;
  136. }
  137. SAFE_ASSERT(num_bytes <= count);
  138. return num_bytes;
  139. }
  140. // Read up to "count" bytes from "offset" in the file pointed by file
  141. // descriptor "fd" into the buffer starting at "buf". On success,
  142. // return the number of bytes read. Otherwise, return -1.
  143. static ssize_t ReadFromOffset(const int fd, void *buf,
  144. const size_t count, const off_t offset) {
  145. off_t off = lseek(fd, offset, SEEK_SET);
  146. if (off == (off_t)-1) {
  147. return -1;
  148. }
  149. return ReadPersistent(fd, buf, count);
  150. }
  151. // Try reading exactly "count" bytes from "offset" bytes in a file
  152. // pointed by "fd" into the buffer starting at "buf" while handling
  153. // short reads and EINTR. On success, return true. Otherwise, return
  154. // false.
  155. static bool ReadFromOffsetExact(const int fd, void *buf,
  156. const size_t count, const off_t offset) {
  157. ssize_t len = ReadFromOffset(fd, buf, count, offset);
  158. return len == count;
  159. }
  160. // Returns elf_header.e_type if the file pointed by fd is an ELF binary.
  161. static int FileGetElfType(const int fd) {
  162. ElfW(Ehdr) elf_header;
  163. if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
  164. return -1;
  165. }
  166. if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
  167. return -1;
  168. }
  169. return elf_header.e_type;
  170. }
  171. // Read the section headers in the given ELF binary, and if a section
  172. // of the specified type is found, set the output to this section header
  173. // and return true. Otherwise, return false.
  174. // To keep stack consumption low, we would like this function to not get
  175. // inlined.
  176. static ATTRIBUTE_NOINLINE bool
  177. GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset,
  178. ElfW(Word) type, ElfW(Shdr) *out) {
  179. // Read at most 16 section headers at a time to save read calls.
  180. ElfW(Shdr) buf[16];
  181. for (int i = 0; i < sh_num;) {
  182. const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]);
  183. const ssize_t num_bytes_to_read =
  184. (sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf);
  185. const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read,
  186. sh_offset + i * sizeof(buf[0]));
  187. SAFE_ASSERT(len % sizeof(buf[0]) == 0);
  188. const ssize_t num_headers_in_buf = len / sizeof(buf[0]);
  189. SAFE_ASSERT(num_headers_in_buf <= sizeof(buf) / sizeof(buf[0]));
  190. for (int j = 0; j < num_headers_in_buf; ++j) {
  191. if (buf[j].sh_type == type) {
  192. *out = buf[j];
  193. return true;
  194. }
  195. }
  196. i += num_headers_in_buf;
  197. }
  198. return false;
  199. }
  200. // There is no particular reason to limit section name to 63 characters,
  201. // but there has (as yet) been no need for anything longer either.
  202. const int kMaxSectionNameLen = 64;
  203. // name_len should include terminating '\0'.
  204. bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
  205. ElfW(Shdr) *out) {
  206. ElfW(Ehdr) elf_header;
  207. if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
  208. return false;
  209. }
  210. ElfW(Shdr) shstrtab;
  211. off_t shstrtab_offset = (elf_header.e_shoff +
  212. elf_header.e_shentsize * elf_header.e_shstrndx);
  213. if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
  214. return false;
  215. }
  216. for (int i = 0; i < elf_header.e_shnum; ++i) {
  217. off_t section_header_offset = (elf_header.e_shoff +
  218. elf_header.e_shentsize * i);
  219. if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
  220. return false;
  221. }
  222. char header_name[kMaxSectionNameLen];
  223. if (sizeof(header_name) < name_len) {
  224. RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); "
  225. "section will not be found (even if present).", name, name_len);
  226. // No point in even trying.
  227. return false;
  228. }
  229. off_t name_offset = shstrtab.sh_offset + out->sh_name;
  230. ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
  231. if (n_read == -1) {
  232. return false;
  233. } else if (n_read != name_len) {
  234. // Short read -- name could be at end of file.
  235. continue;
  236. }
  237. if (memcmp(header_name, name, name_len) == 0) {
  238. return true;
  239. }
  240. }
  241. return false;
  242. }
  243. // Read a symbol table and look for the symbol containing the
  244. // pc. Iterate over symbols in a symbol table and look for the symbol
  245. // containing "pc". On success, return true and write the symbol name
  246. // to out. Otherwise, return false.
  247. // To keep stack consumption low, we would like this function to not get
  248. // inlined.
  249. static ATTRIBUTE_NOINLINE bool
  250. FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
  251. uint64_t symbol_offset, const ElfW(Shdr) *strtab,
  252. const ElfW(Shdr) *symtab) {
  253. if (symtab == NULL) {
  254. return false;
  255. }
  256. const int num_symbols = symtab->sh_size / symtab->sh_entsize;
  257. for (int i = 0; i < num_symbols;) {
  258. off_t offset = symtab->sh_offset + i * symtab->sh_entsize;
  259. // If we are reading Elf64_Sym's, we want to limit this array to
  260. // 32 elements (to keep stack consumption low), otherwise we can
  261. // have a 64 element Elf32_Sym array.
  262. #if __WORDSIZE == 64
  263. #define NUM_SYMBOLS 32
  264. #else
  265. #define NUM_SYMBOLS 64
  266. #endif
  267. // Read at most NUM_SYMBOLS symbols at once to save read() calls.
  268. ElfW(Sym) buf[NUM_SYMBOLS];
  269. const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset);
  270. SAFE_ASSERT(len % sizeof(buf[0]) == 0);
  271. const ssize_t num_symbols_in_buf = len / sizeof(buf[0]);
  272. SAFE_ASSERT(num_symbols_in_buf <= sizeof(buf)/sizeof(buf[0]));
  273. for (int j = 0; j < num_symbols_in_buf; ++j) {
  274. const ElfW(Sym)& symbol = buf[j];
  275. uint64_t start_address = symbol.st_value;
  276. start_address += symbol_offset;
  277. uint64_t end_address = start_address + symbol.st_size;
  278. if (symbol.st_value != 0 && // Skip null value symbols.
  279. symbol.st_shndx != 0 && // Skip undefined symbols.
  280. start_address <= pc && pc < end_address) {
  281. ssize_t len1 = ReadFromOffset(fd, out, out_size,
  282. strtab->sh_offset + symbol.st_name);
  283. if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) {
  284. return false;
  285. }
  286. return true; // Obtained the symbol name.
  287. }
  288. }
  289. i += num_symbols_in_buf;
  290. }
  291. return false;
  292. }
  293. // Get the symbol name of "pc" from the file pointed by "fd". Process
  294. // both regular and dynamic symbol tables if necessary. On success,
  295. // write the symbol name to "out" and return true. Otherwise, return
  296. // false.
  297. static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
  298. char *out, int out_size,
  299. uint64_t map_base_address) {
  300. // Read the ELF header.
  301. ElfW(Ehdr) elf_header;
  302. if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
  303. return false;
  304. }
  305. uint64_t symbol_offset = 0;
  306. if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment.
  307. ElfW(Phdr) phdr;
  308. // We need to find the PT_LOAD segment corresponding to the read-execute
  309. // file mapping in order to correctly perform the offset adjustment.
  310. for (unsigned i = 0; i != elf_header.e_phnum; ++i) {
  311. if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr),
  312. elf_header.e_phoff + i * sizeof(phdr)))
  313. return false;
  314. if (phdr.p_type == PT_LOAD &&
  315. (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
  316. // Find the mapped address corresponding to virtual address zero. We do
  317. // this by first adding p_offset. This gives us the mapped address of
  318. // the start of the segment, or in other words the mapped address
  319. // corresponding to the virtual address of the segment. (Note that this
  320. // is distinct from the start address, as p_offset is not guaranteed to
  321. // be page aligned.) We then subtract p_vaddr, which takes us to virtual
  322. // address zero.
  323. symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr;
  324. break;
  325. }
  326. }
  327. if (symbol_offset == 0)
  328. return false;
  329. }
  330. ElfW(Shdr) symtab, strtab;
  331. // Consult a regular symbol table first.
  332. if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
  333. SHT_SYMTAB, &symtab)) {
  334. if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
  335. symtab.sh_link * sizeof(symtab))) {
  336. return false;
  337. }
  338. if (FindSymbol(pc, fd, out, out_size, symbol_offset,
  339. &strtab, &symtab)) {
  340. return true; // Found the symbol in a regular symbol table.
  341. }
  342. }
  343. // If the symbol is not found, then consult a dynamic symbol table.
  344. if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
  345. SHT_DYNSYM, &symtab)) {
  346. if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
  347. symtab.sh_link * sizeof(symtab))) {
  348. return false;
  349. }
  350. if (FindSymbol(pc, fd, out, out_size, symbol_offset,
  351. &strtab, &symtab)) {
  352. return true; // Found the symbol in a dynamic symbol table.
  353. }
  354. }
  355. return false;
  356. }
  357. namespace {
  358. // Thin wrapper around a file descriptor so that the file descriptor
  359. // gets closed for sure.
  360. struct FileDescriptor {
  361. const int fd_;
  362. explicit FileDescriptor(int fd) : fd_(fd) {}
  363. ~FileDescriptor() {
  364. if (fd_ >= 0) {
  365. NO_INTR(close(fd_));
  366. }
  367. }
  368. int get() { return fd_; }
  369. private:
  370. explicit FileDescriptor(const FileDescriptor&);
  371. void operator=(const FileDescriptor&);
  372. };
  373. // Helper class for reading lines from file.
  374. //
  375. // Note: we don't use ProcMapsIterator since the object is big (it has
  376. // a 5k array member) and uses async-unsafe functions such as sscanf()
  377. // and snprintf().
  378. class LineReader {
  379. public:
  380. explicit LineReader(int fd, char *buf, int buf_len) : fd_(fd),
  381. buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) {
  382. }
  383. // Read '\n'-terminated line from file. On success, modify "bol"
  384. // and "eol", then return true. Otherwise, return false.
  385. //
  386. // Note: if the last line doesn't end with '\n', the line will be
  387. // dropped. It's an intentional behavior to make the code simple.
  388. bool ReadLine(const char **bol, const char **eol) {
  389. if (BufferIsEmpty()) { // First time.
  390. const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_);
  391. if (num_bytes <= 0) { // EOF or error.
  392. return false;
  393. }
  394. eod_ = buf_ + num_bytes;
  395. bol_ = buf_;
  396. } else {
  397. bol_ = eol_ + 1; // Advance to the next line in the buffer.
  398. SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_".
  399. if (!HasCompleteLine()) {
  400. const int incomplete_line_length = eod_ - bol_;
  401. // Move the trailing incomplete line to the beginning.
  402. memmove(buf_, bol_, incomplete_line_length);
  403. // Read text from file and append it.
  404. char * const append_pos = buf_ + incomplete_line_length;
  405. const int capacity_left = buf_len_ - incomplete_line_length;
  406. const ssize_t num_bytes = ReadPersistent(fd_, append_pos,
  407. capacity_left);
  408. if (num_bytes <= 0) { // EOF or error.
  409. return false;
  410. }
  411. eod_ = append_pos + num_bytes;
  412. bol_ = buf_;
  413. }
  414. }
  415. eol_ = FindLineFeed();
  416. if (eol_ == NULL) { // '\n' not found. Malformed line.
  417. return false;
  418. }
  419. *eol_ = '\0'; // Replace '\n' with '\0'.
  420. *bol = bol_;
  421. *eol = eol_;
  422. return true;
  423. }
  424. // Beginning of line.
  425. const char *bol() {
  426. return bol_;
  427. }
  428. // End of line.
  429. const char *eol() {
  430. return eol_;
  431. }
  432. private:
  433. explicit LineReader(const LineReader&);
  434. void operator=(const LineReader&);
  435. char *FindLineFeed() {
  436. return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_));
  437. }
  438. bool BufferIsEmpty() {
  439. return buf_ == eod_;
  440. }
  441. bool HasCompleteLine() {
  442. return !BufferIsEmpty() && FindLineFeed() != NULL;
  443. }
  444. const int fd_;
  445. char * const buf_;
  446. const int buf_len_;
  447. char *bol_;
  448. char *eol_;
  449. const char *eod_; // End of data in "buf_".
  450. };
  451. } // namespace
  452. // Place the hex number read from "start" into "*hex". The pointer to
  453. // the first non-hex character or "end" is returned.
  454. static char *GetHex(const char *start, const char *end, uint64_t *hex) {
  455. *hex = 0;
  456. const char *p;
  457. for (p = start; p < end; ++p) {
  458. int ch = *p;
  459. if ((ch >= '0' && ch <= '9') ||
  460. (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
  461. *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9);
  462. } else { // Encountered the first non-hex character.
  463. break;
  464. }
  465. }
  466. SAFE_ASSERT(p <= end);
  467. return const_cast<char *>(p);
  468. }
  469. // Searches for the object file (from /proc/self/maps) that contains
  470. // the specified pc. If found, sets |start_address| to the start address
  471. // of where this object file is mapped in memory, sets the module base
  472. // address into |base_address|, copies the object file name into
  473. // |out_file_name|, and attempts to open the object file. If the object
  474. // file is opened successfully, returns the file descriptor. Otherwise,
  475. // returns -1. |out_file_name_size| is the size of the file name buffer
  476. // (including the null-terminator).
  477. static ATTRIBUTE_NOINLINE int
  478. OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
  479. uint64_t &start_address,
  480. uint64_t &base_address,
  481. char *out_file_name,
  482. int out_file_name_size) {
  483. int object_fd;
  484. // Open /proc/self/maps.
  485. int maps_fd;
  486. NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
  487. FileDescriptor wrapped_maps_fd(maps_fd);
  488. if (wrapped_maps_fd.get() < 0) {
  489. return -1;
  490. }
  491. // Iterate over maps and look for the map containing the pc. Then
  492. // look into the symbol tables inside.
  493. char buf[1024]; // Big enough for line of sane /proc/self/maps
  494. int num_maps = 0;
  495. LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf));
  496. while (true) {
  497. num_maps++;
  498. const char *cursor;
  499. const char *eol;
  500. if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line.
  501. return -1;
  502. }
  503. // Start parsing line in /proc/self/maps. Here is an example:
  504. //
  505. // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat
  506. //
  507. // We want start address (08048000), end address (0804c000), flags
  508. // (r-xp) and file name (/bin/cat).
  509. // Read start address.
  510. cursor = GetHex(cursor, eol, &start_address);
  511. if (cursor == eol || *cursor != '-') {
  512. return -1; // Malformed line.
  513. }
  514. ++cursor; // Skip '-'.
  515. // Read end address.
  516. uint64_t end_address;
  517. cursor = GetHex(cursor, eol, &end_address);
  518. if (cursor == eol || *cursor != ' ') {
  519. return -1; // Malformed line.
  520. }
  521. ++cursor; // Skip ' '.
  522. // Check start and end addresses.
  523. if (!(start_address <= pc && pc < end_address)) {
  524. continue; // We skip this map. PC isn't in this map.
  525. }
  526. // Read flags. Skip flags until we encounter a space or eol.
  527. const char * const flags_start = cursor;
  528. while (cursor < eol && *cursor != ' ') {
  529. ++cursor;
  530. }
  531. // We expect at least four letters for flags (ex. "r-xp").
  532. if (cursor == eol || cursor < flags_start + 4) {
  533. return -1; // Malformed line.
  534. }
  535. // Check flags. We are only interested in "r*x" maps.
  536. if (flags_start[0] != 'r' || flags_start[2] != 'x') {
  537. continue; // We skip this map.
  538. }
  539. ++cursor; // Skip ' '.
  540. // Read file offset.
  541. uint64_t file_offset;
  542. cursor = GetHex(cursor, eol, &file_offset);
  543. if (cursor == eol || *cursor != ' ') {
  544. return -1; // Malformed line.
  545. }
  546. ++cursor; // Skip ' '.
  547. // Don't subtract 'start_address' from the first entry:
  548. // * If a binary is compiled w/o -pie, then the first entry in
  549. // process maps is likely the binary itself (all dynamic libs
  550. // are mapped higher in address space). For such a binary,
  551. // instruction offset in binary coincides with the actual
  552. // instruction address in virtual memory (as code section
  553. // is mapped to a fixed memory range).
  554. // * If a binary is compiled with -pie, all the modules are
  555. // mapped high at address space (in particular, higher than
  556. // shadow memory of the tool), so the module can't be the
  557. // first entry.
  558. base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
  559. // Skip to file name. "cursor" now points to dev. We need to
  560. // skip at least two spaces for dev and inode.
  561. int num_spaces = 0;
  562. while (cursor < eol) {
  563. if (*cursor == ' ') {
  564. ++num_spaces;
  565. } else if (num_spaces >= 2) {
  566. // The first non-space character after skipping two spaces
  567. // is the beginning of the file name.
  568. break;
  569. }
  570. ++cursor;
  571. }
  572. if (cursor == eol) {
  573. return -1; // Malformed line.
  574. }
  575. // Finally, "cursor" now points to file name of our interest.
  576. NO_INTR(object_fd = open(cursor, O_RDONLY));
  577. if (object_fd < 0) {
  578. // Failed to open object file. Copy the object file name to
  579. // |out_file_name|.
  580. strncpy(out_file_name, cursor, out_file_name_size);
  581. // Making sure |out_file_name| is always null-terminated.
  582. out_file_name[out_file_name_size - 1] = '\0';
  583. return -1;
  584. }
  585. return object_fd;
  586. }
  587. }
  588. // POSIX doesn't define any async-signal safe function for converting
  589. // an integer to ASCII. We'll have to define our own version.
  590. // itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
  591. // conversion was successful or NULL otherwise. It never writes more than "sz"
  592. // bytes. Output will be truncated as needed, and a NUL character is always
  593. // appended.
  594. // NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
  595. char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
  596. // Make sure we can write at least one NUL byte.
  597. size_t n = 1;
  598. if (n > sz)
  599. return NULL;
  600. if (base < 2 || base > 16) {
  601. buf[0] = '\000';
  602. return NULL;
  603. }
  604. char *start = buf;
  605. uintptr_t j = i;
  606. // Handle negative numbers (only for base 10).
  607. if (i < 0 && base == 10) {
  608. j = -i;
  609. // Make sure we can write the '-' character.
  610. if (++n > sz) {
  611. buf[0] = '\000';
  612. return NULL;
  613. }
  614. *start++ = '-';
  615. }
  616. // Loop until we have converted the entire number. Output at least one
  617. // character (i.e. '0').
  618. char *ptr = start;
  619. do {
  620. // Make sure there is still enough space left in our output buffer.
  621. if (++n > sz) {
  622. buf[0] = '\000';
  623. return NULL;
  624. }
  625. // Output the next digit.
  626. *ptr++ = "0123456789abcdef"[j % base];
  627. j /= base;
  628. if (padding > 0)
  629. padding--;
  630. } while (j > 0 || padding > 0);
  631. // Terminate the output with a NUL character.
  632. *ptr = '\000';
  633. // Conversion to ASCII actually resulted in the digits being in reverse
  634. // order. We can't easily generate them in forward order, as we can't tell
  635. // the number of characters needed until we are done converting.
  636. // So, now, we reverse the string (except for the possible "-" sign).
  637. while (--ptr > start) {
  638. char ch = *ptr;
  639. *ptr = *start;
  640. *start++ = ch;
  641. }
  642. return buf;
  643. }
  644. // Safely appends string |source| to string |dest|. Never writes past the
  645. // buffer size |dest_size| and guarantees that |dest| is null-terminated.
  646. void SafeAppendString(const char* source, char* dest, int dest_size) {
  647. int dest_string_length = strlen(dest);
  648. SAFE_ASSERT(dest_string_length < dest_size);
  649. dest += dest_string_length;
  650. dest_size -= dest_string_length;
  651. strncpy(dest, source, dest_size);
  652. // Making sure |dest| is always null-terminated.
  653. dest[dest_size - 1] = '\0';
  654. }
  655. // Converts a 64-bit value into a hex string, and safely appends it to |dest|.
  656. // Never writes past the buffer size |dest_size| and guarantees that |dest| is
  657. // null-terminated.
  658. void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) {
  659. // 64-bit numbers in hex can have up to 16 digits.
  660. char buf[17] = {'\0'};
  661. SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size);
  662. }
  663. // The implementation of our symbolization routine. If it
  664. // successfully finds the symbol containing "pc" and obtains the
  665. // symbol name, returns true and write the symbol name to "out".
  666. // Otherwise, returns false. If Callback function is installed via
  667. // InstallSymbolizeCallback(), the function is also called in this function,
  668. // and "out" is used as its output.
  669. // To keep stack consumption low, we would like this function to not
  670. // get inlined.
  671. static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
  672. int out_size) {
  673. uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
  674. uint64_t start_address = 0;
  675. uint64_t base_address = 0;
  676. int object_fd = -1;
  677. if (out_size < 1) {
  678. return false;
  679. }
  680. out[0] = '\0';
  681. SafeAppendString("(", out, out_size);
  682. if (g_symbolize_open_object_file_callback) {
  683. object_fd = g_symbolize_open_object_file_callback(pc0, start_address,
  684. base_address, out + 1,
  685. out_size - 1);
  686. } else {
  687. object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address,
  688. base_address,
  689. out + 1,
  690. out_size - 1);
  691. }
  692. // Check whether a file name was returned.
  693. if (object_fd < 0) {
  694. if (out[1]) {
  695. // The object file containing PC was determined successfully however the
  696. // object file was not opened successfully. This is still considered
  697. // success because the object file name and offset are known and tools
  698. // like asan_symbolize.py can be used for the symbolization.
  699. out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated.
  700. SafeAppendString("+0x", out, out_size);
  701. SafeAppendHexNumber(pc0 - base_address, out, out_size);
  702. SafeAppendString(")", out, out_size);
  703. return true;
  704. }
  705. // Failed to determine the object file containing PC. Bail out.
  706. return false;
  707. }
  708. FileDescriptor wrapped_object_fd(object_fd);
  709. int elf_type = FileGetElfType(wrapped_object_fd.get());
  710. if (elf_type == -1) {
  711. return false;
  712. }
  713. if (g_symbolize_callback) {
  714. // Run the call back if it's installed.
  715. // Note: relocation (and much of the rest of this code) will be
  716. // wrong for prelinked shared libraries and PIE executables.
  717. uint64 relocation = (elf_type == ET_DYN) ? start_address : 0;
  718. int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(),
  719. pc, out, out_size,
  720. relocation);
  721. if (num_bytes_written > 0) {
  722. out += num_bytes_written;
  723. out_size -= num_bytes_written;
  724. }
  725. }
  726. if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
  727. out, out_size, base_address)) {
  728. return false;
  729. }
  730. // Symbolization succeeded. Now we try to demangle the symbol.
  731. DemangleInplace(out, out_size);
  732. return true;
  733. }
  734. _END_GOOGLE_NAMESPACE_
  735. #elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
  736. #include <dlfcn.h>
  737. #include <string.h>
  738. _START_GOOGLE_NAMESPACE_
  739. static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
  740. int out_size) {
  741. Dl_info info;
  742. if (dladdr(pc, &info)) {
  743. if ((int)strlen(info.dli_sname) < out_size) {
  744. strcpy(out, info.dli_sname);
  745. // Symbolization succeeded. Now we try to demangle the symbol.
  746. DemangleInplace(out, out_size);
  747. return true;
  748. }
  749. }
  750. return false;
  751. }
  752. _END_GOOGLE_NAMESPACE_
  753. #else
  754. # error BUG: HAVE_SYMBOLIZE was wrongly set
  755. #endif
  756. _START_GOOGLE_NAMESPACE_
  757. bool Symbolize(void *pc, char *out, int out_size) {
  758. SAFE_ASSERT(out_size >= 0);
  759. return SymbolizeAndDemangle(pc, out, out_size);
  760. }
  761. _END_GOOGLE_NAMESPACE_
  762. #else /* HAVE_SYMBOLIZE */
  763. #include <assert.h>
  764. #include "config.h"
  765. _START_GOOGLE_NAMESPACE_
  766. // TODO: Support other environments.
  767. bool Symbolize(void *pc, char *out, int out_size) {
  768. assert(0);
  769. return false;
  770. }
  771. _END_GOOGLE_NAMESPACE_
  772. #endif