Skip to content

Commit e405fed

Browse files
dinkark-devDinkar Khandalekar
and
Dinkar Khandalekar
authored
Support for parsing .debug_types section introduced in DWARF version 4 (#520)
* Support for parsing .debug_types section introduced in DWARF version 4

  Changes
  * Support for parsing type units from the .debug_types section
  * Added logic to generate debug_types with readelf tool
  * Added test binaries generated using IAR Embedded Workbench for ARM toolchain (dwarf_debug_types.elf)

  Known issues
  * When running unit tests with the new ELF binaries:
    * parsing of the .debug_frame section results in an infinite loop
    * parsing of the .debug_aranges section causes stream parsing errors

* Updated dwarf_debug_types.elf test binary using IAR EWARM 9.40.1. Re-enabled aranges debug-dump test
* Resolved merge conflicts with origin/master
* Renamed and cleaned up Type Unit retrieval methods. Added/updated docstrings. Added link to binutils bug
* Fixed docstrings with correct DWARF document references

---------

Co-authored-by: Dinkar Khandalekar <contact@dinkar.dev>
1 parent a916fe5 commit e405fed

File tree

9 files changed

+447
-9
lines changed

9 files changed

+447
-9
lines changed

elftools/dwarf/dwarfinfo.py

+123-5
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# This code is in the public domain
88
#-------------------------------------------------------------------------------
99
import os
10-
from collections import namedtuple
10+
from collections import namedtuple, OrderedDict
1111
from bisect import bisect_right
1212

1313
from ..construct.lib.container import Container
@@ -16,6 +16,7 @@
1616
parse_cstring_from_stream)
1717
from .structs import DWARFStructs
1818
from .compileunit import CompileUnit
19+
from .typeunit import TypeUnit
1920
from .abbrevtable import AbbrevTable
2021
from .lineprogram import LineProgram
2122
from .callframe import CallFrameInfo
@@ -82,7 +83,8 @@ def __init__(self,
8283
debug_rnglists_sec,
8384
debug_sup_sec,
8485
gnu_debugaltlink_sec,
85-
gnu_debuglink_sec
86+
gnu_debuglink_sec,
87+
debug_types_sec
8688
):
8789
""" config:
8890
A DwarfConfig object
@@ -112,6 +114,7 @@ def __init__(self,
112114
self.debug_sup_sec = debug_sup_sec
113115
self.gnu_debugaltlink_sec = gnu_debugaltlink_sec
114116
self.gnu_debuglink_sec = gnu_debuglink_sec
117+
self.debug_types_sec = debug_types_sec
115118

116119
# Sets the supplementary_dwarfinfo to None. Client code can set this
117120
# to something else, typically a DWARFInfo file read from an ELFFile
@@ -136,6 +139,9 @@ def __init__(self,
136139
self._cu_cache = []
137140
self._cu_offsets_map = []
138141

142+
# DWARF v4 type units keyed by sig8 - OrderedDict created lazily on first reference
143+
self._type_units_by_sig = None
144+
139145
@property
140146
def has_debug_info(self):
141147
""" Return whether this contains debug information.
@@ -145,6 +151,11 @@ def has_debug_info(self):
145151
"""
146152
return bool(self.debug_info_sec)
147153

154+
def has_debug_types(self):
155+
""" Return whether this contains debug types information.
156+
"""
157+
return bool(self.debug_types_sec)
158+
148159
def get_DIE_from_lut_entry(self, lut_entry):
149160
""" Get the DIE from the pubnames or pubtypes lookup table entry.
150161
@@ -223,11 +234,32 @@ def get_CU_at(self, offset):
223234

224235
return self._cached_CU_at_offset(offset)
225236

237+
def get_TU_by_sig8(self, sig8):
238+
""" Find and return a Type Unit referenced by its signature
239+
240+
sig8:
241+
The 8 byte unique signature (as a 64-bit unsigned integer)
242+
243+
Returns the TU with the given type signature by parsing the
244+
.debug_types section.
245+
246+
"""
247+
self._parse_debug_types()
248+
tu = self._type_units_by_sig.get(sig8)
249+
if tu is None:
250+
raise KeyError("Signature %016x not found in .debug_types" % sig8)
251+
return tu
252+
226253
def iter_CUs(self):
227254
""" Yield all the compile units (CompileUnit objects) in the debug info
228255
"""
229256
return self._parse_CUs_iter()
230257

258+
def iter_TUs(self):
259+
""" Yield all the type units (TypeUnit objects) in the .debug_types section
260+
"""
261+
return self._parse_TUs_iter()
262+
231263
def get_abbrev_table(self, offset):
232264
""" Get an AbbrevTable from the given offset in the debug_abbrev
233265
section.
@@ -416,11 +448,53 @@ def _parse_CUs_iter(self, offset=0):
416448
# Compute the offset of the next CU in the section. The unit_length
417449
# field of the CU header contains its size not including the length
418450
# field itself.
419-
offset = ( offset +
420-
cu['unit_length'] +
421-
cu.structs.initial_length_field_size())
451+
offset = (offset +
452+
cu['unit_length'] +
453+
cu.structs.initial_length_field_size())
422454
yield cu
423455

456+
def _parse_TUs_iter(self, offset=0):
457+
""" Iterate Type Unit objects in order of appearance in the debug_types section.
458+
459+
offset:
460+
The offset of the first TU to yield. Additional iterations
461+
will return the sequential unit objects.
462+
463+
See .iter_TUs().
464+
"""
465+
if self.debug_types_sec is None:
466+
return
467+
468+
while offset < self.debug_types_sec.size:
469+
tu = self._parse_TU_at_offset(offset)
470+
# Compute the offset of the next TU in the section. The unit_length
471+
# field of the TU header contains its size not including the length
472+
# field itself.
473+
offset = (offset +
474+
tu['unit_length'] +
475+
tu.structs.initial_length_field_size())
476+
477+
yield tu
478+
479+
def _parse_debug_types(self):
480+
""" Check if the .debug_types section has already been parsed. If not,
481+
parse all TUs and store them in an OrderedDict using their unique
482+
64-bit signature as the key.
483+
484+
See .get_TU_by_sig8().
485+
"""
486+
if self._type_units_by_sig is not None:
487+
return
488+
self._type_units_by_sig = OrderedDict()
489+
490+
if self.debug_types_sec is None:
491+
return
492+
493+
# Collect all Type Units in the .debug_types section for access using
494+
# their 8-byte unique signature
495+
for tu in self._parse_TUs_iter():
496+
self._type_units_by_sig[tu['signature']] = tu
497+
424498
def _cached_CU_at_offset(self, offset):
425499
""" Return the CU with unit header at the given offset into the
426500
debug_info section from the cache. If not present, the unit is
@@ -493,6 +567,50 @@ def _parse_CU_at_offset(self, offset):
493567
cu_offset=offset,
494568
cu_die_offset=cu_die_offset)
495569

570+
def _parse_TU_at_offset(self, offset):
571+
""" Parse and return a Type Unit (TU) at the given offset in the debug_types stream.
572+
"""
573+
# Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v4
574+
# states that the first 32-bit word of the TU header determines
575+
# whether the TU is represented with 32-bit or 64-bit DWARF format.
576+
#
577+
# So we peek at the first word in the TU header to determine its
578+
# dwarf format. Based on it, we then create a new DWARFStructs
579+
# instance suitable for this TU and use it to parse the rest.
580+
#
581+
initial_length = struct_parse(
582+
self.structs.the_Dwarf_uint32, self.debug_types_sec.stream, offset)
583+
dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
584+
585+
# Temporary structs for parsing the header
586+
# The structs for the rest of the TU depend on the header data.
587+
tu_structs = DWARFStructs(
588+
little_endian=self.config.little_endian,
589+
dwarf_format=dwarf_format,
590+
address_size=4,
591+
dwarf_version=2)
592+
593+
tu_header = struct_parse(
594+
tu_structs.Dwarf_TU_header, self.debug_types_sec.stream, offset)
595+
596+
# structs for the rest of the TU, taking into account bit-width and DWARF version
597+
tu_structs = DWARFStructs(
598+
little_endian=self.config.little_endian,
599+
dwarf_format=dwarf_format,
600+
address_size=tu_header['address_size'],
601+
dwarf_version=tu_header['version'])
602+
603+
tu_die_offset = self.debug_types_sec.stream.tell()
604+
dwarf_assert(
605+
self._is_supported_version(tu_header['version']),
606+
"Expected supported DWARF version. Got '%s'" % tu_header['version'])
607+
return TypeUnit(
608+
header=tu_header,
609+
dwarfinfo=self,
610+
structs=tu_structs,
611+
tu_offset=offset,
612+
tu_die_offset=tu_die_offset)
613+
496614
def _is_supported_version(self, version):
497615
""" DWARF version supported by this parser
498616
"""

elftools/dwarf/structs.py

+13
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ class DWARFStructs(object):
5353
Dwarf_CU_header (+):
5454
Compilation unit header
5555
56+
Dwarf_TU_header (+):
57+
Type unit header
58+
5659
Dwarf_abbrev_declaration (+):
5760
Abbreviation table declaration - doesn't include the initial
5861
code, only the contents.
@@ -160,6 +163,7 @@ def _create_structs(self):
160163
self._create_initial_length()
161164
self._create_leb128()
162165
self._create_cu_header()
166+
self._create_tu_header()
163167
self._create_abbrev_declaration()
164168
self._create_dw_form()
165169
self._create_lineprog_header()
@@ -237,6 +241,15 @@ def _create_cu_header(self):
237241
Embed(dwarfv4_CU_header),
238242
))
239243

244+
def _create_tu_header(self):
245+
self.Dwarf_TU_header = Struct('Dwarf_TU_header',
246+
self.Dwarf_initial_length('unit_length'),
247+
self.Dwarf_uint16('version'),
248+
self.Dwarf_offset('debug_abbrev_offset'),
249+
self.Dwarf_uint8('address_size'),
250+
self.Dwarf_uint64('signature'),
251+
self.Dwarf_offset('type_offset'))
252+
240253
def _create_abbrev_declaration(self):
241254
self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
242255
Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),

0 commit comments

Comments
 (0)