Skip to content

Commit cae8043

Browse files
authored
Stripped file and gnu_debuglink support take 2 (#596)
* Stripped file and gnu_debuglink support take 2 * Docstring for _file_crc32 * Removed pointless FIXME * _file_crc32 does not rewind
1 parent 108e184 commit cae8043

File tree

7 files changed

+108
-44
lines changed

7 files changed

+108
-44
lines changed

elftools/dwarf/dwarf_util.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# This code is in the public domain
88
#-------------------------------------------------------------------------------
99

10-
import os
10+
import os, binascii
1111
from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array
1212
from ..common.exceptions import DWARFError
1313
from ..common.utils import preserve_stream_pos, struct_parse
@@ -60,3 +60,14 @@ def _iter_CUs_in_section(stream, structs, parser):
6060
yield header
6161
offset = header.offset_after_length + header.unit_length
6262

63+
def _file_crc32(file):
64+
""" Provided a readable binary stream, reads the stream to the end
65+
and computes the CRC32 checksum of its contents,
66+
with the initial value of 0.
67+
"""
68+
d = file.read(4096)
69+
checksum = 0
70+
while len(d):
71+
checksum = binascii.crc32(d, checksum)
72+
d = file.read(4096)
73+
return checksum

elftools/dwarf/dwarfinfo.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def __init__(self,
8383
debug_rnglists_sec,
8484
debug_sup_sec,
8585
gnu_debugaltlink_sec,
86-
gnu_debuglink_sec,
8786
debug_types_sec
8887
):
8988
""" config:
@@ -113,7 +112,6 @@ def __init__(self,
113112
self.debug_rnglists_sec = debug_rnglists_sec
114113
self.debug_sup_sec = debug_sup_sec
115114
self.gnu_debugaltlink_sec = gnu_debugaltlink_sec
116-
self.gnu_debuglink_sec = gnu_debuglink_sec
117115
self.debug_types_sec = debug_types_sec
118116

119117
# Sets the supplementary_dwarfinfo to None. Client code can set this
@@ -716,7 +714,7 @@ def replace_value(data, content_type, replacer):
716714

717715
def parse_debugsupinfo(self):
718716
"""
719-
Extract a filename from .debug_sup, .gnu_debualtlink sections, or .gnu_debuglink.
717+
Extract a filename from the .debug_sup or .gnu_debugaltlink section.
720718
"""
721719
if self.debug_sup_sec is not None:
722720
self.debug_sup_sec.stream.seek(0)
@@ -727,9 +725,9 @@ def parse_debugsupinfo(self):
727725
self.gnu_debugaltlink_sec.stream.seek(0)
728726
suplink = self.structs.Dwarf_debugaltlink.parse_stream(self.gnu_debugaltlink_sec.stream)
729727
return suplink.sup_filename
730-
if self.gnu_debuglink_sec is not None:
731-
self.gnu_debuglink_sec.stream.seek(0)
732-
suplink = self.structs.Dwarf_debuglink.parse_stream(self.gnu_debuglink_sec.stream)
733-
return suplink.sup_filename
728+
# The section .gnu_debuglink with similar-looking contents
729+
# has a different meaning - it doesn't point at supplementary DWARF,
730+
# which is meant to be referenced from primary DWARF,
731+
# it points at DWARF proper.
734732
return None
735733

elftools/dwarf/structs.py

-13
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,6 @@ def _create_structs(self):
177177

178178
self._create_debugsup()
179179
self._create_gnu_debugaltlink()
180-
self._create_gnu_debuglink()
181180

182181
def _create_initial_length(self):
183182
def _InitialLength(name):
@@ -275,18 +274,6 @@ def _create_gnu_debugaltlink(self):
275274
CString("sup_filename"),
276275
String("sup_checksum", length=20))
277276

278-
def _create_gnu_debuglink(self):
279-
self.Dwarf_debuglink = Struct('Elf_debuglink',
280-
CString("sup_filename"),
281-
Switch('', lambda ctx: (len(ctx.sup_filename) % 4),
282-
{
283-
0: String("sup_padding", length=3),
284-
1: String("sup_padding", length=2),
285-
2: String("sup_padding", length=1),
286-
3: String("sup_padding", length=0),
287-
}),
288-
String("sup_checksum", length=4))
289-
290277
def _create_dw_form(self):
291278
self.Dwarf_dw_form = dict(
292279
DW_FORM_addr=self.the_Dwarf_target_addr,

elftools/elf/elffile.py

+67-20
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from ..ehabi.ehabiinfo import EHABIInfo
3131
from .hash import ELFHashSection, GNUHashSection
3232
from .constants import SHN_INDICES
33+
from ..dwarf.dwarf_util import _file_crc32
3334

3435
class ELFFile(object):
3536
""" Creation: the constructor accepts a stream (file-like object) with the
@@ -94,15 +95,24 @@ def load_from_path(cls, path):
9495
ELFFile from it, setting up a correct stream_loader relative to the
9596
original file.
9697
"""
97-
base_directory = os.path.dirname(path)
98-
def loader(elf_path):
99-
# FIXME: use actual path instead of str/bytes
100-
if not os.path.isabs(elf_path):
101-
elf_path = os.path.join(base_directory,
102-
elf_path)
103-
return open(elf_path, 'rb')
10498
stream = open(path, 'rb')
105-
return ELFFile(stream, loader)
99+
return ELFFile(stream, ELFFile.make_relative_loader(path))
100+
101+
@staticmethod
def make_relative_loader(base_path):
    """ Return a function that takes a potentially relative path,
        resolves it against the directory of base_path, and opens the
        resulting file for binary reading.

        base_path may be str, bytes, or an os.PathLike object. The returned
        loader accepts the same kinds of paths; absolute paths are opened
        as they are.

        ELFFile uses functions like that for resolving DWARF links.
    """
    # Link names read from ELF sections are bytes, so the directory is kept
    # as bytes as well. os.fsencode() converts str and os.PathLike input
    # using the filesystem encoding and passes bytes through unchanged.
    base_directory = os.path.dirname(os.fsencode(base_path))

    def loader(rel_path):
        # Normalize to bytes so that joining with base_directory never
        # mixes str and bytes components.
        rel_path = os.fsencode(rel_path)
        if not os.path.isabs(rel_path):
            rel_path = os.path.join(base_directory, rel_path)
        return open(rel_path, 'rb')

    return loader
106116

107117
def num_sections(self):
108118
""" Number of sections in the file
@@ -172,6 +182,13 @@ def get_section_index(self, section_name):
172182
if self._section_name_map is None:
173183
self._make_section_name_map()
174184
return self._section_name_map.get(section_name, None)
185+
186+
def has_section(self, section_name):
    """ Tell whether a section with the given name is present, using only
        the section name map (the section itself is not parsed).
    """
    name_map = self._section_name_map
    if name_map is None:
        # The map is built on first use; pick up the freshly built one.
        self._make_section_name_map()
        name_map = self._section_name_map
    return section_name in name_map
175192

176193
def iter_sections(self, type=None):
177194
""" Yield all the sections in the file. If the optional |type|
@@ -231,14 +248,18 @@ def address_offsets(self, start, size=1):
231248
end <= seg['p_vaddr'] + seg['p_filesz']):
232249
yield start - seg['p_vaddr'] + seg['p_offset']
233250

234-
def has_dwarf_info(self):
251+
def has_dwarf_info(self, strict=False):
235252
""" Check whether this file appears to have debugging information.
236253
We assume that if it has the .debug_info or .zdebug_info section, it
237254
has all the other required sections as well.
255+
256+
Unless you pass strict=True, the presence of .eh_frame section,
257+
which is DWARF adjacent but hardly DWARF proper, will count as debug info.
258+
Stripped files contain .eh_frame but none of the .[z]debug_xxx sections.
238259
"""
239-
return bool(self.get_section_by_name('.debug_info') or
240-
self.get_section_by_name('.zdebug_info') or
241-
self.get_section_by_name('.eh_frame'))
260+
return (self.has_section('.debug_info') or
261+
self.has_section('.zdebug_info') or
262+
(not strict and self.has_section('.eh_frame')))
242263

243264
def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True):
244265
""" Return a DWARFInfo object representing the debugging information in
@@ -247,23 +268,39 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True):
247268
If relocate_dwarf_sections is True, relocations for DWARF sections
248269
are looked up and applied.
249270
250-
If follow_links is True, we will try to load the supplementary
271+
If follow_links is True, we will try to load the external and/or supplementary
251272
object file (if any), and use it to resolve references and imports.
252273
"""
253-
# Expect that has_dwarf_info was called, so at least .debug_info is
274+
# Expect that has_dwarf_info() was called, so at least .debug_info is
254275
# present.
255276
# Sections that aren't found will be passed as None to DWARFInfo.
256277

278+
# TODO: support linking by build ID
279+
# https://sourceware.org/gdb/current/onlinedocs/gdb.html/Separate-Debug-Files.html
280+
281+
# A file may contain a debug link but not be stripped, so check for debug_info just in case
282+
debuglink_section = self.get_section_by_name('.gnu_debuglink')
283+
if debuglink_section and not self.has_dwarf_info(True) and follow_links and self.stream_loader:
284+
debuglink = struct_parse(self.structs.Gnu_debuglink, debuglink_section.stream, debuglink_section.header.sh_offset)
285+
with self.stream_loader(debuglink.filename) as ext_file:
286+
# Validate checksum...
287+
if _file_crc32(ext_file) != debuglink.checksum:
288+
raise ELFError('The linked DWARF file does not match the checksum in the link.')
289+
ext_file.seek(0, os.SEEK_SET)
290+
ext_elffile = ELFFile(ext_file, self.stream_loader)
291+
# Inheriting the stream loader like that might be wrong if the supplementary DWARF link in the other file
292+
# is relative to the other file's directory as opposed to this file's directory.
293+
return ext_elffile.get_dwarf_info(relocate_dwarf_sections=relocate_dwarf_sections, follow_links=True)
294+
257295
section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
258296
'.debug_str', '.debug_line', '.debug_frame',
259297
'.debug_loc', '.debug_ranges', '.debug_pubtypes',
260298
'.debug_pubnames', '.debug_addr',
261299
'.debug_str_offsets', '.debug_line_str',
262300
'.debug_loclists', '.debug_rnglists',
263-
'.debug_sup', '.gnu_debugaltlink', '.gnu_debuglink',
264-
'.debug_types')
301+
'.debug_sup', '.gnu_debugaltlink', '.debug_types')
265302

266-
compressed = bool(self.get_section_by_name('.zdebug_info'))
303+
compressed = self.has_section('.zdebug_info')
267304
if compressed:
268305
section_names = tuple(map(lambda x: '.z' + x[1:], section_names))
269306

@@ -275,7 +312,7 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True):
275312
debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
276313
debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
277314
debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name,
278-
debug_sup_name, gnu_debugaltlink_name, gnu_debuglink, debug_types_sec_name,
315+
debug_sup_name, gnu_debugaltlink_name, debug_types_sec_name,
279316
eh_frame_sec_name) = section_names
280317

281318
debug_sections = {}
@@ -318,13 +355,23 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True):
318355
debug_rnglists_sec=debug_sections[debug_rnglists_sec_name],
319356
debug_sup_sec=debug_sections[debug_sup_name],
320357
gnu_debugaltlink_sec=debug_sections[gnu_debugaltlink_name],
321-
gnu_debuglink_sec=debug_sections[gnu_debuglink],
322358
debug_types_sec=debug_sections[debug_types_sec_name]
323359
)
324360
if follow_links:
325361
dwarfinfo.supplementary_dwarfinfo = self.get_supplementary_dwarfinfo(dwarfinfo)
326362
return dwarfinfo
327-
363+
364+
def has_dwarf_link(self):
    """ Report whether this binary carries a .gnu_debuglink section, i.e.
        whether its debug info lives in an external file.
        Use get_dwarf_link to retrieve the path to that file.
    """
    link_section_name = '.gnu_debuglink'
    return self.has_section(link_section_name)
369+
370+
def get_dwarf_link(self):
    """ Parse the .gnu_debuglink section and return the resulting object,
        which holds filename (as bytes) and checksum (as number).
        Returns None when the file has no such section.
    """
    link_section = self.get_section_by_name('.gnu_debuglink')
    if not link_section:
        return None
    # The section contents are parsed from the section's offset in the stream.
    return struct_parse(
        self.structs.Gnu_debuglink,
        link_section.stream,
        link_section.header.sh_offset)
328375

329376
def get_supplementary_dwarfinfo(self, dwarfinfo):
330377
"""

elftools/elf/structs.py

+7
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non
112112
self._create_riscv_attributes()
113113
self._create_elf_hash()
114114
self._create_gnu_hash()
115+
self._create_gnu_debuglink()
115116

116117
#-------------------------------- PRIVATE --------------------------------#
117118

@@ -562,3 +563,9 @@ def _create_gnu_hash(self):
562563
self.Elf_word('bloom_shift'),
563564
Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')),
564565
Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')))
566+
567+
def _create_gnu_debuglink(self):
    # Struct for the .gnu_debuglink section contents: a C string file name,
    # some padding, then a checksum stored as an ELF word.
    def padding_length(ctx):
        # 3 - (name length mod 4) bytes of padding follow the name.
        # NOTE(review): presumably this aligns the checksum to a 4-byte
        # boundary once the name's NUL terminator is counted - confirm
        # against the GDB separate debug file documentation.
        return 3 - len(ctx.filename) % 4

    self.Gnu_debuglink = Struct(
        'Gnu_debuglink',
        CString("filename"),
        Padding(padding_length, strict=True),
        self.Elf_word("checksum"))

test/test_debuglink.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ def subprograms_from_debuglink(self, elf: ELFFile) -> dict[str, (int, int)]:
4444
# Retrieve the subprograms from the DWARF info
4545
dwarf_info = elf.get_dwarf_info(follow_links=True, relocate_dwarf_sections=True)
4646

47-
if dwarf_info.supplementary_dwarfinfo:
48-
for CU in dwarf_info.supplementary_dwarfinfo.iter_CUs():
47+
if dwarf_info:
48+
for CU in dwarf_info.iter_CUs():
4949
for DIE in CU.iter_DIEs():
5050
if DIE.tag == 'DW_TAG_subprogram':
5151
attributes = DIE.attributes
@@ -70,9 +70,24 @@ def subprograms_from_debuglink(self, elf: ELFFile) -> dict[str, (int, int)]:
7070
def test_debuglink(self):
7171
with open('test/testfiles_for_unittests/debuglink', "rb") as elf_file:
7272
elf = ELFFile(elf_file, stream_loader=self.stream_loader)
73+
# Contains eh_frame and gnu_debuglink, but no DWARF
74+
self.assertTrue(elf.has_dwarf_info(False))
75+
self.assertFalse(elf.has_dwarf_info(True))
76+
self.assertTrue(elf.has_dwarf_link())
77+
78+
link = elf.get_dwarf_link()
79+
self.assertIsNotNone(link)
80+
self.assertEqual(link.filename, b'debuglink.debug')
81+
self.assertEqual(link.checksum, 0x29b7c5f1)
82+
7383
subprograms = self.subprograms_from_debuglink(elf)
7484
self.assertEqual(subprograms, {b'main': (0x1161, 0x52), b'addNumbers': (0x1149, 0x18)})
7585

86+
# Test the filesystem aware ELFFile loading
87+
elf = ELFFile.load_from_path('test/testfiles_for_unittests/debuglink')
88+
subprograms = self.subprograms_from_debuglink(elf)
89+
self.assertEqual(subprograms, {b'main': (0x1161, 0x52), b'addNumbers': (0x1149, 0x18)})
90+
7691
if __name__ == '__main__':
7792
unittest.main()
7893

test/test_refaddr_bitness.py

-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ def test_main(self):
5151
debug_rnglists_sec = None,
5252
debug_sup_sec = None,
5353
gnu_debugaltlink_sec = None,
54-
gnu_debuglink_sec = None,
5554
debug_types_sec=None
5655
)
5756

0 commit comments

Comments
 (0)