@@ -115,7 +115,10 @@ def log(msg: str) -> None:
115
115
# f"{root}-{pid}-{tslug}.out",
116
116
]:
117
117
with open (filename , "a" ) as f :
118
- print (f"{ pid } :{ tslug } : { msg } " , file = f , flush = True )
118
+ try :
119
+ print (f"{ pid } :{ tslug } : { msg } " , file = f , flush = True )
120
+ except UnicodeError :
121
+ print (f"{ pid } :{ tslug } : { ascii (msg )} " , file = f , flush = True )
119
122
120
123
def arg_repr (arg : Any ) -> str :
121
124
"""Make a customized repr for logged values."""
@@ -176,7 +179,14 @@ def _decorator(meth: AnyCallable) -> AnyCallable:
176
179
177
180
178
181
class InstructionWalker :
179
- """Utility to step through trails of instructions."""
182
+ """Utility to step through trails of instructions.
183
+
184
+ We have two reasons to need sequences of instructions from a code object:
185
+ First, in strict sequence to visit all the instructions in the object.
186
+ This is `walk(follow_jumps=False)`. Second, we want to follow jumps to
187
+ understand how execution will flow: `walk(follow_jumps=True)`.
188
+
189
+ """
180
190
181
191
def __init__ (self , code : CodeType ) -> None :
182
192
self .code = code
@@ -213,19 +223,35 @@ def walk(
213
223
def populate_branch_trails (code : CodeType , code_info : CodeInfo ) -> None :
214
224
"""
215
225
Populate the `branch_trails` attribute on `code_info`.
226
+
227
+ Instructions can have a jump_target, where they might jump to next. Some
228
+ instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so
229
+ they aren't interesting to us, since they aren't the start of a branch
230
+ possibility.
231
+
232
+ Instructions that might or might not jump somewhere else are branch
233
+ possibilities. For each of those, we track a trail of instructions. These
234
+ are lists of instruction offsets, the next instructions that can execute.
235
+ We follow the trail until we get to a new source line. That gives us the
236
+ arc from the original instruction's line to the new source line.
237
+
216
238
"""
239
+ log (f"populate_branch_trails: { code } " )
217
240
iwalker = InstructionWalker (code )
218
241
for inst in iwalker .walk (follow_jumps = False ):
219
242
log (f"considering { inst = } " )
220
243
if not inst .jump_target :
244
+ # We only care about instructions with jump targets.
221
245
log ("no jump_target" )
222
246
continue
223
247
if inst .opcode in ALWAYS_JUMPS :
248
+ # We don't care about unconditional jumps.
224
249
log ("always jumps" )
225
250
continue
226
251
227
252
from_line = inst .line_number
228
- assert from_line is not None
253
+ if from_line is None :
254
+ continue
229
255
230
256
def walk_one_branch (
231
257
start_at : TOffset , branch_kind : str
@@ -255,14 +281,26 @@ def walk_one_branch(
255
281
)
256
282
return inst_offsets , (from_line , to_line )
257
283
else :
258
- log (f" no possible branch from @{ start_at } : { inst_offsets } " )
284
+ log (f"no possible branch from @{ start_at } : { inst_offsets } " )
259
285
return [], None
260
286
261
- code_info .branch_trails [inst .offset ] = (
287
+ # Calculate two trails: one from the next instruction, and one from the
288
+ # jump_target instruction.
289
+ trails = [
262
290
walk_one_branch (start_at = inst .offset + 2 , branch_kind = "not-taken" ),
263
291
walk_one_branch (start_at = inst .jump_target , branch_kind = "taken" ),
264
- )
265
-
292
+ ]
293
+ code_info .branch_trails [inst .offset ] = trails
294
+
295
+ # Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions
296
+ # other than the original jump possibility instruction. Register each
297
+ # trail under all of their offsets so we can pick up in the middle of a
298
+ # trail if need be.
299
+ for trail in trails :
300
+ for offset in trail [0 ]:
301
+ if offset not in code_info .branch_trails :
302
+ code_info .branch_trails [offset ] = []
303
+ code_info .branch_trails [offset ].append (trail )
266
304
267
305
@dataclass
268
306
class CodeInfo :
@@ -271,19 +309,17 @@ class CodeInfo:
271
309
tracing : bool
272
310
file_data : TTraceFileData | None
273
311
byte_to_line : dict [TOffset , TLineNo ] | None
312
+
274
313
# Keys are instruction offsets: each branch start, plus every offset along a trail.
275
- # Values are two tuples :
276
- # (
314
+ # Values are lists :
315
+ # [
277
316
# ([offset, offset, ...], (from_line, to_line)),
278
317
# ([offset, offset, ...], (from_line, to_line)),
279
- # )
318
+ # ]
280
319
# Each branch point starts with two trails (not-taken, taken); offsets along a trail also collect every trail that passes through them.
281
320
branch_trails : dict [
282
321
TOffset ,
283
- tuple [
284
- tuple [list [TOffset ], TArc | None ],
285
- tuple [list [TOffset ], TArc | None ],
286
- ],
322
+ list [tuple [list [TOffset ], TArc | None ]],
287
323
]
288
324
289
325
@@ -447,7 +483,9 @@ def sysmon_py_start( # pylint: disable=useless-return
447
483
branch_trails = {},
448
484
)
449
485
self .code_infos [id (code )] = code_info
450
- populate_branch_trails (code , code_info ) # TODO: should be a method?
486
+ if self .trace_arcs :
487
+ populate_branch_trails (code , code_info )
488
+ log (f"branch_trails for { code } :\n { code_info .branch_trails } " )
451
489
self .code_objects .append (code )
452
490
453
491
if tracing_code :
@@ -487,8 +525,8 @@ def sysmon_py_return( # pylint: disable=useless-return
487
525
@panopticon ("code" , "line" )
488
526
def sysmon_line_lines (self , code : CodeType , line_number : TLineNo ) -> MonitorReturn :
489
527
"""Handle sys.monitoring.events.LINE events for line coverage."""
490
- code_info = self .code_infos [ id (code )]
491
- if code_info .file_data is not None :
528
+ code_info = self .code_infos . get ( id (code ))
529
+ if code_info is not None and code_info .file_data is not None :
492
530
cast (set [TLineNo ], code_info .file_data ).add (line_number )
493
531
log (f"adding { line_number = } " )
494
532
return DISABLE
@@ -509,6 +547,7 @@ def sysmon_branch_either(
509
547
) -> MonitorReturn :
510
548
"""Handle BRANCH_RIGHT and BRANCH_LEFT events."""
511
549
code_info = self .code_infos [id (code )]
550
+ added_arc = False
512
551
if code_info .file_data is not None :
513
552
dest_info = code_info .branch_trails .get (instruction_offset )
514
553
log (f"{ dest_info = } " )
@@ -519,4 +558,17 @@ def sysmon_branch_either(
519
558
if destination_offset in offsets :
520
559
cast (set [TArc ], code_info .file_data ).add (arc )
521
560
log (f"adding { arc = } " )
561
+ added_arc = True
562
+ break
563
+
564
+ if not added_arc :
565
+ # This could be an exception jumping from line to line.
566
+ assert code_info .byte_to_line is not None
567
+ l1 = code_info .byte_to_line [instruction_offset ]
568
+ l2 = code_info .byte_to_line [destination_offset ]
569
+ if l1 != l2 :
570
+ arc = (l1 , l2 )
571
+ cast (set [TArc ], code_info .file_data ).add (arc )
572
+ log (f"adding unforeseen { arc = } " )
573
+
522
574
return DISABLE
0 commit comments