3
3
regex_automata:: meta:: Regex ,
4
4
regex_syntax:: {
5
5
ast,
6
- hir:: { self , Hir , HirKind } ,
6
+ hir:: { self , Hir } ,
7
7
} ,
8
8
} ;
9
9
@@ -296,35 +296,6 @@ impl ConfiguredHIR {
296
296
}
297
297
}
298
298
299
- /// Turns this configured HIR into one that only matches when both sides of
300
- /// the match correspond to a word boundary.
301
- ///
302
- /// Note that the HIR returned is like turning `pat` into
303
- /// `(?m:^|\W)(pat)(?m:$|\W)`. That is, the true match is at capture group
304
- /// `1` and not `0`.
305
- pub ( crate ) fn into_word ( self ) -> Result < ConfiguredHIR , Error > {
306
- // In theory building the HIR for \W should never fail, but there are
307
- // likely some pathological cases (particularly with respect to certain
308
- // values of limits) where it could in theory fail.
309
- let non_word = {
310
- let mut config = self . config . clone ( ) ;
311
- config. fixed_strings = false ;
312
- ConfiguredHIR :: new ( config, & [ r"\W" ] ) ?
313
- } ;
314
- let line_anchor_start = Hir :: look ( self . line_anchor_start ( ) ) ;
315
- let line_anchor_end = Hir :: look ( self . line_anchor_end ( ) ) ;
316
- let hir = Hir :: concat ( vec ! [
317
- Hir :: alternation( vec![ line_anchor_start, non_word. hir. clone( ) ] ) ,
318
- Hir :: capture( hir:: Capture {
319
- index: 1 ,
320
- name: None ,
321
- sub: Box :: new( renumber_capture_indices( self . hir) ?) ,
322
- } ) ,
323
- Hir :: alternation( vec![ non_word. hir, line_anchor_end] ) ,
324
- ] ) ;
325
- Ok ( ConfiguredHIR { config : self . config , hir } )
326
- }
327
-
328
299
/// Turns this configured HIR into an equivalent one, but where it must
329
300
/// match at the start and end of a line.
330
301
pub ( crate ) fn into_whole_line ( self ) -> ConfiguredHIR {
@@ -336,12 +307,20 @@ impl ConfiguredHIR {
336
307
}
337
308
338
309
/// Turns this configured HIR into an equivalent one, but where it must
339
- /// match at the start and end of the haystack .
340
- pub ( crate ) fn into_anchored ( self ) -> ConfiguredHIR {
310
+ /// match at word boundaries .
311
+ pub ( crate ) fn into_word ( self ) -> ConfiguredHIR {
341
312
let hir = Hir :: concat ( vec ! [
342
- Hir :: look( hir:: Look :: Start ) ,
313
+ Hir :: look( if self . config. unicode {
314
+ hir:: Look :: WordStartHalfUnicode
315
+ } else {
316
+ hir:: Look :: WordStartHalfAscii
317
+ } ) ,
343
318
self . hir,
344
- Hir :: look( hir:: Look :: End ) ,
319
+ Hir :: look( if self . config. unicode {
320
+ hir:: Look :: WordEndHalfUnicode
321
+ } else {
322
+ hir:: Look :: WordEndHalfAscii
323
+ } ) ,
345
324
] ) ;
346
325
ConfiguredHIR { config : self . config , hir }
347
326
}
@@ -365,50 +344,6 @@ impl ConfiguredHIR {
365
344
}
366
345
}
367
346
368
- /// This increments the index of every capture group in the given hir by 1. If
369
- /// any increment results in an overflow, then an error is returned.
370
- fn renumber_capture_indices ( hir : Hir ) -> Result < Hir , Error > {
371
- Ok ( match hir. into_kind ( ) {
372
- HirKind :: Empty => Hir :: empty ( ) ,
373
- HirKind :: Literal ( hir:: Literal ( lit) ) => Hir :: literal ( lit) ,
374
- HirKind :: Class ( cls) => Hir :: class ( cls) ,
375
- HirKind :: Look ( x) => Hir :: look ( x) ,
376
- HirKind :: Repetition ( mut x) => {
377
- x. sub = Box :: new ( renumber_capture_indices ( * x. sub ) ?) ;
378
- Hir :: repetition ( x)
379
- }
380
- HirKind :: Capture ( mut cap) => {
381
- cap. index = match cap. index . checked_add ( 1 ) {
382
- Some ( index) => index,
383
- None => {
384
- // This error message kind of sucks, but it's probably
385
- // impossible for it to happen. The only way a capture
386
- // index can overflow addition is if the regex is huge
387
- // (or something else has gone horribly wrong).
388
- let msg = "could not renumber capture index, too big" ;
389
- return Err ( Error :: any ( msg) ) ;
390
- }
391
- } ;
392
- cap. sub = Box :: new ( renumber_capture_indices ( * cap. sub ) ?) ;
393
- Hir :: capture ( cap)
394
- }
395
- HirKind :: Concat ( subs) => {
396
- let subs = subs
397
- . into_iter ( )
398
- . map ( |sub| renumber_capture_indices ( sub) )
399
- . collect :: < Result < Vec < Hir > , Error > > ( ) ?;
400
- Hir :: concat ( subs)
401
- }
402
- HirKind :: Alternation ( subs) => {
403
- let subs = subs
404
- . into_iter ( )
405
- . map ( |sub| renumber_capture_indices ( sub) )
406
- . collect :: < Result < Vec < Hir > , Error > > ( ) ?;
407
- Hir :: alternation ( subs)
408
- }
409
- } )
410
- }
411
-
412
347
/// Returns true if the given literal string contains any byte from the line
413
348
/// terminator given.
414
349
fn has_line_terminator ( lineterm : LineTerminator , literal : & str ) -> bool {
0 commit comments