Skip to content

Commit 2b2fe64

Browse files
authored
Merge pull request #37 from iximeow/main
avoid contending on single process-wide precompiled regexes
2 parents 80666ca + b08ef8c commit 2b2fe64

File tree

3 files changed

+135
-116
lines changed

3 files changed

+135
-116
lines changed

src/file.rs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ pub fn reader(filename: &str, opts: &super::Options) -> Box<dyn BufRead> {
1717
};
1818

1919
if path.extension() == Some(OsStr::new("gz")) {
20-
Box::new(BufReader::with_capacity(128 * 1024, GzDecoder::new(file)))
20+
Box::new(BufReader::with_capacity(1024 * 1024, GzDecoder::new(file)))
2121
} else {
22-
Box::new(BufReader::with_capacity(128 * 1024, file))
22+
Box::new(BufReader::with_capacity(1024 * 1024, file))
2323
}
2424
}

src/lib.rs

+6 -6
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
#![feature(test)]
22
#[macro_use]
3-
extern crate lazy_static;
4-
#[macro_use]
53
extern crate serde_derive;
64
#[macro_use]
75
extern crate enum_map;
@@ -126,14 +124,15 @@ fn increment_maybe(counters: &mut NameMap, name: FieldName, maybe_value: Option<
126124
}
127125

128126
pub fn print_unknown_user_agents(path: &str, opts: &Options) {
127+
let ctx = user_agent::ParseCtx::new();
129128
file::reader(path, opts).split(b'\n').for_each(|line| {
130129
let l = &line.unwrap();
131130
let r: request::Request = serde_json::from_slice(l).unwrap();
132-
if user_agent::parse(r.user_agent.as_ref()).is_none() { println!("{}", r.user_agent) }
131+
if ctx.parse(r.user_agent.as_ref()).is_none() { println!("{}", r.user_agent) }
133132
});
134133
}
135134

136-
pub fn count_line(times: &mut TimeMap, line: &str) {
135+
pub fn count_line(ctx: &user_agent::ParseCtx, times: &mut TimeMap, line: &str) {
137136
let r: request::Request = serde_json::from_str(&line).unwrap();
138137

139138
if duplicate_request(&r) {
@@ -148,7 +147,7 @@ pub fn count_line(times: &mut TimeMap, line: &str) {
148147
let user_key = r.client_ip.parse().expect("ipaddr parse error");
149148

150149
increment(counters, FieldName::tls_cipher, r.tls_cipher.as_ref(), user_key);
151-
if let Some(ua) = user_agent::parse(r.user_agent.as_ref()) {
150+
if let Some(ua) = ctx.parse(r.user_agent.as_ref()) {
152151
increment(counters, FieldName::rubygems, ua.rubygems, user_key);
153152
increment_maybe(counters, FieldName::bundler, ua.bundler, user_key);
154153
increment_maybe(counters, FieldName::ruby, ua.ruby, user_key);
@@ -162,6 +161,7 @@ pub fn stream_stats<'a>(mut stream: Box<dyn BufRead + 'a>, opts: &Options) -> Ti
162161
let mut times = TimeMap::default();
163162
let mut lineno = 0;
164163

164+
let ctx = user_agent::ParseCtx::new();
165165
let mut line = String::with_capacity(1024*1024);
166166

167167
loop {
@@ -185,7 +185,7 @@ pub fn stream_stats<'a>(mut stream: Box<dyn BufRead + 'a>, opts: &Options) -> Ti
185185
}
186186
}
187187

188-
count_line(&mut times, line.as_str());
188+
count_line(&ctx, &mut times, line.as_str());
189189
}
190190

191191
if opts.verbose {

src/user_agent.rs

+127 -108
Original file line number | Diff line number | Diff line change
@@ -14,105 +14,122 @@ pub struct UserAgent<'a> {
1414
pub gemstash: Option<&'a str>,
1515
}
1616

17-
pub fn parse(a: &str) -> Option<UserAgent> {
17+
pub struct ParseCtx {
18+
bundler_pattern: Regex,
19+
ruby_pattern: Regex,
20+
gem_pattern: Regex,
21+
}
22+
23+
impl ParseCtx {
24+
/*
25+
* Here are some more regexes that indirect commented out so they didn't get moved to ParseCtx.
26+
* have fun :)
27+
*/
28+
/*
1829
lazy_static! {
1930
// Here is the named regex. The regex created below does not include names, because that interface has borrowing issues 😬
2031
// \Abundler/(?<bundler>[0-9a-zA-Z.\-]+) rubygems/(?<rubygems>[0-9a-zA-Z.\-]+) ruby/(?<ruby>[0-9a-zA-Z.\-]+) \((?<platform>.*)\) command/(.*?)(?: jruby/(?<jruby>[0-9a-zA-Z.\-]+))?(?: truffleruby/(?<truffleruby>[0-9a-zA-Z.\-]+))?(?: options/(?<options>.*?))?(?: ci/(?<ci>.*?))? ([a-f0-9]{16})(?: Gemstash/(?<gemstash>[0-9a-zA-Z.\-]+))?\z
21-
static ref BUNDLER_PATTERN: Regex = Regex::new(r"\Abundler/([0-9a-zA-Z.\-]+) rubygems/([0-9a-zA-Z.\-]+) ruby/([0-9a-zA-Z.\-]+) \(([^)]*)\) command/(.*?)(?: jruby/([0-9a-zA-Z.\-]+))?(?: truffleruby/([0-9a-zA-Z.\-]+))?(?: options/(.*?))?(?: ci/(.*?))? [a-f0-9]{16}(?: Gemstash/([0-9a-zA-Z.\-]+))?\z").unwrap();
22-
static ref RUBY_PATTERN: Regex = Regex::new(r"\A(?:Ruby, )?RubyGems/([0-9a-z.\-]+) (.*) Ruby/([0-9a-z.\-]+) \(.*?\)(?: jruby| truffleruby| rbx)?(?: Gemstash/([0-9a-z.\-]+))?\z").unwrap();
23-
static ref GEM_PATTERN: Regex = Regex::new(r"\ARuby, Gems ([0-9a-z.\-]+)\z").unwrap();
32+
}
33+
*/
34+
pub fn new() -> Self {
35+
Self {
36+
bundler_pattern: Regex::new(r"\Abundler/([0-9a-zA-Z.\-]+) rubygems/([0-9a-zA-Z.\-]+) ruby/([0-9a-zA-Z.\-]+) \(([^)]*)\) command/(.*?)(?: jruby/([0-9a-zA-Z.\-]+))?(?: truffleruby/([0-9a-zA-Z.\-]+))?(?: options/(.*?))?(?: ci/(.*?))? [a-f0-9]{16}(?: Gemstash/([0-9a-zA-Z.\-]+))?\z").unwrap(),
37+
ruby_pattern: Regex::new(r"\A(?:Ruby, )?RubyGems/([0-9a-z.\-]+) (.*) Ruby/([0-9a-z.\-]+) \(.*?\)(?: jruby| truffleruby| rbx)?(?: Gemstash/([0-9a-z.\-]+))?\z").unwrap(),
38+
gem_pattern: Regex::new(r"\ARuby, Gems ([0-9a-z.\-]+)\z").unwrap(),
39+
}
2440
}
2541

26-
let mut bl = BUNDLER_PATTERN.capture_locations();
27-
let mut rl = RUBY_PATTERN.capture_locations();
28-
let mut gl = GEM_PATTERN.capture_locations();
29-
30-
if BUNDLER_PATTERN.captures_read(&mut bl, a).is_some() {
31-
Some(UserAgent {
32-
bundler: match bl.get(1) {
33-
Some(loc) => Some(&a[loc.0..loc.1]),
34-
_ => None,
35-
},
36-
rubygems: match bl.get(2) {
37-
Some(loc) => &a[loc.0..loc.1],
38-
_ => panic!("parse failed on {:?}", a),
39-
},
40-
ruby: match bl.get(3) {
41-
Some(loc) => Some(&a[loc.0..loc.1]),
42-
_ => None,
43-
},
44-
platform: match bl.get(4) {
45-
Some(loc) => Some(&a[loc.0..loc.1]),
46-
_ => None,
47-
},
48-
command: match bl.get(5) {
49-
Some(loc) => Some(&a[loc.0..loc.1]),
50-
_ => None,
51-
},
52-
jruby: match bl.get(6) {
53-
Some(loc) => Some(&a[loc.0..loc.1]),
54-
_ => None,
55-
},
56-
truffleruby: match bl.get(7) {
57-
Some(loc) => Some(&a[loc.0..loc.1]),
58-
_ => None,
59-
},
60-
options: match bl.get(8) {
61-
Some(loc) => Some(&a[loc.0..loc.1]),
62-
_ => None,
63-
},
64-
ci: match bl.get(9) {
65-
Some(loc) => Some(&a[loc.0..loc.1]),
66-
_ => None,
67-
},
68-
gemstash: match bl.get(11) {
69-
Some(loc) => Some(&a[loc.0..loc.1]),
70-
_ => None,
71-
},
72-
})
73-
} else if RUBY_PATTERN.captures_read(&mut rl, a).is_some() {
74-
return Some(UserAgent {
75-
bundler: None,
76-
rubygems: match rl.get(1) {
77-
Some(loc) => &a[loc.0..loc.1],
78-
_ => panic!("parse failed on {:?}", a),
79-
},
80-
ruby: match rl.get(3) {
81-
Some(loc) => Some(&a[loc.0..loc.1]),
82-
_ => None,
83-
},
84-
platform: match rl.get(2) {
85-
Some(loc) => Some(&a[loc.0..loc.1]),
86-
_ => None,
87-
},
88-
command: None,
89-
jruby: None,
90-
truffleruby: None,
91-
options: None,
92-
ci: None,
93-
gemstash: match rl.get(4) {
94-
Some(loc) => Some(&a[loc.0..loc.1]),
95-
_ => None,
96-
},
97-
});
98-
} else if GEM_PATTERN.captures_read(&mut gl, a).is_some() {
99-
return Some(UserAgent {
100-
bundler: None,
101-
rubygems: match gl.get(1) {
102-
Some(loc) => &a[loc.0..loc.1],
103-
_ => panic!("parse failed on {:?}", a),
104-
},
105-
ruby: None,
106-
platform: None,
107-
command: None,
108-
jruby: None,
109-
truffleruby: None,
110-
options: None,
111-
ci: None,
112-
gemstash: None,
113-
});
114-
} else {
115-
return None;
42+
pub fn parse<'line>(&self, a: &'line str) -> Option<UserAgent<'line>> {
43+
let mut bl = self.bundler_pattern.capture_locations();
44+
let mut rl = self.ruby_pattern.capture_locations();
45+
let mut gl = self.gem_pattern.capture_locations();
46+
if self.bundler_pattern.captures_read(&mut bl, a).is_some() {
47+
Some(UserAgent {
48+
bundler: match bl.get(1) {
49+
Some(loc) => Some(&a[loc.0..loc.1]),
50+
_ => None,
51+
},
52+
rubygems: match bl.get(2) {
53+
Some(loc) => &a[loc.0..loc.1],
54+
_ => panic!("parse failed on {:?}", a),
55+
},
56+
ruby: match bl.get(3) {
57+
Some(loc) => Some(&a[loc.0..loc.1]),
58+
_ => None,
59+
},
60+
platform: match bl.get(4) {
61+
Some(loc) => Some(&a[loc.0..loc.1]),
62+
_ => None,
63+
},
64+
command: match bl.get(5) {
65+
Some(loc) => Some(&a[loc.0..loc.1]),
66+
_ => None,
67+
},
68+
jruby: match bl.get(6) {
69+
Some(loc) => Some(&a[loc.0..loc.1]),
70+
_ => None,
71+
},
72+
truffleruby: match bl.get(7) {
73+
Some(loc) => Some(&a[loc.0..loc.1]),
74+
_ => None,
75+
},
76+
options: match bl.get(8) {
77+
Some(loc) => Some(&a[loc.0..loc.1]),
78+
_ => None,
79+
},
80+
ci: match bl.get(9) {
81+
Some(loc) => Some(&a[loc.0..loc.1]),
82+
_ => None,
83+
},
84+
gemstash: match bl.get(11) {
85+
Some(loc) => Some(&a[loc.0..loc.1]),
86+
_ => None,
87+
},
88+
})
89+
} else if self.ruby_pattern.captures_read(&mut rl, a).is_some() {
90+
return Some(UserAgent {
91+
bundler: None,
92+
rubygems: match rl.get(1) {
93+
Some(loc) => &a[loc.0..loc.1],
94+
_ => panic!("parse failed on {:?}", a),
95+
},
96+
ruby: match rl.get(3) {
97+
Some(loc) => Some(&a[loc.0..loc.1]),
98+
_ => None,
99+
},
100+
platform: match rl.get(2) {
101+
Some(loc) => Some(&a[loc.0..loc.1]),
102+
_ => None,
103+
},
104+
command: None,
105+
jruby: None,
106+
truffleruby: None,
107+
options: None,
108+
ci: None,
109+
gemstash: match rl.get(4) {
110+
Some(loc) => Some(&a[loc.0..loc.1]),
111+
_ => None,
112+
},
113+
});
114+
} else if self.gem_pattern.captures_read(&mut gl, a).is_some() {
115+
return Some(UserAgent {
116+
bundler: None,
117+
rubygems: match gl.get(1) {
118+
Some(loc) => &a[loc.0..loc.1],
119+
_ => panic!("parse failed on {:?}", a),
120+
},
121+
ruby: None,
122+
platform: None,
123+
command: None,
124+
jruby: None,
125+
truffleruby: None,
126+
options: None,
127+
ci: None,
128+
gemstash: None,
129+
});
130+
} else {
131+
return None;
132+
}
116133
}
117134
}
118135

@@ -128,8 +145,9 @@ mod tests {
128145

129146
#[test]
130147
fn test_parse() {
148+
let ctx = ParseCtx::new();
131149
assert_eq!(
132-
parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41"),
150+
ctx.parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41"),
133151
Some(UserAgent {
134152
bundler: Some("1.12.5"),
135153
rubygems: "2.6.10",
@@ -145,7 +163,7 @@ mod tests {
145163
);
146164

147165
assert_eq!(
148-
parse("Ruby, RubyGems/2.4.8 x86_64-linux Ruby/2.1.6 (2015-04-13 patchlevel 336)"),
166+
ctx.parse("Ruby, RubyGems/2.4.8 x86_64-linux Ruby/2.1.6 (2015-04-13 patchlevel 336)"),
149167
Some(UserAgent {
150168
bundler: None,
151169
rubygems: "2.4.8",
@@ -161,7 +179,7 @@ mod tests {
161179
);
162180

163181
assert_eq!(
164-
parse("Ruby, Gems 1.1.1"),
182+
ctx.parse("Ruby, Gems 1.1.1"),
165183
Some(UserAgent {
166184
bundler: None,
167185
rubygems: "1.1.1",
@@ -185,23 +203,24 @@ mod tests {
185203
let file = file::reader("test/client_user_agents.txt", &opts);
186204
for line in file.lines() {
187205
let input = &line.unwrap();
188-
parse(input).unwrap_or_else(|| panic!("couldn't parse {:?}", input));
206+
ctx.parse(input).unwrap_or_else(|| panic!("couldn't parse {:?}", input));
189207
}
190208
}
191209

192210
#[bench]
193211
fn bench_parse(b: &mut Bencher) {
212+
let ctx = ParseCtx::new();
194213
b.iter(|| {
195-
parse("bundler/1.16.1 rubygems/2.6.11 ruby/2.4.1 (x86_64-pc-linux-gnu) command/install options/no_install,mirror.https://rubygems.org/,mirror.https://rubygems.org/.fallback_timeout/,path 59dbf8e99fa09c0a");
196-
parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41");
197-
parse("bundler/1.16.1 rubygems/2.7.6 ruby/2.5.1 (x86_64-pc-linux-gnu) command/install rbx/3.105 options/no_install,git.allow_insecure,build.nokogiri,jobs,path,app_config,silence_root_warning,bin,gemfile e710485d04febb1e");
198-
parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41");
199-
parse("bundler/1.15.4 rubygems/2.6.14 ruby/2.4.2 (x86_64-w64-mingw32) command/install options/ 6e8fa23dbf26d4ff Gemstash/1.1.0");
200-
parse("bundler/1.16.2 rubygems/2.7.6 ruby/2.5.0 (x86_64-Oracle Corporation-linux) command/install jruby/9.2.1.0-SNAPSHOT options/no_install,retry,jobs,gemfile ci/travis,ci fe5e45257d515f1f");
201-
parse("bundler/1.5.1 rubygems/2.2.0 ruby/2.1.0 (x86_64-unknown-linux-gnu) command/install fe5e45257d515f1f");
202-
parse("Ruby, Gems 1.1.1");
203-
parse("Ruby, RubyGems/1.3.7 x86_64-linux Ruby/1.9.2 (2010-08-18 patchlevel 0)");
204-
parse("Ruby, RubyGems/2.6.6 x86_64-linux Ruby/2.3.1 (2018-01-06 patchlevel 0) rbx");
214+
ctx.parse("bundler/1.16.1 rubygems/2.6.11 ruby/2.4.1 (x86_64-pc-linux-gnu) command/install options/no_install,mirror.https://rubygems.org/,mirror.https://rubygems.org/.fallback_timeout/,path 59dbf8e99fa09c0a");
215+
ctx.parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41");
216+
ctx.parse("bundler/1.16.1 rubygems/2.7.6 ruby/2.5.1 (x86_64-pc-linux-gnu) command/install rbx/3.105 options/no_install,git.allow_insecure,build.nokogiri,jobs,path,app_config,silence_root_warning,bin,gemfile e710485d04febb1e");
217+
ctx.parse("bundler/1.12.5 rubygems/2.6.10 ruby/2.3.1 (x86_64-pc-linux-gnu) command/install options/orig_path 95ac718b0e500f41");
218+
ctx.parse("bundler/1.15.4 rubygems/2.6.14 ruby/2.4.2 (x86_64-w64-mingw32) command/install options/ 6e8fa23dbf26d4ff Gemstash/1.1.0");
219+
ctx.parse("bundler/1.16.2 rubygems/2.7.6 ruby/2.5.0 (x86_64-Oracle Corporation-linux) command/install jruby/9.2.1.0-SNAPSHOT options/no_install,retry,jobs,gemfile ci/travis,ci fe5e45257d515f1f");
220+
ctx.parse("bundler/1.5.1 rubygems/2.2.0 ruby/2.1.0 (x86_64-unknown-linux-gnu) command/install fe5e45257d515f1f");
221+
ctx.parse("Ruby, Gems 1.1.1");
222+
ctx.parse("Ruby, RubyGems/1.3.7 x86_64-linux Ruby/1.9.2 (2010-08-18 patchlevel 0)");
223+
ctx.parse("Ruby, RubyGems/2.6.6 x86_64-linux Ruby/2.3.1 (2018-01-06 patchlevel 0) rbx");
205224
})
206225
}
207226
}

0 commit comments

Comments
 (0)