|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
| 6 | + |
| 7 | +// use std::borrow::ToOwned; |
| 8 | +// use std::collections::hash_map::{Entry, HashMap}; |
| 9 | + |
| 10 | +#![allow(clippy::manual_pattern_char_comparison)] |
| 11 | + |
| 12 | +extern crate criterion; |
| 13 | +extern crate tendril; |
| 14 | +use std::collections::{hash_map::Entry, HashMap}; |
| 15 | + |
| 16 | +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; |
| 17 | +use tendril::StrTendril; |
| 18 | + |
/// Short, single-sentence English sample made only of ASCII words and spaces.
static EN_1: &str = "Days turn to nights turn to paper into rocks into plastic";
| 20 | + |
/// Longer English sample: two paragraphs separated by `\n\n`, with
/// punctuation and one non-ASCII character (an em dash) near the end.
static EN_2: &str = "Here the notes in my laboratory journal cease. I was able to write the last \
    words only with great effort. By now it was already clear to me that LSD had \
    been the cause of the remarkable experience of the previous Friday, for the \
    altered perceptions were of the same type as before, only much more intense. I \
    had to struggle to speak intelligibly. I asked my laboratory assistant, who was \
    informed of the self-experiment, to escort me home. We went by bicycle, no \
    automobile being available because of wartime restrictions on their use. On the \
    way home, my condition began to assume threatening forms. Everything in my \
    field of vision wavered and was distorted as if seen in a curved mirror. I also \
    had the sensation of being unable to move from the spot. Nevertheless, my \
    assistant later told me that we had traveled very rapidly. Finally, we arrived \
    at home safe and sound, and I was just barely capable of asking my companion to \
    summon our family doctor and request milk from the neighbors.\n\n\
    In spite of my delirious, bewildered condition, I had brief periods of clear \
    and effective thinking—and chose milk as a nonspecific antidote for poisoning.";
| 36 | + |
/// Korean sample that mixes Hangul with ASCII words and punctuation, so the
/// indexers are exercised on multi-byte UTF-8 text.
static KR_1: &str = "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \
    프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \
    개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.";
| 40 | + |
/// Korean sample wrapped in HTML markup (`<p>` and `<a href="…">` tags),
/// combining Hangul text with ASCII tag and URL content.
static HTML_KR_1: &str = "<p>러스트(<a href=\"http://rust-lang.org\">Rust</a>)는 모질라(<a href=\"\
    https://www.mozilla.org/\">mozilla.org</a>)에서 개발하고 있는, \
    메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \
    아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.</p>";
| 45 | + |
/// Minimum length, in bytes, of the corpus built for the "small" benchmarks
/// (64 KiB); the sample text is appended until this length is reached.
const SMALL_SIZE: usize = 65536;
/// Minimum length, in bytes, of the corpus built for the "big" benchmarks
/// (1 MiB); the sample text is appended until this length is reached.
const LARGE_SIZE: usize = 1 << 20;
| 48 | + |
/// Groups the space-separated words of `input` by their first character.
///
/// Only the ASCII space `' '` acts as a separator. Leading, trailing or
/// repeated spaces produce empty fragments, which are skipped. Every word is
/// copied into its own `String`, and the words stored under one key keep the
/// order in which they occur in `input`.
fn index_words_string(input: &str) -> HashMap<char, Vec<String>> {
    let mut by_initial: HashMap<char, Vec<String>> = HashMap::new();
    let is_space = |c: char| c == ' ';
    for fragment in input.split(is_space) {
        // An empty fragment has no first character, so this single check both
        // skips empty pieces and yields the map key for real words.
        let Some(initial) = fragment.chars().next() else {
            continue;
        };
        by_initial
            .entry(initial)
            .or_default()
            .push(fragment.to_owned());
    }
    by_initial
}
| 68 | + |
| 69 | +fn index_words_tendril(input: &StrTendril) -> HashMap<char, Vec<StrTendril>> { |
| 70 | + let mut index = HashMap::new(); |
| 71 | + let mut t = input.clone(); |
| 72 | + loop { |
| 73 | + match t.pop_front_char_run(|c| c != ' ') { |
| 74 | + None => return index, |
| 75 | + Some((_, false)) => (), |
| 76 | + Some((word, true)) => match index.entry(word.chars().next().unwrap()) { |
| 77 | + Entry::Occupied(mut e) => { |
| 78 | + e.get_mut().push(word); |
| 79 | + }, |
| 80 | + Entry::Vacant(e) => { |
| 81 | + e.insert(vec![word]); |
| 82 | + }, |
| 83 | + }, |
| 84 | + } |
| 85 | + } |
| 86 | +} |
| 87 | + |
| 88 | +fn test_correctness(txt: &str) { |
| 89 | + use std::borrow::ToOwned; |
| 90 | + use tendril::SliceExt; |
| 91 | + |
| 92 | + let input_string = txt.to_owned(); |
| 93 | + let count_s = index_words_string(&input_string); |
| 94 | + let mut keys: Vec<char> = count_s.keys().cloned().collect(); |
| 95 | + keys.sort(); |
| 96 | + |
| 97 | + let input_tendril = txt.to_tendril(); |
| 98 | + let count_t = index_words_tendril(&input_tendril); |
| 99 | + let mut keys_t: Vec<char> = count_t.keys().cloned().collect(); |
| 100 | + keys_t.sort(); |
| 101 | + |
| 102 | + assert_eq!(keys, keys_t); |
| 103 | + |
| 104 | + for k in &keys { |
| 105 | + let vs = &count_s[k]; |
| 106 | + let vt = &count_t[k]; |
| 107 | + assert_eq!(vs.len(), vt.len()); |
| 108 | + assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t)); |
| 109 | + } |
| 110 | +} |
| 111 | + |
| 112 | +fn index_words_small_string(b: &mut Bencher, txt: &str) { |
| 113 | + let mut s = String::new(); |
| 114 | + while s.len() < SMALL_SIZE { |
| 115 | + s.push_str(txt); |
| 116 | + } |
| 117 | + b.iter(|| index_words_string(&s)); |
| 118 | +} |
| 119 | + |
| 120 | +fn index_words_small_tendril(b: &mut Bencher, txt: &str) { |
| 121 | + let mut t = StrTendril::new(); |
| 122 | + while t.len() < SMALL_SIZE { |
| 123 | + t.push_slice(txt); |
| 124 | + } |
| 125 | + b.iter(|| index_words_tendril(&t)); |
| 126 | +} |
| 127 | + |
| 128 | +fn index_words_big_string(b: &mut Bencher, txt: &str) { |
| 129 | + let mut s = String::new(); |
| 130 | + while s.len() < LARGE_SIZE { |
| 131 | + s.push_str(txt); |
| 132 | + } |
| 133 | + b.iter(|| index_words_string(&s)); |
| 134 | +} |
| 135 | + |
| 136 | +fn index_words_big_tendril(b: &mut Bencher, txt: &str) { |
| 137 | + let mut t = StrTendril::new(); |
| 138 | + while t.len() < LARGE_SIZE { |
| 139 | + t.push_slice(txt); |
| 140 | + } |
| 141 | + b.iter(|| index_words_tendril(&t)); |
| 142 | +} |
| 143 | + |
| 144 | +fn run_bench_group(c: &mut Criterion, group_name: &str, txt: &str) { |
| 145 | + let mut group = c.benchmark_group(group_name); |
| 146 | + |
| 147 | + test_correctness(txt); |
| 148 | + |
| 149 | + group.bench_with_input("index_words_small_string", txt, index_words_small_string); |
| 150 | + group.bench_with_input("index_words_small_tendril", txt, index_words_small_tendril); |
| 151 | + group.bench_with_input("index_words_big_string", txt, index_words_big_string); |
| 152 | + group.bench_with_input("index_words_big_tendril", txt, index_words_big_tendril); |
| 153 | +} |
| 154 | + |
| 155 | +fn tendril_benchmarks(c: &mut Criterion) { |
| 156 | + run_bench_group(c, "en_1", EN_1); |
| 157 | + run_bench_group(c, "en_2", EN_2); |
| 158 | + run_bench_group(c, "kr_1", KR_1); |
| 159 | + run_bench_group(c, "html_kr_1", HTML_KR_1); |
| 160 | +} |
| 161 | + |
// Criterion wiring: collect `tendril_benchmarks` into a group named `benches`,
// then hand that group to `criterion_main!`.
criterion_group!(benches, tendril_benchmarks);
criterion_main!(benches);
0 commit comments