Skip to content

Commit 97ea65f

Browse files
committed
refactor(service): lift identify cache-coherence into IdentifyAggregator
1 parent 10fc001 commit 97ea65f

4 files changed

Lines changed: 281 additions & 128 deletions

File tree

service/src/identification/cache.rs

Lines changed: 36 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::cache::CacheStatus::{Cached, NonCached};
22
use crate::cache::{
3-
CACHE_KEY_VERSION, CACHE_PREFIX, CacheKey, CacheStatus, deserialize_option_redis_value,
3+
CACHE_KEY_VERSION, CACHE_PREFIX, CacheStatus, deserialize_option_redis_value,
44
serialize_option_redis_value, spawn_cache_write,
55
};
66
use crate::db::game::{
@@ -9,6 +9,7 @@ use crate::db::game::{
99
};
1010
use crate::db::game_file::get_game_files_from_game_id;
1111
use crate::error::ServiceResult;
12+
use crate::model::GameMatchType;
1213
use entity::{game, game_file, signature_metadata_mapping};
1314
use hex::encode as hex_encode;
1415
use log::{debug, warn};
@@ -28,44 +29,15 @@ pub struct IdentifyEntry {
2829
pub metadata_mappings: Vec<signature_metadata_mapping::Model>,
2930
}
3031

31-
#[derive(Debug, Clone, Copy)]
32-
pub enum IdentifyCacheType {
33-
IdentifySha256,
34-
IdentifySha1,
35-
IdentifyMd5,
36-
IdentifyFilenameSize,
37-
}
38-
39-
impl CacheKey for IdentifyCacheType {
40-
fn get_cache_key(&self, identifier: &str) -> String {
41-
match &self {
42-
IdentifyCacheType::IdentifySha256 => {
43-
format!("{CACHE_PREFIX}:cache:{CACHE_KEY_VERSION}:identify:sha256:{identifier}")
44-
}
45-
IdentifyCacheType::IdentifySha1 => {
46-
format!("{CACHE_PREFIX}:cache:{CACHE_KEY_VERSION}:identify:sha1:{identifier}")
47-
}
48-
IdentifyCacheType::IdentifyMd5 => {
49-
format!("{CACHE_PREFIX}:cache:{CACHE_KEY_VERSION}:identify:md5:{identifier}")
50-
}
51-
IdentifyCacheType::IdentifyFilenameSize => {
52-
format!(
53-
"{CACHE_PREFIX}:cache:{CACHE_KEY_VERSION}:identify:filename_size:{identifier}"
54-
)
55-
}
56-
}
57-
}
58-
}
59-
60-
impl IdentifyCacheType {
61-
fn metric_label(&self) -> &'static str {
62-
match self {
63-
IdentifyCacheType::IdentifySha256 => "sha256",
64-
IdentifyCacheType::IdentifySha1 => "sha1",
65-
IdentifyCacheType::IdentifyMd5 => "md5",
66-
IdentifyCacheType::IdentifyFilenameSize => "filename_size",
67-
}
68-
}
32+
/// Render the redis key for an identify cache entry. Panics on
33+
/// [`GameMatchType::NoMatch`] (not a cacheable match type); callers
34+
/// should only pass match types whose
35+
/// [`GameMatchType::cache_segment`] returns `Some`.
36+
fn identify_cache_key(match_type: GameMatchType, identifier: &str) -> String {
37+
let segment = match_type
38+
.cache_segment()
39+
.expect("identify_cache_key called with non-cacheable match type");
40+
format!("{CACHE_PREFIX}:cache:{CACHE_KEY_VERSION}:identify:{segment}:{identifier}")
6941
}
7042

7143
/// Produce the opaque identifier segment used in the filename+size identify
@@ -80,35 +52,22 @@ pub fn filename_size_key(file_name: &str, file_size: i64) -> String {
8052
hex_encode(hasher.finalize())
8153
}
8254

83-
pub async fn delete_identify_cache(
84-
hash: &str,
85-
r#type: IdentifyCacheType,
86-
redis_conn: &mut MultiplexedConnection,
87-
) -> ServiceResult<()> {
88-
let cache_key = r#type.get_cache_key(hash);
89-
debug!("Deleting cache for key: {cache_key}");
90-
if let Err(e) = redis_conn.del(&cache_key).await {
91-
warn!("cache delete failed for {cache_key}: {e}");
92-
}
93-
Ok(())
94-
}
95-
9655
/// Push every identify cache key derived from `file` (sha256, sha1, md5,
9756
/// filename+size) into `out`. Skips fields that are missing on the model so
9857
/// the caller can pipeline a single `DEL` over only the keys that exist.
9958
pub fn collect_identify_cache_keys(file: &game_file::Model, out: &mut Vec<String>) {
10059
if let Some(sha256) = &file.sha256 {
101-
out.push(IdentifyCacheType::IdentifySha256.get_cache_key(sha256));
60+
out.push(identify_cache_key(GameMatchType::SHA256, sha256));
10261
}
10362
if let Some(sha1) = &file.sha1 {
104-
out.push(IdentifyCacheType::IdentifySha1.get_cache_key(sha1));
63+
out.push(identify_cache_key(GameMatchType::SHA1, sha1));
10564
}
10665
if let Some(md5) = &file.md5 {
107-
out.push(IdentifyCacheType::IdentifyMd5.get_cache_key(md5));
66+
out.push(identify_cache_key(GameMatchType::MD5, md5));
10867
}
10968
if let Some(size) = file.file_size_in_bytes {
11069
let key = filename_size_key(&file.file_name, size);
111-
out.push(IdentifyCacheType::IdentifyFilenameSize.get_cache_key(&key));
70+
out.push(identify_cache_key(GameMatchType::FileNameAndSize, &key));
11271
}
11372
}
11473

@@ -145,26 +104,23 @@ pub async fn find_game_and_metadata_ids_by_filename_size_cached(
145104
db_conn: &DbConn,
146105
) -> ServiceResult<CacheStatus<Option<IdentifyEntry>>> {
147106
let key = filename_size_key(file_name, file_size);
148-
let cache_key = IdentifyCacheType::IdentifyFilenameSize.get_cache_key(&key);
107+
let cache_key = identify_cache_key(GameMatchType::FileNameAndSize, &key);
108+
let metric_segment = GameMatchType::FileNameAndSize
109+
.cache_segment()
110+
.expect("FileNameAndSize is cacheable");
149111

150112
if let Ok(Some(cached_val)) = redis_conn
151113
.get_ex(&cache_key, Expiry::EX(IDENTIFY_CACHE_LIFETIME))
152114
.await
153115
{
154116
debug!("Cache hit for filename+size");
155-
crate::metrics::record_cache_hit(
156-
"identify",
157-
IdentifyCacheType::IdentifyFilenameSize.metric_label(),
158-
);
117+
crate::metrics::record_cache_hit("identify", metric_segment);
159118
let deserialized = deserialize_option_redis_value(cached_val)?;
160119
return Ok(Cached(deserialized));
161120
}
162121

163122
debug!("Cache miss for filename+size");
164-
crate::metrics::record_cache_miss(
165-
"identify",
166-
IdentifyCacheType::IdentifyFilenameSize.metric_label(),
167-
);
123+
crate::metrics::record_cache_miss("identify", metric_segment);
168124

169125
let entry = find_game_and_id_mapping_by_name_and_size(file_name, file_size, db_conn)
170126
.await?
@@ -186,34 +142,38 @@ pub async fn find_game_and_metadata_ids_by_filename_size_cached(
186142

187143
pub async fn find_game_and_metadata_ids_by_hash_cached(
188144
hash: &str,
189-
r#type: IdentifyCacheType,
145+
match_type: GameMatchType,
190146
redis_conn: &mut MultiplexedConnection,
191147
db_conn: &DbConn,
192148
) -> ServiceResult<CacheStatus<Option<IdentifyEntry>>> {
193-
let cache_key = r#type.get_cache_key(hash);
149+
let cache_key = identify_cache_key(match_type, hash);
150+
let metric_segment = match_type
151+
.cache_segment()
152+
.expect("hash cache called with non-cacheable match type");
194153

195154
if let Ok(Some(cached_val)) = redis_conn
196155
.get_ex(&cache_key, Expiry::EX(IDENTIFY_CACHE_LIFETIME))
197156
.await
198157
{
199158
debug!("Cache hit for key: {hash}");
200-
crate::metrics::record_cache_hit("identify", r#type.metric_label());
159+
crate::metrics::record_cache_hit("identify", metric_segment);
201160
let deserialized = deserialize_option_redis_value(cached_val)?;
202161
return Ok(Cached(deserialized));
203162
}
204163

205164
debug!("Cache miss for key: {hash}");
206-
crate::metrics::record_cache_miss("identify", r#type.metric_label());
165+
crate::metrics::record_cache_miss("identify", metric_segment);
207166

208-
let entry = match r#type {
209-
IdentifyCacheType::IdentifySha256 => {
210-
find_game_and_id_mapping_by_sha256(hash, db_conn).await?
211-
}
212-
IdentifyCacheType::IdentifySha1 => find_game_and_id_mapping_by_sha1(hash, db_conn).await?,
213-
IdentifyCacheType::IdentifyMd5 => find_game_and_id_mapping_by_md5(hash, db_conn).await?,
214-
IdentifyCacheType::IdentifyFilenameSize => {
167+
let entry = match match_type {
168+
GameMatchType::SHA256 => find_game_and_id_mapping_by_sha256(hash, db_conn).await?,
169+
GameMatchType::SHA1 => find_game_and_id_mapping_by_sha1(hash, db_conn).await?,
170+
GameMatchType::MD5 => find_game_and_id_mapping_by_md5(hash, db_conn).await?,
171+
GameMatchType::FileNameAndSize => {
215172
unreachable!("filename+size has a dedicated cached wrapper")
216173
}
174+
GameMatchType::NoMatch => {
175+
unreachable!("NoMatch is not a cacheable match type")
176+
}
217177
}
218178
.map(|(game, mappings)| IdentifyEntry {
219179
game,

service/src/identification/mod.rs

Lines changed: 37 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
pub mod cache;
2+
mod protocol;
23

34
use crate::cache::CacheStatus;
45
use crate::cache::CacheStatus::{Cached, NonCached};
56
use crate::db::game::{find_all_relations_of_game, get_game_by_id};
67
use crate::error::{ServiceError, ServiceResult};
78
use crate::identification::cache::{
8-
IdentifyCacheType, IdentifyEntry, find_game_and_metadata_ids_by_filename_size_cached,
9+
IdentifyEntry, find_game_and_metadata_ids_by_filename_size_cached,
910
find_game_and_metadata_ids_by_hash_cached,
1011
};
12+
use crate::identification::protocol::IdentifyAggregator;
1113
use crate::matching::manual::build_result;
1214
use crate::model::{
1315
GameAndRelationMatchResult, GameAndRelationMatchResultBuilder, GameAndRelationsResult,
@@ -18,6 +20,7 @@ use log::debug;
1820
use redis::aio::MultiplexedConnection;
1921
use sea_orm::DbConn;
2022
use sea_orm::prelude::Uuid;
23+
use std::ops::ControlFlow;
2124
use strum::IntoEnumIterator;
2225

2326
pub async fn get_game_by_id_from_db(game_id: Uuid, conn: &DbConn) -> ServiceResult<PlaymatchGame> {
@@ -95,6 +98,10 @@ pub async fn identify_game_and_metadata_mappings(
9598
/// Walk the supported match types in order (sha256, sha1, md5, name+size) and return the first
9699
/// hit, together with whether the hit was served from cache. If every hash that could have been
97100
/// checked produced a cached-but-empty result, surface a `Cached(None)`; otherwise `NonCached(None)`.
101+
///
102+
/// Cache-coherence bookkeeping lives in [`IdentifyAggregator`]; this fn is
103+
/// the impure dispatch shell that runs the cache lookups and records the
104+
/// per-attempt metric.
98105
async fn identify_game(
99106
search: &GameFileMatchSearch,
100107
redis_conn: &mut MultiplexedConnection,
@@ -109,94 +116,72 @@ async fn identify_game(
109116
.filter(|hash| hash.is_some())
110117
.count();
111118

112-
let mut cached_but_empty = 0;
119+
let mut agg = IdentifyAggregator::new(expected_count);
113120

114121
for match_type in GameMatchType::iter().filter(|t| *t != GameMatchType::NoMatch) {
115-
let (attempted, type_result) = match match_type {
122+
let outcome = match match_type {
116123
GameMatchType::SHA256 => match &search.sha256 {
117-
Some(hash) => (
118-
true,
124+
Some(hash) => {
119125
find_game_and_metadata_ids_by_hash_cached(
120126
hash,
121-
IdentifyCacheType::IdentifySha256,
127+
GameMatchType::SHA256,
122128
redis_conn,
123129
db_conn,
124130
)
125-
.await?,
126-
),
127-
None => (false, NonCached(None)),
131+
.await?
132+
}
133+
None => continue,
128134
},
129135
GameMatchType::SHA1 => match &search.sha1 {
130-
Some(hash) => (
131-
true,
136+
Some(hash) => {
132137
find_game_and_metadata_ids_by_hash_cached(
133138
hash,
134-
IdentifyCacheType::IdentifySha1,
139+
GameMatchType::SHA1,
135140
redis_conn,
136141
db_conn,
137142
)
138-
.await?,
139-
),
140-
None => (false, NonCached(None)),
143+
.await?
144+
}
145+
None => continue,
141146
},
142147
GameMatchType::MD5 => match &search.md5 {
143-
Some(hash) => (
144-
true,
148+
Some(hash) => {
145149
find_game_and_metadata_ids_by_hash_cached(
146150
hash,
147-
IdentifyCacheType::IdentifyMd5,
151+
GameMatchType::MD5,
148152
redis_conn,
149153
db_conn,
150154
)
151-
.await?,
152-
),
153-
None => (false, NonCached(None)),
155+
.await?
156+
}
157+
None => continue,
154158
},
155-
GameMatchType::FileNameAndSize => (
156-
true,
159+
GameMatchType::FileNameAndSize => {
157160
find_game_and_metadata_ids_by_filename_size_cached(
158161
&search.file_name,
159162
search.file_size,
160163
redis_conn,
161164
db_conn,
162165
)
163-
.await?,
164-
),
166+
.await?
167+
}
165168
GameMatchType::NoMatch => unreachable!(),
166169
};
167170

168-
match type_result {
169-
Cached(Some(entry)) => {
170-
debug!("Cache hit for game match ({match_type:?}): {entry:?}");
171-
crate::metrics::record_identify_attempt(match_type.metric_label(), true);
172-
return Ok(Cached(Some((match_type, entry))));
173-
}
174-
NonCached(Some(entry)) => {
175-
debug!("Cache miss for game match ({match_type:?}): {entry:?}");
176-
crate::metrics::record_identify_attempt(match_type.metric_label(), true);
177-
return Ok(NonCached(Some((match_type, entry))));
178-
}
179-
Cached(None) => {
180-
if attempted {
181-
crate::metrics::record_identify_attempt(match_type.metric_label(), false);
182-
}
183-
cached_but_empty += 1;
184-
}
185-
NonCached(None) => {
186-
if attempted {
187-
crate::metrics::record_identify_attempt(match_type.metric_label(), false);
188-
}
189-
continue;
190-
}
171+
let hit = matches!(&outcome, Cached(Some(_)) | NonCached(Some(_)));
172+
crate::metrics::record_identify_attempt(match_type.metric_label(), hit);
173+
174+
if let ControlFlow::Break(result) = agg.observe(match_type, outcome) {
175+
debug!("Identify resolved on match type {match_type:?}");
176+
return Ok(result);
191177
}
192178
}
193179

194-
if cached_but_empty == expected_count && cached_but_empty != 0 {
180+
let result = agg.finalize();
181+
if matches!(result, Cached(None)) {
195182
debug!("All possible game matches were cached as empty, returning cached no-match");
196-
Ok(Cached(None))
197-
} else {
198-
Ok(NonCached(None))
199183
}
184+
Ok(result)
200185
}
201186

202187
async fn build_relation_match_result(

0 commit comments

Comments
 (0)