Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ ic-custom-domains-base = "0.1"
ic-http-certification = { version = "3.1.0", optional = true }
ic-transport-types = "0.47"
ic-http-gateway-protocol = { package = "ic-http-gateway-protocol", git = "https://github.com/dfinity/ic-http-gateway-protocol", tag = "v0.5.1" }
isbot = "0.1"
itertools = "0.14.0"
lazy_static = "1.5.0"
maxminddb = "0.27.0"
Expand Down Expand Up @@ -96,6 +97,7 @@ tracing-subscriber = { version = "0.3.18", features = [
"json",
] }
url = "2.5.3"
urlencoding = "2.1.3"
# DO NOT upgrade, this breaks monorepo compatibility
# Read https://github.com/uuid-rs/uuid/releases/tag/1.13.0
uuid = { version = "=1.12.1", features = ["v7"] }
Expand Down
24 changes: 24 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{net::SocketAddr, path::PathBuf, time::Duration};
use ::http::HeaderValue;
use clap::{Args, Parser};
use fqdn::FQDN;
use http::Uri;
use humantime::parse_duration;
#[cfg(feature = "acme")]
use ic_bn_lib_common::types::acme::{AcmeUrl, Challenge, DnsBackend};
Expand Down Expand Up @@ -97,6 +98,9 @@ pub struct Cli {
#[command(flatten, next_help_heading = "Shedding Latency")]
pub shed_latency: ShedShardedCli<RequestType>,

#[command(flatten, next_help_heading = "Prerender")]
pub prerender: Prerender,

#[cfg(all(target_os = "linux", feature = "sev-snp"))]
#[command(flatten, next_help_heading = "SEV-SNP")]
pub sev_snp: ic_bn_lib_common::types::utils::SevSnpCli,
Expand Down Expand Up @@ -562,6 +566,26 @@ pub struct RateLimit {
pub rate_limit_bypass_token: Option<String>,
}

// CLI options for pre-rendering pages through an external server-side renderer.
// NOTE: this is a plain `//` comment on purpose - the `///` comments on the fields
// below are used by clap as the user-facing help text of each option.
#[derive(Args)]
pub struct Prerender {
    /// Domains that are eligible for pre-render.
    /// If no domains specified - pre-render is not active.
    #[clap(env, long)]
    pub prerender_domains: Vec<FQDN>,

    /// URL of the server-side renderer
    #[clap(env, long)]
    pub prerender_url: Option<Uri>,

    /// Secret to authenticate with a pre-renderer
    #[clap(env, long)]
    pub prerender_secret: Option<HeaderValue>,

    /// Timeout for executing pre-render request
    #[clap(env, long, default_value = "1m", value_parser = parse_duration)]
    pub prerender_timeout: Duration,
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
89 changes: 89 additions & 0 deletions src/routing/middleware/is_bot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use std::sync::Arc;

use axum::{
extract::{Request, State},
middleware::Next,
response::Response,
};
use http::header::USER_AGENT;
use isbot::Bots;

/// Bot-detection verdict for a single request: `IsBot(true)` when the request's
/// `User-Agent` matched a known bot pattern, `IsBot(false)` otherwise.
///
/// The middleware in this module stores it in the request extensions.
/// It is a plain one-`bool` newtype, so it is `Copy` and comparable with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IsBot(pub bool);

/// State of the bot-detection middleware.
///
/// Wraps the `isbot` matcher that classifies `User-Agent` strings.
pub struct IsBotState {
    // Matcher consulted by `is_bot`
    bots: Bots,
}

impl Default for IsBotState {
    /// Creates the state from the default `Bots` matcher extended with
    /// additional patterns.
    fn default() -> Self {
        // Patterns appended on top of the matcher's default list
        let extra_patterns: &[&str] = &["GoogleOther"];

        let mut matcher = Bots::default();
        matcher.append(extra_patterns);

        Self { bots: matcher }
    }
}

impl IsBotState {
    /// Returns `true` if the given `User-Agent` string is recognized as a bot
    /// by the underlying matcher.
    pub fn is_bot(&self, ua: &str) -> bool {
        let Self { bots: matcher } = self;
        matcher.is_bot(ua)
    }
}

/// Middleware that tags every request with an [`IsBot`] extension.
///
/// The verdict comes from the `User-Agent` header. When the header is missing
/// or cannot be read as a string via `to_str`, the request is tagged
/// `IsBot(false)`. The request is then passed on to the next handler unchanged
/// otherwise.
pub async fn middleware(
    State(state): State<Arc<IsBotState>>,
    mut request: Request,
    next: Next,
) -> Response {
    // Classify first; the borrow of the headers ends before the extensions are mutated
    let detected = match request.headers().get(USER_AGENT).map(|value| value.to_str()) {
        Some(Ok(user_agent)) => state.is_bot(user_agent),
        _ => false,
    };

    request.extensions_mut().insert(IsBot(detected));

    next.run(request).await
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the default matcher against known crawler and regular browser
    /// User-Agent strings. Each assertion names the offending User-Agent so a
    /// failure inside the loop can be traced to a specific entry.
    #[test]
    fn test_is_bot() {
        let state = IsBotState::default();

        // User-Agents that must be classified as bots
        let bots = &[
            "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36",
            "Googlebot-Image/1.0",
            "Googlebot-Video/1.0",
            "Mozilla/5.0 (X11; Linux x86_64; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Safari/537.36",
            "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36",
            "Mozilla/5.0 (compatible; Google-InspectionTool/1.0;)",
            "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Google-InspectionTool/1.0;)",
            "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; GoogleOther)",
            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.3; +https://openai.com/gptbot)",
            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/116.0.1938.76 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36; compatible; OAI-SearchBot/1.3; +https://openai.com/searchbot",
            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)",
        ];

        for ua in bots {
            assert!(state.is_bot(ua), "expected to be detected as a bot: {}", ua);
        }

        // User-Agents of regular browsers that must NOT be classified as bots
        let browsers = &[
            "Mozilla/5.0 (Linux; Android 15; SM-S931B Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/127.0.6533.103 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 15; SM-S931U Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36",
            "Mozilla/5.0 (Android 15; Mobile; SM-G556B/DS; rv:130.0) Gecko/130.0 Firefox/130.0",
            "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
        ];

        for ua in browsers {
            assert!(
                !state.is_bot(ua),
                "expected NOT to be detected as a bot: {}",
                ua
            );
        }
    }
}
2 changes: 2 additions & 0 deletions src/routing/middleware/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ pub mod cors;
pub mod denylist;
pub mod geoip;
pub mod headers;
pub mod is_bot;
pub mod preprocess;
pub mod prerender;
pub mod request_id;
pub mod validate;
Loading
Loading