Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ src/schema.rs.orig
/blob-report/
/playwright/.cache/

docs/superpowers/
42 changes: 42 additions & 0 deletions Dockerfile.integration
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Image for running the crates.io server in integration tests.
# Debug build for fast compilation and full debug output.

ARG RUST_VERSION=1.94.1
Copy link
Copy Markdown
Member

@carols10cents carols10cents Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again I don't use docker, but this looks significantly different than the existing docker file at https://github.com/rust-lang/crates.io/blob/main/backend.Dockerfile, could you explain why this one is different?

View changes since the review


# Base image: the `rust` image tagged with the RUST_VERSION build argument.
FROM rust:${RUST_VERSION}

# Install the diesel CLI, pinned to 2.3.7 and built with only the Postgres
# feature. The entrypoint script below uses it to run migrations.
RUN cargo install diesel_cli --version 2.3.7 --no-default-features --features postgres

# Copy the entire build context into /app and compile the `server` binary.
# No --release flag is passed, so the artifact lands in target/debug.
WORKDIR /app
COPY . /app
RUN cargo build --bin server

# Install the debug binary under the name the entrypoint script execs.
RUN cp target/debug/server /usr/local/bin/crates-io-server

# NOTE(review): presumably the port the server listens on -- confirm against
# the server configuration used by the integration tests.
EXPOSE 8888

# Write a diesel config whose [print_schema] output file is /dev/null.
# NOTE(review): heredocs in RUN need a BuildKit-enabled builder -- confirm the
# CI builder supports them.
RUN cat > /diesel.toml << 'TOML'
[print_schema]
file = "/dev/null"
TOML

# Generate the container entrypoint. The script:
#   1. points diesel at /diesel.toml,
#   2. retries `diesel migration run` every 2 seconds until it succeeds,
#   3. runs ./script/init-local-index.sh, ignoring its stderr and exit status,
#   4. replaces the shell with crates-io-server.
# NOTE(review): the retry loop has no upper bound and retries on *any*
# migration failure, not only while Postgres is unreachable -- a broken
# migration will loop forever instead of failing the container.
RUN cat > /entrypoint.sh << 'EOF'
#!/bin/sh
set -e

# Use a minimal diesel config that skips schema regeneration --
# the schema.rs is already baked into the binary at build time.
export DIESEL_CONFIG_FILE=/diesel.toml

until diesel migration run 2>&1; do
echo "waiting for postgres..." >&2
sleep 2
done

./script/init-local-index.sh 2>/dev/null || true

exec crates-io-server
EOF

# Make the generated script executable and use it as the image entrypoint.
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
2 changes: 1 addition & 1 deletion crates/crates_io_database/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ claims = "=0.8.0"
crates_io_test_db = { path = "../crates_io_test_db" }
googletest = "=0.14.2"
insta = { version = "=1.47.2", features = ["filters", "json"] }
tokio = { version = "=1.52.0", features = ["macros", "rt"] }
tokio = { version = "=1.52.0", features = ["macros", "rt", "rt-multi-thread"] }
2 changes: 2 additions & 0 deletions crates/crates_io_database/src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub use self::email::{Email, NewEmail};
pub use self::follow::Follow;
pub use self::keyword::{CrateKeyword, Keyword};
pub use self::krate::{Crate, CrateName, NewCrate};
pub use self::oauth_provider::{OAuthProviderId, UnknownOAuthProvider};
pub use self::owner::{CrateOwner, Owner, OwnerKind};
pub use self::team::{NewTeam, Team};
pub use self::token::ApiToken;
Expand All @@ -35,6 +36,7 @@ mod email;
mod follow;
mod keyword;
pub mod krate;
mod oauth_provider;
mod owner;
pub mod team;
pub mod token;
Expand Down
102 changes: 102 additions & 0 deletions crates/crates_io_database/src/models/oauth_provider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use std::io::Write;
use std::str::FromStr;

use diesel::deserialize::{self, FromSql};
use diesel::pg::{Pg, PgValue};
use diesel::query_builder::QueryId;
use diesel::serialize::{self, IsNull, Output, ToSql};

use crate::schema::sql_types::OauthProvider as OauthProviderSql;

// The generated `OauthProviderSql` marker type only derives `SqlType`, which
// does not provide a `QueryId` impl. Diesel needs one whenever an
// `OAuthProviderId` value is bound into a query, so it is supplied by hand
// here instead of editing the generated schema.rs.
impl QueryId for OauthProviderSql {
    // The marker type carries no runtime state, so it identifies itself.
    type QueryId = Self;

    const HAS_STATIC_QUERY_ID: bool = true;
}

/// Names which OAuth provider a `User` row is associated with.
///
/// Backed by the `oauth_provider` Postgres enum type. This enum answers
/// "*which* provider does this row refer to" (identity); the `OAuthProvider`
/// trait in the main crate describes how a provider *behaves*.
#[derive(
    Debug, Copy, Clone, PartialEq, Eq, Hash, serde::Serialize, diesel::FromSqlRow,
    diesel::AsExpression,
)]
#[diesel(sql_type = OauthProviderSql)]
#[serde(rename_all = "snake_case")]
pub enum OAuthProviderId {
    /// GitHub; serialized by serde as `"github"`.
    Github,
}

impl OAuthProviderId {
pub fn as_str(&self) -> &'static str {
match self {
OAuthProviderId::Github => "github",
}
}
}

/// Parses a provider machine name (for example `"github"`).
///
/// Matching is exact and case-sensitive; anything else is returned inside
/// `UnknownOAuthProvider`.
impl FromStr for OAuthProviderId {
    type Err = UnknownOAuthProvider;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "github" {
            return Ok(Self::Github);
        }

        Err(UnknownOAuthProvider(s.to_string()))
    }
}

/// Error returned when a string does not name a known OAuth provider.
///
/// The wrapped `String` is the rejected input, kept so that it can be shown
/// in the error message.
#[derive(Debug, thiserror::Error)]
#[error("unknown oauth provider: {0}")]
pub struct UnknownOAuthProvider(pub String);

/// Decodes an `oauth_provider` database value into an `OAuthProviderId`.
///
/// The raw bytes are interpreted as UTF-8 text and parsed via `FromStr`;
/// invalid UTF-8 or an unrecognised name is reported as a deserialization
/// error.
impl FromSql<OauthProviderSql, Pg> for OAuthProviderId {
    fn from_sql(bytes: PgValue<'_>) -> deserialize::Result<Self> {
        let label = std::str::from_utf8(bytes.as_bytes())?;
        let provider = label.parse::<Self>()?;
        Ok(provider)
    }
}

/// Encodes an `OAuthProviderId` as an `oauth_provider` database value by
/// writing the bytes of `as_str()`.
impl ToSql<OauthProviderSql, Pg> for OAuthProviderId {
    fn to_sql(&self, out: &mut Output<'_, '_, Pg>) -> serialize::Result {
        let label = self.as_str();
        out.write_all(label.as_bytes())?;
        // A provider id always has a value; it is never written as SQL NULL.
        Ok(IsNull::No)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The machine name produced by `as_str` must be accepted by `FromStr`.
    #[test]
    fn as_str_roundtrips_through_from_str() {
        let name = OAuthProviderId::Github.as_str();
        let reparsed = name
            .parse::<OAuthProviderId>()
            .expect("as_str output must parse back");
        assert_eq!(reparsed, OAuthProviderId::Github);
    }

    /// Unknown names are rejected and the error carries the offending input.
    #[test]
    fn from_str_rejects_unknown_provider() {
        let UnknownOAuthProvider(rejected) = "gitlab"
            .parse::<OAuthProviderId>()
            .expect_err("unknown provider must fail");
        assert_eq!(rejected, "gitlab");
    }

    /// The serde representation is the snake_case variant name.
    #[test]
    fn serde_serializes_to_snake_case() {
        let json = serde_json::to_string(&OAuthProviderId::Github).unwrap();
        assert_eq!(json, "\"github\"");
    }
}
191 changes: 188 additions & 3 deletions crates/crates_io_database/src/models/user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use diesel::upsert::excluded;
use diesel_async::{AsyncPgConnection, RunQueryDsl};
use serde::Serialize;

use crate::models::{Crate, CrateOwner, Email, Owner, OwnerKind};
use crate::models::{Crate, CrateOwner, Email, OAuthProviderId, Owner, OwnerKind};
use crate::schema::{crate_owners, emails, oauth_github, users};
use crates_io_diesel_helpers::lower;

Expand All @@ -25,6 +25,7 @@ pub struct User {
pub account_lock_until: Option<DateTime<Utc>>,
pub is_admin: bool,
pub publish_notifications: bool,
pub primary_oauth_provider: OAuthProviderId,
}

impl User {
Expand Down Expand Up @@ -54,6 +55,62 @@ impl User {
Ok(users.collect())
}

/// Look up a user by their external OAuth identity.
///
/// `provider` is the machine name of an OAuth provider (e.g., "github").
/// `account_id` is the provider-native identifier as a string; each
/// provider's storage table parses it into the column's native type
/// (GitHub uses BIGINT; Bitbucket will use TEXT).
///
/// Returns `Ok(None)` if no user matches. Returns `Ok(None)` (not an
/// error) when the account_id fails to parse for a provider that
/// expects a specific shape — the semantic is "is this a known user",
/// not "is this input well-formed".
pub async fn find_by_oauth_identity(
conn: &mut AsyncPgConnection,
provider: &str,
account_id: &str,
) -> QueryResult<Option<User>> {
match provider {
// Must match `crates_io::oauth::github_provider::PROVIDER_NAME`.
// Kept as a literal here becuase this crate can't depend on the
// main crate without creating a circular dependency.
"github" => {
let Ok(gh_id) = account_id.parse::<i64>() else {
tracing::debug!(
provider,
account_id,
"oauth identity lookup skipped: account_id not numeric",
);
return Ok(None);
};
users::table
.inner_join(oauth_github::table.on(oauth_github::user_id.eq(users::id)))
.filter(oauth_github::account_id.eq(gh_id))
.select(User::as_select())
.first(conn)
.await
.optional()
}
_ => Ok(None),
}
}

/// Loads this user's encrypted GitHub OAuth token from the `oauth_github`
/// table.
///
/// Token reads go through `oauth_github.encrypted_token` rather than
/// `users.gh_encrypted_token` so that the read-path works correctly after
/// the Tier 1 identity cutover.
///
/// The query is not made optional, so a user without an `oauth_github` row
/// yields an `Err` rather than an empty token.
pub async fn github_encrypted_token(
    &self,
    conn: &mut AsyncPgConnection,
) -> QueryResult<Vec<u8>> {
    let token_for_user = oauth_github::table
        .select(oauth_github::encrypted_token)
        .filter(oauth_github::user_id.eq(self.id));

    token_for_user.first(conn).await
}

/// Queries the database for the verified emails
/// belonging to a given user
pub async fn verified_email(
Expand Down Expand Up @@ -91,12 +148,34 @@ pub struct NewUser<'a> {

impl NewUser<'_> {
/// Inserts the user into the database, or fails if the user already exists.
///
/// Also inserts a corresponding `oauth_github` row so that the token
/// read-path (which now reads from `oauth_github.encrypted_token` instead
/// of `users.gh_encrypted_token`) works without a full OAuth login flow.
///
/// Returns the freshly inserted `User`. Any database error from either
/// statement is returned as `Err`.
///
/// NOTE(review): the two statements are not wrapped in a transaction here.
/// If the `oauth_github` insert fails, the `users` row stays behind unless
/// the caller already runs this inside a transaction — confirm.
pub async fn insert(&self, conn: &mut AsyncPgConnection) -> QueryResult<User> {
    // The connection must be borrowed mutably: diesel_async executes queries
    // through `&mut Conn`, and a shared `&AsyncPgConnection` is not itself a
    // connection type.
    let user = diesel::insert_into(users::table)
        .values(self)
        .returning(User::as_returning())
        .get_result(conn)
        .await?;

    diesel::insert_into(oauth_github::table)
        .values((
            // Lossless widening to the BIGINT `account_id` column.
            oauth_github::account_id.eq(i64::from(user.gh_id)),
            oauth_github::user_id.eq(user.id),
            oauth_github::login.eq(&user.gh_login),
            oauth_github::encrypted_token.eq(&user.gh_encrypted_token),
        ))
        .on_conflict(oauth_github::account_id)
        // Update the token on conflict so the token read-path (which now
        // reads from oauth_github.encrypted_token) always has a fresh value.
        // do_nothing() would silently skip the update, leaving a stale token.
        // NOTE(review): on conflict only the token is refreshed; `user_id`
        // and `login` of the existing row are left untouched — confirm that
        // this is intended.
        .do_update()
        .set(oauth_github::encrypted_token.eq(excluded(oauth_github::encrypted_token)))
        .execute(conn)
        .await?;

    Ok(user)
}

/// Inserts the user into the database, or updates an existing one.
Expand Down Expand Up @@ -198,3 +277,109 @@ impl NewOauthGithub<'_> {
.await
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crates_io_test_db::TestDatabase;
    use diesel_async::RunQueryDsl;

    /// Placeholder token bytes used for every row created by these tests.
    fn dummy_token() -> Vec<u8> {
        vec![0u8; 32]
    }

    /// Creates a fresh test database and opens an async connection to it.
    ///
    /// The `TestDatabase` handle is returned alongside the connection so the
    /// caller can keep it alive for the duration of the test.
    async fn setup() -> (TestDatabase, AsyncPgConnection) {
        let db = TestDatabase::new();
        let conn = db.async_connect().await;
        (db, conn)
    }

    /// Inserts a `users` row with only the GitHub columns set and returns
    /// the new row's id. All other columns take their database defaults.
    async fn insert_bare_user(conn: &mut AsyncPgConnection, gh_id: i32, login: &str) -> i32 {
        diesel::insert_into(users::table)
            .values((
                users::gh_id.eq(gh_id),
                users::gh_login.eq(login),
                users::gh_encrypted_token.eq(dummy_token()),
            ))
            .returning(users::id)
            .get_result::<i32>(conn)
            .await
            .unwrap()
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn find_by_oauth_identity_returns_user_for_known_github_account() {
        let (_db, mut conn) = setup().await;

        let user_id = insert_bare_user(&mut conn, 1001, "alice").await;

        // Link the user to GitHub account 1001.
        let github_link = (
            oauth_github::account_id.eq(1001i64),
            oauth_github::user_id.eq(user_id),
            oauth_github::login.eq("alice"),
            oauth_github::encrypted_token.eq(dummy_token()),
        );
        diesel::insert_into(oauth_github::table)
            .values(github_link)
            .execute(&mut conn)
            .await
            .unwrap();

        let result = User::find_by_oauth_identity(&mut conn, "github", "1001")
            .await
            .unwrap();

        assert!(result.is_some(), "expected Some(user), got None");
        assert_eq!(result.unwrap().id, user_id);
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn find_by_oauth_identity_returns_none_for_unknown_provider() {
        let (_db, mut conn) = setup().await;

        let lookup = User::find_by_oauth_identity(&mut conn, "bitbucket", "some-account").await;
        let result = lookup.unwrap();

        assert!(result.is_none(), "expected None for unknown provider, got {result:?}");
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn find_by_oauth_identity_rejects_non_numeric_github_account_id() {
        let (_db, mut conn) = setup().await;

        let lookup = User::find_by_oauth_identity(&mut conn, "github", "not-a-number").await;
        let result = lookup.unwrap();

        assert!(
            result.is_none(),
            "expected Ok(None) for non-numeric github account_id, got {result:?}"
        );
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn primary_oauth_provider_defaults_to_github_and_round_trips() {
        let (_db, mut conn) = setup().await;

        // Column omitted on insert: the database default must be GitHub.
        let defaulted_id = insert_bare_user(&mut conn, 2001, "defaulted").await;
        let defaulted = User::find(&mut conn, defaulted_id).await.unwrap();
        assert_eq!(defaulted.primary_oauth_provider, OAuthProviderId::Github);

        // Column set explicitly: the value must survive a write/read cycle.
        let explicit_row = (
            users::gh_id.eq(2002),
            users::gh_login.eq("explicit"),
            users::gh_encrypted_token.eq(dummy_token()),
            users::primary_oauth_provider.eq(OAuthProviderId::Github),
        );
        let explicit_id = diesel::insert_into(users::table)
            .values(explicit_row)
            .returning(users::id)
            .get_result::<i32>(&mut conn)
            .await
            .unwrap();

        let explicit = User::find(&mut conn, explicit_id).await.unwrap();
        assert_eq!(explicit.primary_oauth_provider, OAuthProviderId::Github);
    }
}
Loading