Skip to content

Commit dc0265b

Browse files
committed
database: only acquire exclusive lock for write operations
1 parent 343d937 commit dc0265b

1 file changed

Lines changed: 156 additions & 131 deletions

File tree

src/sess.rs

Lines changed: 156 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,16 @@ pub struct SessionIo<'sess, 'ctx: 'sess> {
599599
git_versions: Mutex<IndexMap<PathBuf, GitVersions<'ctx>>>,
600600
}
601601

602+
/// What accessing a dependency's git database requires.
603+
enum DbAction {
604+
/// The database exists and is up to date; only read access is needed.
605+
Ready,
606+
/// The database does not exist yet and must be initialized and fetched.
607+
Init,
608+
/// The database exists but must be fetched to pick up upstream updates.
609+
Fetch,
610+
}
611+
602612
impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
603613
/// Create a new session wrapper.
604614
pub fn new(sess: &'sess Session<'ctx>) -> SessionIo<'sess, 'ctx> {
@@ -666,46 +676,143 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
666676
}
667677
}
668678

669-
/// Access the git database for a dependency.
679+
/// Access the git database for a dependency, creating or updating it as
680+
/// needed.
670681
///
671-
/// Acquires the per-dependency filesystem lock and delegates to
672-
/// [`git_database_locked`]. Callers that already hold the lock for this
673-
/// dependency should call [`git_database_locked`] directly to avoid
674-
/// deadlocking.
682+
/// Locking is fine-grained. The common case -- the database already exists
683+
/// and is current -- only takes a shared lock, so concurrent read-only
684+
/// callers run in parallel instead of serializing. Only when an
685+
/// initialization or fetch is actually required is the lock escalated to
686+
/// exclusive, and the database state is re-checked under it in case another
687+
/// process performed the work while we were briefly unlocked. The lock is
688+
/// released before returning; the returned handle is used for read-only
689+
/// access. Callers that themselves mutate (e.g. a checkout holding a
690+
/// long-lived shared lock and fetching on a fallback path) manage their own
691+
/// locking around this call.
675692
async fn git_database(
676693
&'io self,
677694
name: &str,
678695
url: &str,
679696
force_fetch: Option<bool>,
680697
fetch_ref: Option<&str>,
681698
) -> Result<Git<'ctx>> {
682-
let _lock = FsLock::acquire_exclusive(self.sess.dep_lock_path(name, url)).await?;
683-
self.git_database_locked(name, url, force_fetch, fetch_ref)
684-
.await
699+
self.sess.stats.num_calls_git_database.increment();
700+
// TODO: Make the assembled future shared and keep it in a lookup table.
701+
// Then use that table to return the future if it already exists.
702+
// This ensures that the gitdb is setup only once, and makes the
703+
// whole process faster for later calls.
704+
let lock_path = self.sess.dep_lock_path(name, url);
705+
706+
// Fast path: under a shared lock, check whether the database is already
707+
// present and up to date. If so, nothing has to be mutated and we can
708+
// return right away, letting concurrent readers run in parallel.
709+
let shared = FsLock::acquire_shared(lock_path.clone()).await?;
710+
let (git, action) = self.git_database_action(name, url, force_fetch)?;
711+
if let DbAction::Ready = action {
712+
return Ok(git);
713+
}
714+
drop(shared);
715+
716+
// A mutation (initialization or fetch) is required. Take the exclusive
717+
// lock and re-evaluate: another process may have completed the work
718+
// while we were briefly unlocked, in which case there is nothing to do.
719+
let _exclusive = FsLock::acquire_exclusive(lock_path).await?;
720+
let (git, action) = self.git_database_action(name, url, force_fetch)?;
721+
match action {
722+
DbAction::Ready => Ok(git),
723+
DbAction::Init => {
724+
ensure!(
725+
!self.sess.local_only,
726+
help = "Re-run without `--local`, or provide a local path dependency.",
727+
"Cannot initialize git dependency while `--local` is enabled."
728+
);
729+
log::info!("initializing git database for {} from {}", name, url);
730+
self.sess.stats.num_database_init.increment();
731+
// The progress bar object for cloning. We only use it for the
732+
// last fetch operation, which is the only network operation here.
733+
let pb = Some(ProgressHandler::new(
734+
self.sess.multiprogress.clone(),
735+
GitProgressOps::Clone,
736+
name,
737+
));
738+
git.clone()
739+
.spawn_with(|c| c.arg("init").arg("--bare"), None)
740+
.await?;
741+
// Disable automatic garbage collection on the shared database.
742+
// Checkouts created with `git clone --shared` borrow objects from
743+
// this database via `objects/info/alternates`, so pruning here
744+
// could corrupt live checkouts that reference those objects.
745+
git.clone()
746+
.spawn_with(|c| c.arg("config").arg("gc.auto").arg("0"), None)
747+
.await?;
748+
git.clone()
749+
.spawn_with(|c| c.arg("remote").arg("add").arg("origin").arg(url), None)
750+
.await?;
751+
git.clone()
752+
.fetch("origin", pb)
753+
.and_then(|_| async {
754+
if let Some(reference) = fetch_ref {
755+
git.clone().fetch_ref("origin", reference, None).await
756+
} else {
757+
Ok(())
758+
}
759+
})
760+
.await
761+
.map_err(move |cause| {
762+
Error::from(SessionErrors::GitDatabaseAccess(
763+
"initialize",
764+
url.contains("git@"),
765+
cause.into(),
766+
))
767+
})
768+
.map(move |_| git)
769+
}
770+
DbAction::Fetch => {
771+
log::info!("fetching updates for {} from {}", name, url);
772+
self.sess.stats.num_database_fetch.increment();
773+
// The progress bar object for fetching.
774+
let pb = Some(ProgressHandler::new(
775+
self.sess.multiprogress.clone(),
776+
GitProgressOps::Fetch,
777+
name,
778+
));
779+
git.clone()
780+
.fetch("origin", pb)
781+
.and_then(|_| async {
782+
if let Some(reference) = fetch_ref {
783+
git.clone().fetch_ref("origin", reference, None).await
784+
} else {
785+
Ok(())
786+
}
787+
})
788+
.await
789+
.map_err(move |cause| {
790+
Error::from(SessionErrors::GitDatabaseAccess(
791+
"update",
792+
url.contains("git@"),
793+
cause.into(),
794+
))
795+
})
796+
.map(move |_| git)
797+
}
798+
}
685799
}
686800

687-
/// Access the git database for a dependency, assuming the per-dependency
688-
/// filesystem lock is already held by the caller.
801+
/// Locate the bare git database for a dependency and decide what, if
802+
/// anything, has to be done to bring it up to date.
689803
///
690-
/// If the database does not exist, it is created. If the database has not
691-
/// been updated recently, the remote is fetched.
692-
async fn git_database_locked(
804+
/// This only inspects the database (creating its directory if missing); it
805+
/// never mutates git objects, so it is safe to evaluate under a shared lock.
806+
/// The fetch decision mirrors `force_fetch`: `Some(true)` always fetches,
807+
/// `Some(false)` never fetches, and `None` fetches only if the manifest has
808+
/// been modified since the last fetch.
809+
fn git_database_action(
693810
&'io self,
694811
name: &str,
695812
url: &str,
696813
force_fetch: Option<bool>,
697-
fetch_ref: Option<&str>,
698-
) -> Result<Git<'ctx>> {
699-
// TODO: Make the assembled future shared and keep it in a lookup table.
700-
// Then use that table to return the future if it already exists.
701-
// This ensures that the gitdb is setup only once, and makes the
702-
// whole process faster for later calls.
703-
self.sess.stats.num_calls_git_database.increment();
704-
814+
) -> Result<(Git<'ctx>, DbAction)> {
705815
let db_name = self.sess.git_db_name(name, url);
706-
707-
// Determine the location of the git database and create it if its does
708-
// not yet exist.
709816
let db_dir = self.sess.db_parent().join("git").join("db").join(db_name);
710817
let db_dir = self.sess.intern_path(db_dir);
711818
std::fs::create_dir_all(db_dir)
@@ -716,99 +823,22 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
716823
&self.sess.config.git,
717824
self.sess.git_throttle.clone(),
718825
);
719-
720-
// Either initialize the repository or update it if needed.
721-
if !db_dir.join("config").exists() {
722-
ensure!(
723-
!self.sess.local_only,
724-
help = "Re-run without `--local`, or provide a local path dependency.",
725-
"Cannot initialize git dependency while `--local` is enabled."
726-
);
727-
// Initialize.
728-
log::info!("initializing git database for {} from {}", name, url);
729-
self.sess.stats.num_database_init.increment();
730-
// The progress bar object for cloning. We only use it for the
731-
// last fetch operation, which is the only network operation here.
732-
let pb = Some(ProgressHandler::new(
733-
self.sess.multiprogress.clone(),
734-
GitProgressOps::Clone,
735-
name,
736-
));
737-
git.clone()
738-
.spawn_with(|c| c.arg("init").arg("--bare"), None)
739-
.await?;
740-
// Disable automatic garbage collection on the shared database.
741-
// Checkouts created with `git clone --shared` borrow objects from
742-
// this database via `objects/info/alternates`, so pruning here
743-
// could corrupt live checkouts that reference those objects.
744-
git.clone()
745-
.spawn_with(|c| c.arg("config").arg("gc.auto").arg("0"), None)
746-
.await?;
747-
git.clone()
748-
.spawn_with(|c| c.arg("remote").arg("add").arg("origin").arg(url), None)
749-
.await?;
750-
git.clone()
751-
.fetch("origin", pb)
752-
.and_then(|_| async {
753-
if let Some(reference) = fetch_ref {
754-
git.clone().fetch_ref("origin", reference, None).await
755-
} else {
756-
Ok(())
757-
}
758-
})
759-
.await
760-
.map_err(move |cause| {
761-
Error::from(SessionErrors::GitDatabaseAccess(
762-
"initialize",
763-
url.contains("git@"),
764-
cause.into(),
765-
))
766-
})
767-
.map(move |_| git)
826+
let action = if !db_dir.join("config").exists() {
827+
DbAction::Init
768828
} else {
769-
// Update if the manifest has been modified since the last fetch.
770829
let db_mtime = try_modification_time(db_dir.join("FETCH_HEAD"));
771830
let skip_fetch = match force_fetch {
772831
Some(true) => false,
773832
Some(false) => true,
774833
None => self.sess.manifest_mtime < db_mtime,
775834
};
776835
if skip_fetch || self.sess.local_only {
777-
log::info!(
778-
"skipping fetch of {:?} (skip_fetch={}, local_only={})",
779-
db_dir,
780-
skip_fetch,
781-
self.sess.local_only
782-
);
783-
return Ok(git);
836+
DbAction::Ready
837+
} else {
838+
DbAction::Fetch
784839
}
785-
log::info!("fetching updates for {} from {}", name, url);
786-
self.sess.stats.num_database_fetch.increment();
787-
// The progress bar object for fetching.
788-
let pb = Some(ProgressHandler::new(
789-
self.sess.multiprogress.clone(),
790-
GitProgressOps::Fetch,
791-
name,
792-
));
793-
git.clone()
794-
.fetch("origin", pb)
795-
.and_then(|_| async {
796-
if let Some(reference) = fetch_ref {
797-
git.clone().fetch_ref("origin", reference, None).await
798-
} else {
799-
Ok(())
800-
}
801-
})
802-
.await
803-
.map_err(move |cause| {
804-
Error::from(SessionErrors::GitDatabaseAccess(
805-
"update",
806-
url.contains("git@"),
807-
cause.into(),
808-
))
809-
})
810-
.map(move |_| git)
811-
}
840+
};
841+
Ok((git, action))
812842
}
813843

814844
/// Determine the list of versions available for a git dependency.
@@ -976,10 +1006,10 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
9761006
url: &str,
9771007
revision: &str,
9781008
) -> Result<()> {
1009+
// Make sure the database exists, then take the exclusive lock for the
1010+
// by-hash fetch, which is the only mutation here.
1011+
let git = self.git_database(name, url, Some(false), None).await?;
9791012
let _wlock = FsLock::acquire_exclusive(self.sess.dep_lock_path(name, url)).await?;
980-
let git = self
981-
.git_database_locked(name, url, Some(false), None)
982-
.await?;
9831013
let pb = Some(ProgressHandler::new(
9841014
self.sess.multiprogress.clone(),
9851015
GitProgressOps::Fetch,
@@ -1051,12 +1081,15 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
10511081
force: bool,
10521082
update_list: &[String],
10531083
) -> Result<&'ctx Path> {
1054-
// A checkout reads from the shared bare database (via `git clone
1055-
// --shared`) but never writes to it, so a shared lock is enough and
1056-
// lets concurrent invocations check out in parallel against a warm,
1057-
// pre-populated database. The only writer path is the fallback fetch
1058-
// below (when the locked revision is missing from the database), which
1059-
// briefly drops this lock and takes an exclusive one of its own.
1084+
// Resolve (and, if necessary, initialize) the bare database before
1085+
// taking the lock we hold across the checkout. A checkout reads from the
1086+
// database (via `git clone --shared`) but never writes to it, so a
1087+
// shared lock is enough and lets concurrent invocations check out in
1088+
// parallel against a warm, pre-populated database. The only writer path
1089+
// is the fallback fetch below (when the locked revision is missing from
1090+
// the database), which briefly drops this lock and takes an exclusive
1091+
// one of its own.
1092+
let db = self.git_database(name, url, Some(false), None).await?;
10601093
let mut db_lock = Some(FsLock::acquire_shared(self.sess.dep_lock_path(name, url)).await?);
10611094

10621095
#[derive(Eq, PartialEq)]
@@ -1074,15 +1107,9 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
10741107
log::debug!("currently `{}` (want `{}`)", current, revision);
10751108
if current == revision {
10761109
CheckoutState::Clean
1077-
} else if let Ok(db) =
1078-
self.git_database_locked(name, url, Some(false), None).await
1079-
{
1080-
if let Ok(remote) = local_git.clone().remote_url("origin").await {
1081-
if remote == db.path.to_str().unwrap() {
1082-
CheckoutState::ToCheckout
1083-
} else {
1084-
CheckoutState::ToClone
1085-
}
1110+
} else if let Ok(remote) = local_git.clone().remote_url("origin").await {
1111+
if remote == db.path.to_str().unwrap() {
1112+
CheckoutState::ToCheckout
10861113
} else {
10871114
CheckoutState::ToClone
10881115
}
@@ -1140,11 +1167,9 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
11401167
// check under the shared lock. If the revision is missing (e.g. the
11411168
// cached database predates the locked commit), it must be fetched,
11421169
// which mutates the database: drop the shared lock, fetch via
1143-
// `git_database` (which locks exclusively), then re-take the shared
1144-
// lock and continue.
1145-
let mut git = self
1146-
.git_database_locked(name, url, Some(false), None)
1147-
.await?;
1170+
// `git_database` (which escalates to an exclusive lock internally),
1171+
// then re-take the shared lock and continue.
1172+
let mut git = db.clone();
11481173
let revision_check = revision.to_string();
11491174
let have_commit = git
11501175
.clone()

0 commit comments

Comments
 (0)