@@ -599,6 +599,16 @@ pub struct SessionIo<'sess, 'ctx: 'sess> {
599599 git_versions : Mutex < IndexMap < PathBuf , GitVersions < ' ctx > > > ,
600600}
601601
602+ /// What accessing a dependency's git database requires.
603+ enum DbAction {
604+ /// The database exists and is up to date; only read access is needed.
605+ Ready ,
606+ /// The database does not exist yet and must be initialized and fetched.
607+ Init ,
608+ /// The database exists but must be fetched to pick up upstream updates.
609+ Fetch ,
610+ }
611+
602612impl < ' io , ' sess : ' io , ' ctx : ' sess > SessionIo < ' sess , ' ctx > {
603613 /// Create a new session wrapper.
604614 pub fn new ( sess : & ' sess Session < ' ctx > ) -> SessionIo < ' sess , ' ctx > {
@@ -666,46 +676,143 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
666676 }
667677 }
668678
669- /// Access the git database for a dependency.
679+ /// Access the git database for a dependency, creating or updating it as
680+ /// needed.
670681 ///
671- /// Acquires the per-dependency filesystem lock and delegates to
672- /// [`git_database_locked`]. Callers that already hold the lock for this
673- /// dependency should call [`git_database_locked`] directly to avoid
674- /// deadlocking.
682+ /// Locking is fine-grained. The common case -- the database already exists
683+ /// and is current -- only takes a shared lock, so concurrent read-only
684+ /// callers run in parallel instead of serializing. Only when an
685+ /// initialization or fetch is actually required is the lock escalated to
686+ /// exclusive, and the database state is re-checked under it in case another
687+ /// process performed the work while we were briefly unlocked. The lock is
688+ /// released before returning; the returned handle is used for read-only
689+ /// access. Callers that themselves mutate (e.g. a checkout holding a
690+ /// long-lived shared lock and fetching on a fallback path) manage their own
691+ /// locking around this call.
675692 async fn git_database (
676693 & ' io self ,
677694 name : & str ,
678695 url : & str ,
679696 force_fetch : Option < bool > ,
680697 fetch_ref : Option < & str > ,
681698 ) -> Result < Git < ' ctx > > {
682- let _lock = FsLock :: acquire_exclusive ( self . sess . dep_lock_path ( name, url) ) . await ?;
683- self . git_database_locked ( name, url, force_fetch, fetch_ref)
684- . await
699+ self . sess . stats . num_calls_git_database . increment ( ) ;
700+ // TODO: Make the assembled future shared and keep it in a lookup table.
701+ // Then use that table to return the future if it already exists.
702+ // This ensures that the gitdb is setup only once, and makes the
703+ // whole process faster for later calls.
704+ let lock_path = self . sess . dep_lock_path ( name, url) ;
705+
706+ // Fast path: under a shared lock, check whether the database is already
707+ // present and up to date. If so, nothing has to be mutated and we can
708+ // return right away, letting concurrent readers run in parallel.
709+ let shared = FsLock :: acquire_shared ( lock_path. clone ( ) ) . await ?;
710+ let ( git, action) = self . git_database_action ( name, url, force_fetch) ?;
711+ if let DbAction :: Ready = action {
712+ return Ok ( git) ;
713+ }
714+ drop ( shared) ;
715+
716+ // A mutation (initialization or fetch) is required. Take the exclusive
717+ // lock and re-evaluate: another process may have completed the work
718+ // while we were briefly unlocked, in which case there is nothing to do.
719+ let _exclusive = FsLock :: acquire_exclusive ( lock_path) . await ?;
720+ let ( git, action) = self . git_database_action ( name, url, force_fetch) ?;
721+ match action {
722+ DbAction :: Ready => Ok ( git) ,
723+ DbAction :: Init => {
724+ ensure ! (
725+ !self . sess. local_only,
726+ help = "Re-run without `--local`, or provide a local path dependency." ,
727+ "Cannot initialize git dependency while `--local` is enabled."
728+ ) ;
729+ log:: info!( "initializing git database for {} from {}" , name, url) ;
730+ self . sess . stats . num_database_init . increment ( ) ;
731+ // The progress bar object for cloning. We only use it for the
732+ // last fetch operation, which is the only network operation here.
733+ let pb = Some ( ProgressHandler :: new (
734+ self . sess . multiprogress . clone ( ) ,
735+ GitProgressOps :: Clone ,
736+ name,
737+ ) ) ;
738+ git. clone ( )
739+ . spawn_with ( |c| c. arg ( "init" ) . arg ( "--bare" ) , None )
740+ . await ?;
741+ // Disable automatic garbage collection on the shared database.
742+ // Checkouts created with `git clone --shared` borrow objects from
743+ // this database via `objects/info/alternates`, so pruning here
744+ // could corrupt live checkouts that reference those objects.
745+ git. clone ( )
746+ . spawn_with ( |c| c. arg ( "config" ) . arg ( "gc.auto" ) . arg ( "0" ) , None )
747+ . await ?;
748+ git. clone ( )
749+ . spawn_with ( |c| c. arg ( "remote" ) . arg ( "add" ) . arg ( "origin" ) . arg ( url) , None )
750+ . await ?;
751+ git. clone ( )
752+ . fetch ( "origin" , pb)
753+ . and_then ( |_| async {
754+ if let Some ( reference) = fetch_ref {
755+ git. clone ( ) . fetch_ref ( "origin" , reference, None ) . await
756+ } else {
757+ Ok ( ( ) )
758+ }
759+ } )
760+ . await
761+ . map_err ( move |cause| {
762+ Error :: from ( SessionErrors :: GitDatabaseAccess (
763+ "initialize" ,
764+ url. contains ( "git@" ) ,
765+ cause. into ( ) ,
766+ ) )
767+ } )
768+ . map ( move |_| git)
769+ }
770+ DbAction :: Fetch => {
771+ log:: info!( "fetching updates for {} from {}" , name, url) ;
772+ self . sess . stats . num_database_fetch . increment ( ) ;
773+ // The progress bar object for fetching.
774+ let pb = Some ( ProgressHandler :: new (
775+ self . sess . multiprogress . clone ( ) ,
776+ GitProgressOps :: Fetch ,
777+ name,
778+ ) ) ;
779+ git. clone ( )
780+ . fetch ( "origin" , pb)
781+ . and_then ( |_| async {
782+ if let Some ( reference) = fetch_ref {
783+ git. clone ( ) . fetch_ref ( "origin" , reference, None ) . await
784+ } else {
785+ Ok ( ( ) )
786+ }
787+ } )
788+ . await
789+ . map_err ( move |cause| {
790+ Error :: from ( SessionErrors :: GitDatabaseAccess (
791+ "update" ,
792+ url. contains ( "git@" ) ,
793+ cause. into ( ) ,
794+ ) )
795+ } )
796+ . map ( move |_| git)
797+ }
798+ }
685799 }
686800
687- /// Access the git database for a dependency, assuming the per-dependency
688- /// filesystem lock is already held by the caller .
801+ /// Locate the bare git database for a dependency and decide what, if
802+ /// anything, has to be done to bring it up to date .
689803 ///
690- /// If the database does not exist, it is created. If the database has not
691- /// been updated recently, the remote is fetched.
692- async fn git_database_locked (
804+ /// This only inspects the database (creating its directory if missing); it
805+ /// never mutates git objects, so it is safe to evaluate under a shared lock.
806+ /// The fetch decision mirrors `force_fetch`: `Some(true)` always fetches,
807+ /// `Some(false)` never fetches, and `None` fetches only if the manifest has
808+ /// been modified since the last fetch.
809+ fn git_database_action (
693810 & ' io self ,
694811 name : & str ,
695812 url : & str ,
696813 force_fetch : Option < bool > ,
697- fetch_ref : Option < & str > ,
698- ) -> Result < Git < ' ctx > > {
699- // TODO: Make the assembled future shared and keep it in a lookup table.
700- // Then use that table to return the future if it already exists.
701- // This ensures that the gitdb is setup only once, and makes the
702- // whole process faster for later calls.
703- self . sess . stats . num_calls_git_database . increment ( ) ;
704-
814+ ) -> Result < ( Git < ' ctx > , DbAction ) > {
705815 let db_name = self . sess . git_db_name ( name, url) ;
706-
707- // Determine the location of the git database and create it if its does
708- // not yet exist.
709816 let db_dir = self . sess . db_parent ( ) . join ( "git" ) . join ( "db" ) . join ( db_name) ;
710817 let db_dir = self . sess . intern_path ( db_dir) ;
711818 std:: fs:: create_dir_all ( db_dir)
@@ -716,99 +823,22 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
716823 & self . sess . config . git ,
717824 self . sess . git_throttle . clone ( ) ,
718825 ) ;
719-
720- // Either initialize the repository or update it if needed.
721- if !db_dir. join ( "config" ) . exists ( ) {
722- ensure ! (
723- !self . sess. local_only,
724- help = "Re-run without `--local`, or provide a local path dependency." ,
725- "Cannot initialize git dependency while `--local` is enabled."
726- ) ;
727- // Initialize.
728- log:: info!( "initializing git database for {} from {}" , name, url) ;
729- self . sess . stats . num_database_init . increment ( ) ;
730- // The progress bar object for cloning. We only use it for the
731- // last fetch operation, which is the only network operation here.
732- let pb = Some ( ProgressHandler :: new (
733- self . sess . multiprogress . clone ( ) ,
734- GitProgressOps :: Clone ,
735- name,
736- ) ) ;
737- git. clone ( )
738- . spawn_with ( |c| c. arg ( "init" ) . arg ( "--bare" ) , None )
739- . await ?;
740- // Disable automatic garbage collection on the shared database.
741- // Checkouts created with `git clone --shared` borrow objects from
742- // this database via `objects/info/alternates`, so pruning here
743- // could corrupt live checkouts that reference those objects.
744- git. clone ( )
745- . spawn_with ( |c| c. arg ( "config" ) . arg ( "gc.auto" ) . arg ( "0" ) , None )
746- . await ?;
747- git. clone ( )
748- . spawn_with ( |c| c. arg ( "remote" ) . arg ( "add" ) . arg ( "origin" ) . arg ( url) , None )
749- . await ?;
750- git. clone ( )
751- . fetch ( "origin" , pb)
752- . and_then ( |_| async {
753- if let Some ( reference) = fetch_ref {
754- git. clone ( ) . fetch_ref ( "origin" , reference, None ) . await
755- } else {
756- Ok ( ( ) )
757- }
758- } )
759- . await
760- . map_err ( move |cause| {
761- Error :: from ( SessionErrors :: GitDatabaseAccess (
762- "initialize" ,
763- url. contains ( "git@" ) ,
764- cause. into ( ) ,
765- ) )
766- } )
767- . map ( move |_| git)
826+ let action = if !db_dir. join ( "config" ) . exists ( ) {
827+ DbAction :: Init
768828 } else {
769- // Update if the manifest has been modified since the last fetch.
770829 let db_mtime = try_modification_time ( db_dir. join ( "FETCH_HEAD" ) ) ;
771830 let skip_fetch = match force_fetch {
772831 Some ( true ) => false ,
773832 Some ( false ) => true ,
774833 None => self . sess . manifest_mtime < db_mtime,
775834 } ;
776835 if skip_fetch || self . sess . local_only {
777- log:: info!(
778- "skipping fetch of {:?} (skip_fetch={}, local_only={})" ,
779- db_dir,
780- skip_fetch,
781- self . sess. local_only
782- ) ;
783- return Ok ( git) ;
836+ DbAction :: Ready
837+ } else {
838+ DbAction :: Fetch
784839 }
785- log:: info!( "fetching updates for {} from {}" , name, url) ;
786- self . sess . stats . num_database_fetch . increment ( ) ;
787- // The progress bar object for fetching.
788- let pb = Some ( ProgressHandler :: new (
789- self . sess . multiprogress . clone ( ) ,
790- GitProgressOps :: Fetch ,
791- name,
792- ) ) ;
793- git. clone ( )
794- . fetch ( "origin" , pb)
795- . and_then ( |_| async {
796- if let Some ( reference) = fetch_ref {
797- git. clone ( ) . fetch_ref ( "origin" , reference, None ) . await
798- } else {
799- Ok ( ( ) )
800- }
801- } )
802- . await
803- . map_err ( move |cause| {
804- Error :: from ( SessionErrors :: GitDatabaseAccess (
805- "update" ,
806- url. contains ( "git@" ) ,
807- cause. into ( ) ,
808- ) )
809- } )
810- . map ( move |_| git)
811- }
840+ } ;
841+ Ok ( ( git, action) )
812842 }
813843
814844 /// Determine the list of versions available for a git dependency.
@@ -976,10 +1006,10 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
9761006 url : & str ,
9771007 revision : & str ,
9781008 ) -> Result < ( ) > {
1009+ // Make sure the database exists, then take the exclusive lock for the
1010+ // by-hash fetch, which is the only mutation here.
1011+ let git = self . git_database ( name, url, Some ( false ) , None ) . await ?;
9791012 let _wlock = FsLock :: acquire_exclusive ( self . sess . dep_lock_path ( name, url) ) . await ?;
980- let git = self
981- . git_database_locked ( name, url, Some ( false ) , None )
982- . await ?;
9831013 let pb = Some ( ProgressHandler :: new (
9841014 self . sess . multiprogress . clone ( ) ,
9851015 GitProgressOps :: Fetch ,
@@ -1051,12 +1081,15 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
10511081 force : bool ,
10521082 update_list : & [ String ] ,
10531083 ) -> Result < & ' ctx Path > {
1054- // A checkout reads from the shared bare database (via `git clone
1055- // --shared`) but never writes to it, so a shared lock is enough and
1056- // lets concurrent invocations check out in parallel against a warm,
1057- // pre-populated database. The only writer path is the fallback fetch
1058- // below (when the locked revision is missing from the database), which
1059- // briefly drops this lock and takes an exclusive one of its own.
1084+ // Resolve (and, if necessary, initialize) the bare database before
1085+ // taking the lock we hold across the checkout. A checkout reads from the
1086+ // database (via `git clone --shared`) but never writes to it, so a
1087+ // shared lock is enough and lets concurrent invocations check out in
1088+ // parallel against a warm, pre-populated database. The only writer path
1089+ // is the fallback fetch below (when the locked revision is missing from
1090+ // the database), which briefly drops this lock and takes an exclusive
1091+ // one of its own.
1092+ let db = self . git_database ( name, url, Some ( false ) , None ) . await ?;
10601093 let mut db_lock = Some ( FsLock :: acquire_shared ( self . sess . dep_lock_path ( name, url) ) . await ?) ;
10611094
10621095 #[ derive( Eq , PartialEq ) ]
@@ -1074,15 +1107,9 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
10741107 log:: debug!( "currently `{}` (want `{}`)" , current, revision) ;
10751108 if current == revision {
10761109 CheckoutState :: Clean
1077- } else if let Ok ( db) =
1078- self . git_database_locked ( name, url, Some ( false ) , None ) . await
1079- {
1080- if let Ok ( remote) = local_git. clone ( ) . remote_url ( "origin" ) . await {
1081- if remote == db. path . to_str ( ) . unwrap ( ) {
1082- CheckoutState :: ToCheckout
1083- } else {
1084- CheckoutState :: ToClone
1085- }
1110+ } else if let Ok ( remote) = local_git. clone ( ) . remote_url ( "origin" ) . await {
1111+ if remote == db. path . to_str ( ) . unwrap ( ) {
1112+ CheckoutState :: ToCheckout
10861113 } else {
10871114 CheckoutState :: ToClone
10881115 }
@@ -1140,11 +1167,9 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> {
11401167 // check under the shared lock. If the revision is missing (e.g. the
11411168 // cached database predates the locked commit), it must be fetched,
11421169 // which mutates the database: drop the shared lock, fetch via
1143- // `git_database` (which locks exclusively), then re-take the shared
1144- // lock and continue.
1145- let mut git = self
1146- . git_database_locked ( name, url, Some ( false ) , None )
1147- . await ?;
1170+ // `git_database` (which escalates to an exclusive lock internally),
1171+ // then re-take the shared lock and continue.
1172+ let mut git = db. clone ( ) ;
11481173 let revision_check = revision. to_string ( ) ;
11491174 let have_commit = git
11501175 . clone ( )
0 commit comments