Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion gcp/workers/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ def checkout(self, source_repo):
source_repo.repo_url,
os.path.join(self._sources_dir, source_repo.name),
git_callbacks=self._git_callbacks(source_repo),
branch=source_repo.repo_branch)
branch=source_repo.repo_branch,
force_update=True)

def _vuln_ids_from_gcs_blob(self, client: storage.Client,
source_repo: osv.SourceRepository,
Expand Down
15 changes: 11 additions & 4 deletions go/cmd/gitter/gitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func isAuthError(err error) bool {
(strings.Contains(strings.ToLower(errString), "repository") && strings.Contains(strings.ToLower(errString), "not found"))
}

func fetchBlob(ctx context.Context, url string) ([]byte, error) {
func fetchBlob(ctx context.Context, url string, forceUpdate bool) ([]byte, error) {
repoDirName := getRepoDirName(url)
repoPath := path.Join(gitStorePath, repoDirName)
archivePath := repoPath + ".zst"
Expand All @@ -113,7 +113,7 @@ func fetchBlob(ctx context.Context, url string) ([]byte, error) {
lastFetchMu.Unlock()

// Check if we need to fetch
if !ok || time.Since(accessTime) > fetchTimeout {
if forceUpdate || !ok || time.Since(accessTime) > fetchTimeout {
logger.Info("Fetching git blob", slog.String("url", url), slog.Duration("sinceAccessTime", time.Since(accessTime)))
if _, err := os.Stat(path.Join(repoPath, ".git")); os.IsNotExist(err) {
// Clone
Expand Down Expand Up @@ -240,8 +240,9 @@ func gitHandler(w http.ResponseWriter, r *http.Request) {
http.Error(w, "Missing url parameter", http.StatusBadRequest)
return
}
forceUpdate := r.URL.Query().Get("force-update") == "true"

logger.Info("Received request", slog.String("url", url), slog.String("remoteAddr", r.RemoteAddr))
logger.Info("Received request", slog.String("url", url), slog.Bool("forceUpdate", forceUpdate), slog.String("remoteAddr", r.RemoteAddr))
// If request came from a local ip, don't do the check
if !isLocalRequest(r) {
// Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp://
Expand All @@ -251,9 +252,15 @@ func gitHandler(w http.ResponseWriter, r *http.Request) {
}
}

// Keep the key as the url regardless of forceUpdate.
// Occasionally this could be problematic if an existing unforce updated
// query is already inplace, no force update will happen.
// That is highly unlikely in our use case, as importer only queries
// the repo once, and always with force update.
// This is a tradeoff for simplicity to avoid having to setup locks per repo.
//nolint:contextcheck // I can't change singleflight's interface
fileData, err, _ := g.Do(url, func() (any, error) {
return fetchBlob(r.Context(), url)
return fetchBlob(r.Context(), url, forceUpdate)
})

if err != nil {
Expand Down
44 changes: 33 additions & 11 deletions osv/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,28 @@ def open_repo(checkout_dir):
return repo


def clone(git_url, checkout_dir, git_callbacks=None, blobless=False):
def clone(git_url,
checkout_dir,
git_callbacks=None,
blobless=False,
force_update=False):
"""Perform a clone."""
# Don't user Gitter for oss-fuzz-vulns repo because it requires auth
logging.info('Cloning %s to %s.', git_url, checkout_dir)
if GITTER_HOST and git_url != 'ssh://github.com/google/oss-fuzz-vulns':
try:
os.makedirs(checkout_dir, exist_ok=True)
params = {'url': _git_mirror(git_url)}
if force_update:
params['force-update'] = 'true'

# Long timeout duration (1hr) because it could be cloning a large repo
resp = requests.get(
f'{GITTER_HOST}/getgit',
params={'url': _git_mirror(git_url)},
params=params,
stream=True,
timeout=3600
) # Long timeout duration (1hr) because it could be cloning a large repo
timeout=3600,
)
if resp.status_code == 403:
raise RepoInaccessibleError()
if resp.status_code == 400:
Expand Down Expand Up @@ -189,13 +198,19 @@ def clone_with_retries(git_url,
checkout_dir,
git_callbacks=None,
branch=None,
blobless=False):
blobless=False,
force_update=False):
"""Clone with retries."""
logging.info('Cloning %s to %s', git_url, checkout_dir)
os.makedirs(checkout_dir, exist_ok=True)
for attempt in range(CLONE_TRIES):
try:
repo = clone(git_url, checkout_dir, git_callbacks, blobless=blobless)
repo = clone(
git_url,
checkout_dir,
git_callbacks,
blobless=blobless,
force_update=force_update)
repo.cache = {}
if branch:
_checkout_branch(repo, branch)
Expand All @@ -216,7 +231,8 @@ def clone_with_retries(git_url,
def _use_existing_checkout(git_url,
checkout_dir,
git_callbacks=None,
branch=None):
branch=None,
force_update=False):
"""Update and use existing checkout."""
repo = open_repo(checkout_dir)
repo.cache = {}
Expand All @@ -235,7 +251,7 @@ def _use_existing_checkout(git_url,
raise NoBranchError('Branch "%s" not found in repo "%s"' %
(branch, git_url)) from e

reset_repo(repo, git_callbacks)
reset_repo(repo, git_callbacks, force=force_update)
logging.info('Using existing checkout at %s', checkout_dir)
return repo

Expand All @@ -244,13 +260,18 @@ def ensure_updated_checkout(git_url,
checkout_dir,
git_callbacks=None,
branch=None,
blobless=False):
blobless=False,
force_update=False):
"""Ensure updated checkout."""
if os.path.exists(checkout_dir):
# Already exists, reset and checkout latest revision.
try:
return _use_existing_checkout(
git_url, checkout_dir, git_callbacks=git_callbacks, branch=branch)
git_url,
checkout_dir,
git_callbacks=git_callbacks,
branch=branch,
force_update=force_update)
except Exception as e:
# Failed to re-use existing checkout. Delete it and start over.
err_str = str(e)
Expand All @@ -265,7 +286,8 @@ def ensure_updated_checkout(git_url,
checkout_dir,
git_callbacks=git_callbacks,
branch=branch,
blobless=blobless)
blobless=blobless,
force_update=force_update)
logging.info('Repo now at: %s', repo.head.peel().message)
return repo

Expand Down
Loading