Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
outputs/
datasets/*
!datasets/sacre_coeur/
*.safetensors
5 changes: 4 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
url = https://github.com/mihaidusmanu/d2-net.git
[submodule "third_party/SuperGluePretrainedNetwork"]
path = third_party/SuperGluePretrainedNetwork
url = https://github.com/magicleap/SuperGluePretrainedNetwork.git
url = https://github.com/VuThanhDat14122004/SuperGluePretrainedNetwork.git
[submodule "third_party/deep-image-retrieval"]
path = third_party/deep-image-retrieval
url = https://github.com/naver/deep-image-retrieval.git
[submodule "third_party/r2d2"]
path = third_party/r2d2
url = https://github.com/naver/r2d2.git
[submodule "third_party/RoMa"]
path = third_party/RoMa
url = https://github.com/VuThanhDat14122004/RoMa.git
277 changes: 26 additions & 251 deletions README.md

Large diffs are not rendered by default.

Binary file added datasets/sacre_coeur/mapping/query/night.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33,115 changes: 32,969 additions & 146 deletions demo.ipynb

Large diffs are not rendered by default.

62 changes: 55 additions & 7 deletions hloc/extract_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,17 @@
'output': 'global-feats-cosplace',
'model': {'name': 'cosplace'},
'preprocessing': {'resize_max': 1024},
}
},
"megaloc": {
"output": "global-feats-megaloc",
"model": {"name": "megaloc"},
"preprocessing": {"resize_max": 1024},
},
"salad": {
"output": "global-feats-salad",
"model": {"name": "salad"},
"preprocessing": {"resize_max": 640},
},
}


Expand Down Expand Up @@ -200,6 +210,7 @@ def __init__(self, root, conf, paths=None):

def __getitem__(self, idx):
name = self.names[idx]
image_path = str(self.root / name)
image = read_image(self.root / name, self.conf.grayscale)
image = image.astype(np.float32)
size = image.shape[:2][::-1]
Expand All @@ -208,8 +219,14 @@ def __getitem__(self, idx):
or max(size) > self.conf.resize_max):
scale = self.conf.resize_max / max(size)
size_new = tuple(int(round(x*scale)) for x in size)
try:
if self.conf.size_roma:
size_new = (int(round(size[0]*scale/self.conf.patch_size)*self.conf.patch_size),
int(round(size[1]*scale/self.conf.patch_size)*self.conf.patch_size))
except:
pass
image = resize_image(image, size_new, self.conf.interpolation)

# print(f"New size: {size_new}")
if self.conf.grayscale:
image = image[None]
else:
Expand All @@ -219,6 +236,7 @@ def __getitem__(self, idx):
data = {
'image': image,
'original_size': np.array(size),
'image_path': image_path
}
return data

Expand All @@ -233,44 +251,61 @@ def main(conf: Dict,
as_half: bool = True,
image_list: Optional[Union[Path, List[str]]] = None,
feature_path: Optional[Path] = None,
overwrite: bool = False) -> Path:
feature_raw_path: Optional[Path] = None,
overwrite: bool = False,
dict_keypoints_index: Optional[Path] = None) -> Path:
# only use for benchmark

logger.info('Extracting local features with configuration:'
f'\n{pprint.pformat(conf)}')

dataset = ImageDataset(image_dir, conf['preprocessing'], image_list)
if feature_path is None:
feature_path = Path(export_dir, conf['output']+'.h5')
feature_path.parent.mkdir(exist_ok=True, parents=True)
print(f"\n---------------------feature path : {feature_path}----------------------\n")
skip_names = set(list_h5_names(feature_path)
if feature_path.exists() and not overwrite else ())
print(f"\n------------len dataset bef: {len(dataset.names)}--------------\n")
dataset.names = [n for n in dataset.names if n not in skip_names]
# dataset.names = [n for n in dataset.names] #test
print(f"\n------------len dataset aft: {len(dataset.names)}--------------\n")
if len(dataset.names) == 0:
logger.info('Skipping the extraction.')
return feature_path

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
Model = dynamic_load(extractors, conf['model']['name'])
model = Model(conf['model']).eval().to(device)
# print name of device being used

loader = torch.utils.data.DataLoader(
dataset, num_workers=1, shuffle=False, pin_memory=True)
dataset, num_workers=1, shuffle=False, pin_memory=True) # default batch_size=1
# loader = torch.utils.data.DataLoader(
# dataset, num_workers=1, shuffle=False, pin_memory=False) # default batch_size=1
for idx, data in enumerate(tqdm(loader)):
name = dataset.names[idx]
pred = model({'image': data['image'].to(device, non_blocking=True)})
pred = {k: v[0].cpu().numpy() for k, v in pred.items()}

pred['image_size'] = original_size = data['original_size'][0].numpy()
pred['image'] = data['image']
pred['image_path'] = data['image_path']
list_keypoints = []
if 'keypoints' in pred:
size = np.array(data['image'].shape[-2:][::-1])
scales = (original_size / size).astype(np.float32)
pred['keypoints'] = (pred['keypoints'] + .5) * scales[None] - .5
if 'scales' in pred:
pred['scales'] *= scales.mean()
# add keypoint uncertainties scaled to the original resolution
uncertainty = getattr(model, 'detection_noise', 1) * scales.mean()
uncertainty = getattr(model, 'detection_noise', 1) * scales.mean() ## detection_noise = 2 in hloc extractors/superpoint.py

if as_half:
for k in pred:
if k == 'image_path':
continue
dt = pred[k].dtype
if (dt == np.float32) and (dt != np.float16):
pred[k] = pred[k].astype(np.float16)
Expand All @@ -284,16 +319,29 @@ def main(conf: Dict,
grp.create_dataset(k, data=v)
if 'keypoints' in pred:
grp['keypoints'].attrs['uncertainty'] = uncertainty
list_keypoints = [(int(xy[0]), int(xy[1])) for xy in pred['keypoints']]
except OSError as error:
if 'No space left on device' in error.args[0]:
logger.error(
'Out of disk space: storing features on disk can take '
'significant space, did you enable the as_half flag?')
del grp, fd[name]
raise error

# create dict_keypoints_index, file['image_name'] = {(pointx1, pointy1):index1, (pointx2, pointy2):index2, ...}
if dict_keypoints_index is not None and 'keypoints' in pred:
with h5py.File(str(dict_keypoints_index), 'a', libver='latest') as fd_index:
if name in fd_index:
del fd_index[name]
fd_index.create_group(name)
# keep integer values for keypoints
for idx_kp, kp in enumerate(list_keypoints):
fd_index[name].create_dataset(str(kp), data=idx_kp)
del pred

# make copy feature.h5 named feature_raw.h5
if feature_raw_path:
import shutil
shutil.copyfile(feature_path, feature_raw_path)
logger.info(f'Copied {feature_path} to {feature_raw_path} for roma preprocessing.')
logger.info('Finished exporting features.')
return feature_path

Expand Down
62 changes: 62 additions & 0 deletions hloc/extractors/aliked.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import sys
from pathlib import Path
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import to_pil_image

from ..utils.base_model import BaseModel

sys.path.append(str(Path(__file__).parent / '../../third_party'))
from ALIKED.nets.aliked import ALIKED as Aliked

def split_images(image):
    """Cut a (1, C, H, W) image tensor into five PIL images.

    Returns, in order: the four quadrants (top-left, top-right,
    bottom-left, bottom-right) and a half-resolution bilinear resize of
    the full frame.
    """
    _, _, height, width = image.shape
    half_h, half_w = height // 2, width // 2
    crops = [
        image[:, :, :half_h, :half_w],   # top-left
        image[:, :, :half_h, half_w:],   # top-right
        image[:, :, half_h:, :half_w],   # bottom-left
        image[:, :, half_h:, half_w:],   # bottom-right
        F.interpolate(
            image,
            size=(half_h, half_w),
            mode="bilinear",
            align_corners=False,
        ),
    ]
    return [to_pil_image(crop.squeeze(0)) for crop in crops]

class ALIKED(BaseModel):
    """ALIKED local-feature extractor.

    Runs the third-party ALIKED network on the four quadrants of the
    input image and on a half-resolution copy of the full frame, and
    returns all five result sets in a single prediction dict.
    """

    def _init(self, conf):
        # Wrap the third-party network; fall back to its usual defaults
        # when the config does not override them.
        self.net = Aliked(
            model_name=conf.get('model_name', 'aliked-n16'),
            device=conf.get('device', 'cuda'),
            top_k=conf.get('top_k', int(-1)),
            scores_th=conf.get('scores_th', float(0.2)),
            n_limit=conf.get('n_limit', int(5000)),
            load_pretrained=conf.get('pretrained', True)
        )

    def _forward(self, data):
        # Order matters: quadrants 1-4 first, half-resolution frame last.
        crops = split_images(data["image"])
        per_crop = [self.net.run(crop) for crop in crops]

        merged = per_crop[-1]
        # Keypoints were detected on a half-resolution frame; double the
        # coordinates to map them back onto the original pixel grid.
        merged['keypoints'][:, 0] *= 2
        merged['keypoints'][:, 1] *= 2

        # Attach the quadrant results under suffixed keys. NOTE(review):
        # quadrant keypoints are left in per-quadrant coordinates (no
        # W//2 / H//2 offsets) — presumably the consumer re-offsets them;
        # verify against the matching code.
        for index, result in enumerate(per_crop[:4], start=1):
            merged[f"keypoints_{index}"] = result["keypoints"]
            merged[f"scores_{index}"] = result["scores"]
            merged[f"descriptors_{index}"] = result["descriptors"]
        return merged
32 changes: 32 additions & 0 deletions hloc/extractors/megaloc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Code to use MegaLoc as a global features extractor.

MegaLoc paper: https://arxiv.org/abs/2502.17237
"""

import torch
import torchvision.transforms as tvf

from ..utils.base_model import BaseModel


# Load model



class MegaPlaces(BaseModel):
    """Global-descriptor extractor wrapping MegaLoc.

    MegaLoc paper: https://arxiv.org/abs/2502.17237
    """

    default_conf = {
        # Location handed to torch.hub.load. The default preserves the
        # previous hard-coded local checkout for backward compatibility,
        # but it can now be overridden (e.g. with a GitHub repo id plus
        # hub_source="github") instead of requiring this exact machine
        # layout.
        "hub_repo": "/lamar/lamar/MegaLoc",
        "hub_source": "local",
    }
    required_inputs = ["image"]

    def _init(self, conf):
        # Load the pretrained MegaLoc model through torch.hub; the repo
        # path/source come from the config rather than being hard-coded.
        self.net = torch.hub.load(
            conf.get("hub_repo", self.default_conf["hub_repo"]),
            "get_trained_model",
            source=conf.get("hub_source", self.default_conf["hub_source"]),
        ).eval()
        # Standard ImageNet normalization statistics.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        self.norm_rgb = tvf.Normalize(mean=mean, std=std)

    def _forward(self, data):
        """Return {'global_descriptor': tensor} for a batch of RGB images.

        Assumes data['image'] is a float tensor in [0, 1] — TODO confirm
        against the extraction pipeline's preprocessing.
        """
        image = self.norm_rgb(data["image"])
        desc = self.net(image)
        return {
            "global_descriptor": desc,
        }
45 changes: 45 additions & 0 deletions hloc/extractors/salad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Global image feature extractor Salad:
Optimal Transport Aggregation for Visual Place Recognition
Sergio Izquierdo, Javier Civera; CVPR 2024.
https://github.com/serizba/salad
"""
import math

import torch
import torchvision.transforms as tvf

from ..utils.base_model import BaseModel


class Salad(BaseModel):
    """SALAD global image descriptor.

    Optimal Transport Aggregation for Visual Place Recognition,
    Sergio Izquierdo and Javier Civera, CVPR 2024.
    https://github.com/serizba/salad
    """

    default_conf = {
        "backbone": "dinov2_vitb14",
        "pretrained": True,
        "patch_size": 14,
    }
    required_inputs = ["image"]

    def _init(self, conf):
        # Fetch the pretrained SALAD network through torch.hub.
        self.net = torch.hub.load(
            "sarlinpe/salad",
            "dinov2_salad",
            backbone=conf["backbone"],
            pretrained=conf["pretrained"],
        ).eval()
        # Standard ImageNet normalization statistics.
        self.norm_rgb = tvf.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

    def _forward(self, data):
        """Return {'global_descriptor': tensor} for a batch of RGB images."""
        image = self.norm_rgb(data["image"])
        patch = self.conf["patch_size"]
        _, _, h, w = image.shape
        # Amount of right/bottom zero-padding needed so that both sides
        # are exact multiples of the backbone patch size.
        pad_h = -h % patch
        pad_w = -w % patch
        if pad_h or pad_w:
            image = torch.nn.functional.pad(image, [0, pad_w, 0, pad_h])
        return {"global_descriptor": self.net(image)}
1 change: 1 addition & 0 deletions hloc/localize_sfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def pose_from_cluster(
**kwargs):

kpq = get_keypoints(features_path, qname)
kpq = kpq.astype(np.float32)
kpq += 0.5 # COLMAP coordinates

kp_idx_to_3D = defaultdict(list)
Expand Down
Loading