RuojinCai · awarebayes · Sep 20, 2023 · Sep 20, 2023 · Sep 20, 2023 · Sep 20, 2023
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
@@ -136,6 +136,87 @@ python train.py doppelgangers/configs/training_configs/doppelgangers_classifier_
 python train_multi_gpu.py doppelgangers/configs/training_configs/doppelgangers_classifier_flip.yaml
 ```
 
+## Usage with [HLOC](https://github.com/cvg/Hierarchical-Localization)
+
+**Your `matches.h5` must contain some additional data: for every image pair, the `matches` and `scores` entries that the Doppelgangers HLOC loader reads.**
+
+**I have created [this fork of HLOC](https://github.com/awarebayes/Hierarchical-Localization/tree/doppelgangers-integration-1); hopefully it will be merged upstream later.**
+
+```python
+from doppelgangers.utils import (
+    inference_on_hloc as doppelganger_removal,
+    overwrite_hloc as doppelganger_overwrite,
+)
+sfm_pairs = outputs / "pairs-sfm.txt"
+sfm_pairs_filtered = outputs / "pairs-sfm-filtered.txt"
+features = outputs / "features.h5"
+vlad_features = outputs / "vlad.h5"
+matches = outputs / "matches.h5"
+
+ref_dir = outputs / "ref"
+dense_conf_json = outputs / "dense_conf.json"
+
+feature_conf = extract_features.confs["superpoint_aachen"]
+matcher_conf = match_features.confs["superpoint+lightglue"]
+
+# Global Features
+extract_features.main(
+    extract_features.confs["netvlad"],
+    images,
+    outputs,
+    image_list=references,
+    feature_path=vlad_features,
+)
+
+# Local sparse features
+extract_features.main(
+    feature_conf, images,
+    image_list=references,
+    feature_path=features
+)
+
+pairs_from_retrieval.main(
+    descriptors=vlad_features,
+    output=sfm_pairs,
+    num_matched=16,
+    query_list=references,
+)
+
+# Matching features 
+match_features.main(
+  matcher_conf, 
+  sfm_pairs,
+  features=features,
+  matches=matches
+)
+
+# Doppelganger usage
+doppelganger_removal.main(
+    weights_path='...',
+    features_file=features,
+    matches_file=matches,
+    sfm_filtered=sfm_pairs_filtered,
+    image_dir=images,
+    pair_path=sfm_pairs,
+    batch_size=16
+)
+
+doppelganger_overwrite.main(
+    sfm_filtered=sfm_pairs_filtered,
+    pair_path=sfm_pairs,
+    matches_file=matches
+)
+
+# SFM
+sfm = PixSfM(conf="low_memory")
+sfm.reconstruction(
+    ref_dir,
+    images,
+    sfm_pairs,
+    features,
+    matches,
+    image_list=references,
+)
+```
+
 ## Citation
 ```
 @inproceedings{cai2023doppelgangers,

diff --git a/doppelgangers/datasets/hloc_dataset.py b/doppelgangers/datasets/hloc_dataset.py
@@ -0,0 +1,102 @@
+import os.path as osp
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch.utils.data import Dataset
+import cv2
+from ..utils.dataset import read_loftr_matches
+
+class HlocDoppelgangersDataset(Dataset):
+    def __init__(self,
+                 image_dir,
+                 matches_file,
+                 features_file,
+                 pair_path,
+                 img_size,
+                 **kwargs):
+        """
+        Doppelgangers test dataset: loading images and loftr matches for Doppelgangers model.
+
+        Args:
+            image_dir (str): root directory for images.
+            loftr_match_dir (str): root directory for loftr matches.
+            pair_path (str): pair_list.npy path. This contains image pair information.
+            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
+                                        This is useful during training with batches and testing with memory intensive algorithms.
+        """
+        super().__init__()
+        self.image_dir = image_dir    
+        self.matches_f = matches_file
+        self.features_f = features_file
+        self.pairs_info = []
+        for i1 in matches_file.keys():
+            for i2 in matches_file[i1].keys():
+                self.pairs_info.append((i1, i2))
+
+        self.img_size = img_size
+
+
+    def __len__(self):
+        return len(self.pairs_info)
+
+    def __getitem__(self, idx):
+        image_1_name, image_2_name = self.pairs_info[idx]
+
+        features1 = self.features_f[image_1_name]
+        features2 = self.features_f[image_2_name]
+        keypoints1 = np.array(features1['keypoints'])
+        keypoints2 = np.array(features2['keypoints'])
+        matches_data = self.matches_f[image_1_name][image_2_name]
+        matches = np.array(matches_data['matches'])
+        conf = np.array(matches_data['scores'])
+        keypoints1 = keypoints1[matches[..., 0]].astype(np.int32)
+        keypoints2 = keypoints2[matches[..., 1]].astype(np.int32)
+
+        if np.sum(conf>0.8) == 0:
+            matches = None
+        else:
+            F, mask = cv2.findFundamentalMat(keypoints1[conf>0.8],keypoints2[conf>0.8],cv2.FM_RANSAC, 3, 0.99)
+            if mask is None or F is None:
+                matches = None
+            else:
+                matches = np.array(np.ones((keypoints1.shape[0], 2)) * np.arange(keypoints1.shape[0]).reshape(-1,1)).astype(int)[conf>0.8][mask.ravel()==1]
+
+        img_name1 = osp.join(self.image_dir, image_1_name)
+        img_name2 = osp.join(self.image_dir, image_2_name)
+
+        image = read_loftr_matches(img_name1, img_name2, self.img_size, 8, True, keypoints1, keypoints2, matches, warp=True, conf=conf)
+
+        return {
+            'image': image,
+            'image1_name': image_1_name,
+            'image2_name': image_2_name,
+        }
+
+def get_datasets(cfg):
+    te_dataset = HlocDoppelgangersDataset(
+                    cfg.image_dir,
+                    cfg.matches_file,
+                    cfg.features_file,
+                    cfg.test.pair_path,
+                    img_size=getattr(cfg.test, "img_size", 640))
+
+    return te_dataset
+
+
+def init_np_seed(worker_id):
+    seed = torch.initial_seed()
+    np.random.seed(seed % 4294967296)
+
+
+def get_data_loaders(cfg):
+    te_dataset = get_datasets(cfg)    
+    test_loader = torch.utils.data.DataLoader(
+        dataset=te_dataset, batch_size=cfg.test.batch_size,
+        shuffle=False, num_workers=cfg.num_workers, drop_last=False,
+        worker_init_fn=init_np_seed)
+
+    loaders = {
+        "test_loader": test_loader,
+    }
+    return loaders
+