haofanwang · mst-rajatmishra · Jan 12, 2025
diff --git a/swapper.py b/swapper.py
@@ -1,264 +1,180 @@
-"""
-This project is developed by Haofan Wang to support face swap in single frame. Multi-frame will be supported soon!
-
-It is highly built on the top of insightface, sd-webui-roop and CodeFormer.
-"""
-
 import os
 import cv2
 import copy
 import argparse
+import logging
 import insightface
 import onnxruntime
 import numpy as np
 from PIL import Image
 from typing import List, Union, Dict, Set, Tuple
+from concurrent.futures import ThreadPoolExecutor
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# Global variables for caching models
+face_analyser = None
+face_swapper = None
 
 
 def getFaceSwapModel(model_path: str):
+    """Load the face swap model."""
     model = insightface.model_zoo.get_model(model_path)
     return model
 
 
-def getFaceAnalyser(model_path: str, providers,
-                    det_size=(320, 320)):
-    face_analyser = insightface.app.FaceAnalysis(name="buffalo_l", root="./checkpoints", providers=providers)
-    face_analyser.prepare(ctx_id=0, det_size=det_size)
+def getFaceAnalyser(model_path: str, providers, det_size=(320, 320)):
+    """Load the face analyser model."""
+    global face_analyser
+    if face_analyser is None:
+        face_analyser = insightface.app.FaceAnalysis(name="buffalo_l", root="./checkpoints", providers=providers)
+        face_analyser.prepare(ctx_id=0, det_size=det_size)
     return face_analyser
 
 
-def get_one_face(face_analyser,
-                 frame:np.ndarray):
-    face = face_analyser.get(frame)
+def get_one_face(face_analyser, frame: np.ndarray):
+    """Get a single face from the frame."""
+    faces = face_analyser.get(frame)
     try:
-        return min(face, key=lambda x: x.bbox[0])
+        return min(faces, key=lambda x: x.bbox[0])
     except ValueError:
         return None
 
-
-def get_many_faces(face_analyser,
-                   frame:np.ndarray):
-    """
-    get faces from left to right by order
-    """
+
+def get_many_faces(face_analyser, frame: np.ndarray):
+    """Get all faces in the frame, sorted from left to right."""
     try:
-        face = face_analyser.get(frame)
-        return sorted(face, key=lambda x: x.bbox[0])
+        faces = face_analyser.get(frame)
+        return sorted(faces, key=lambda x: x.bbox[0])
     except IndexError:
         return None
 
 
-def swap_face(face_swapper,
-              source_faces,
-              target_faces,
-              source_index,
-              target_index,
-              temp_frame):
-    """
-    paste source_face on target image
-    """
+def swap_face(face_swapper, source_faces, target_faces, source_index, target_index, temp_frame):
+    """Swap the face from source to target image."""
     source_face = source_faces[source_index]
     target_face = target_faces[target_index]
-
     return face_swapper.get(temp_frame, target_face, source_face, paste_back=True)
-
-
-def process(source_img: Union[Image.Image, List],
-            target_img: Image.Image,
-            source_indexes: str,
-            target_indexes: str,
-            model: str):
-    # load machine default available providers
-    providers = onnxruntime.get_available_providers()
 
-    # load face_analyser
+
+def parse_indexes(index_str, num_faces):
+    """Parse face indexes from a string with optional ranges (e.g., '0,1,2-4')."""
+    indexes = []
+    for part in index_str.split(','):
+        if '-' in part:
+            start, end = map(int, part.split('-'))
+            indexes.extend(range(start, end + 1))
+        else:
+            indexes.append(int(part))
+    return [i for i in indexes if i < num_faces]
+
+
+def process(source_img: Union[Image.Image, List], target_img: Image.Image, source_indexes: str, target_indexes: str, model: str):
+    """Main function to perform face swapping."""
+    # Load providers and models
+    providers = onnxruntime.get_available_providers()
     face_analyser = getFaceAnalyser(model, providers)
-    
-    # load face_swapper
+
+    # Load face swapper model
     model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model)
     face_swapper = getFaceSwapModel(model_path)
-    
-    # read target image
+
+    # Convert target image to numpy array
     target_img = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
-
-    # detect faces that will be replaced in the target image
     target_faces = get_many_faces(face_analyser, target_img)
-    num_target_faces = len(target_faces)
-    num_source_images = len(source_img)
-
-    if target_faces is not None:
-        temp_frame = copy.deepcopy(target_img)
-        if isinstance(source_img, list) and num_source_images == num_target_faces:
-            print("Replacing faces in target image from the left to the right by order")
-            for i in range(num_target_faces):
-                source_faces = get_many_faces(face_analyser, cv2.cvtColor(np.array(source_img[i]), cv2.COLOR_RGB2BGR))
-                source_index = i
-                target_index = i
-
-                if source_faces is None:
-                    raise Exception("No source faces found!")
-
-                temp_frame = swap_face(
-                    face_swapper,
-                    source_faces,
-                    target_faces,
-                    source_index,
-                    target_index,
-                    temp_frame
-                )
-        elif num_source_images == 1:
-            # detect source faces that will be replaced into the target image
-            source_faces = get_many_faces(face_analyser, cv2.cvtColor(np.array(source_img[0]), cv2.COLOR_RGB2BGR))
-            num_source_faces = len(source_faces)
-            print(f"Source faces: {num_source_faces}")
-            print(f"Target faces: {num_target_faces}")
 
+    # Check if faces are found in the target image
+    if not target_faces:
+        logging.error("No target faces found!")
+        return None
+
+    temp_frame = copy.deepcopy(target_img)
+
+    # Process the source image(s)
+    if isinstance(source_img, list):
+        num_source_images = len(source_img)
+        num_target_faces = len(target_faces)
+
+        if num_source_images != num_target_faces:
+            logging.warning("The number of source images does not match the number of target faces. Proceeding with face-by-face swap.")
+
+        for i in range(num_target_faces):
+            source_faces = get_many_faces(face_analyser, cv2.cvtColor(np.array(source_img[i]), cv2.COLOR_RGB2BGR))
             if source_faces is None:
-                raise Exception("No source faces found!")
-
-            if target_indexes == "-1":
-                if num_source_faces == 1:
-                    print("Replacing all faces in target image with the same face from the source image")
-                    num_iterations = num_target_faces
-                elif num_source_faces < num_target_faces:
-                    print("There are less faces in the source image than the target image, replacing as many as we can")
-                    num_iterations = num_source_faces
-                elif num_target_faces < num_source_faces:
-                    print("There are less faces in the target image than the source image, replacing as many as we can")
-                    num_iterations = num_target_faces
-                else:
-                    print("Replacing all faces in the target image with the faces from the source image")
-                    num_iterations = num_target_faces
-
-                for i in range(num_iterations):
-                    source_index = 0 if num_source_faces == 1 else i
-                    target_index = i
-
-                    temp_frame = swap_face(
-                        face_swapper,
-                        source_faces,
-                        target_faces,
-                        source_index,
-                        target_index,
-                        temp_frame
-                    )
-            else:
-                print("Replacing specific face(s) in the target image with specific face(s) from the source image")
-
-                if source_indexes == "-1":
-                    source_indexes = ','.join(map(lambda x: str(x), range(num_source_faces)))
-
-                if target_indexes == "-1":
-                    target_indexes = ','.join(map(lambda x: str(x), range(num_target_faces)))
-
-                source_indexes = source_indexes.split(',')
-                target_indexes = target_indexes.split(',')
-                num_source_faces_to_swap = len(source_indexes)
-                num_target_faces_to_swap = len(target_indexes)
-
-                if num_source_faces_to_swap > num_source_faces:
-                    raise Exception("Number of source indexes is greater than the number of faces in the source image")
-
-                if num_target_faces_to_swap > num_target_faces:
-                    raise Exception("Number of target indexes is greater than the number of faces in the target image")
-
-                if num_source_faces_to_swap > num_target_faces_to_swap:
-                    num_iterations = num_source_faces_to_swap
-                else:
-                    num_iterations = num_target_faces_to_swap
-
-                if num_source_faces_to_swap == num_target_faces_to_swap:
-                    for index in range(num_iterations):
-                        source_index = int(source_indexes[index])
-                        target_index = int(target_indexes[index])
-
-                        if source_index > num_source_faces-1:
-                            raise ValueError(f"Source index {source_index} is higher than the number of faces in the source image")
-
-                        if target_index > num_target_faces-1:
-                            raise ValueError(f"Target index {target_index} is higher than the number of faces in the target image")
-
-                        temp_frame = swap_face(
-                            face_swapper,
-                            source_faces,
-                            target_faces,
-                            source_index,
-                            target_index,
-                            temp_frame
-                        )
-        else:
-            raise Exception("Unsupported face configuration")
-        result = temp_frame
+                raise ValueError(f"No faces found in source image {i}.")
+
+            temp_frame = swap_face(face_swapper, source_faces, target_faces, i, i, temp_frame)
     else:
-        print("No target faces found!")
-
-    result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
-    return result_image
+        num_source_faces = len(get_many_faces(face_analyser, cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)))
+        target_indexes = parse_indexes(target_indexes, len(target_faces))
+        source_indexes = parse_indexes(source_indexes, num_source_faces)
 
+        for source_index, target_index in zip(source_indexes, target_indexes):
+            temp_frame = swap_face(face_swapper, get_many_faces(face_analyser, cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)),
+                                   target_faces, source_index, target_index, temp_frame)
 
-def parse_args():
-    parser = argparse.ArgumentParser(description="Face swap.")
-    parser.add_argument("--source_img", type=str, required=True, help="The path of source image, it can be multiple images, dir;dir2;dir3.")
-    parser.add_argument("--target_img", type=str, required=True, help="The path of target image.")
-    parser.add_argument("--output_img", type=str, required=False, default="result.png", help="The path and filename of output image.")
-    parser.add_argument("--source_indexes", type=str, required=False, default="-1", help="Comma separated list of the face indexes to use (left to right) in the source image, starting at 0 (-1 uses all faces in the source image")
-    parser.add_argument("--target_indexes", type=str, required=False, default="-1", help="Comma separated list of the face indexes to swap (left to right) in the target image, starting at 0 (-1 swaps all faces in the target image")
-    parser.add_argument("--face_restore", action="store_true", help="The flag for face restoration.")
-    parser.add_argument("--background_enhance", action="store_true", help="The flag for background enhancement.")
-    parser.add_argument("--face_upsample", action="store_true", help="The flag for face upsample.")
-    parser.add_argument("--upscale", type=int, default=1, help="The upscale value, up to 4.")
-    parser.add_argument("--codeformer_fidelity", type=float, default=0.5, help="The codeformer fidelity.")
-    args = parser.parse_args()
-    return args
+    # Convert the result back to an image
+    result_image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
+    return result_image
 
 
-if __name__ == "__main__":
-
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description="Face swap with optional restoration and enhancement.")
+    parser.add_argument("--source_img", type=str, required=True, help="Path(s) to source image(s), separated by semicolons.")
+    parser.add_argument("--target_img", type=str, required=True, help="Path to the target image.")
+    parser.add_argument("--output_img", type=str, default="result.png", help="Path to save the output image.")
+    parser.add_argument("--source_indexes", type=str, default="-1", help="Comma-separated list of source face indexes to use.")
+    parser.add_argument("--target_indexes", type=str, default="-1", help="Comma-separated list of target face indexes to swap.")
+    parser.add_argument("--face_restore", action="store_true", help="Enable face restoration after swapping.")
+    parser.add_argument("--background_enhance", action="store_true", help="Enable background enhancement.")
+    parser.add_argument("--face_upsample", action="store_true", help="Enable face upsampling.")
+    parser.add_argument("--upscale", type=int, default=1, help="Upscale factor, up to 4.")
+    parser.add_argument("--codeformer_fidelity", type=float, default=0.5, help="CodeFormer fidelity parameter.")
+    return parser.parse_args()
+
+
+def main():
     args = parse_args()
-
+
+    # Split the source image paths
     source_img_paths = args.source_img.split(';')
-    print("Source image paths:", source_img_paths)
+    logging.info(f"Source image paths: {source_img_paths}")
     target_img_path = args.target_img
-
+
+    # Open images
     source_img = [Image.open(img_path) for img_path in source_img_paths]
     target_img = Image.open(target_img_path)
 
-    # download from https://huggingface.co/deepinsight/inswapper/tree/main
+    # Load the pre-trained face swap model
     model = "./checkpoints/inswapper_128.onnx"
     result_image = process(source_img, target_img, args.source_indexes, args.target_indexes, model)
-
+
+    if result_image is None:
+        logging.error("Face swap failed.")
+        return
+
+    # Optional face restoration
     if args.face_restore:
         from restoration import *
-
-        # make sure the ckpts downloaded successfully
-        check_ckpts()
-
-        # https://huggingface.co/spaces/sczhou/CodeFormer
+        check_ckpts()  # Ensure the restoration models are available
         upsampler = set_realesrgan()
         device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
-
-        codeformer_net = ARCH_REGISTRY.get("CodeFormer")(dim_embd=512,
-                                                         codebook_size=1024,
-                                                         n_head=8,
-                                                         n_layers=9,
-                                                         connect_list=["32", "64", "128", "256"],
-                                                        ).to(device)
-        ckpt_path = "CodeFormer/CodeFormer/weights/CodeFormer/codeformer.pth"
-        checkpoint = torch.load(ckpt_path)["params_ema"]
+        codeformer_net = ARCH_REGISTRY.get("CodeFormer")(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9).to(device)
+        checkpoint = torch.load("CodeFormer/CodeFormer/weights/CodeFormer/codeformer.pth")["params_ema"]
         codeformer_net.load_state_dict(checkpoint)
         codeformer_net.eval()
-
-        result_image = cv2.cvtColor(np.array(result_image), cv2.COLOR_RGB2BGR)
-        result_image = face_restoration(result_image, 
-                                        args.background_enhance, 
-                                        args.face_upsample, 
-                                        args.upscale, 
-                                        args.codeformer_fidelity,
-                                        upsampler,
-                                        codeformer_net,
-                                        device)
+
+        result_image = np.array(result_image)
+        result_image = face_restoration(result_image, args.background_enhance, args.face_upsample, args.upscale, args.codeformer_fidelity,
+                                       upsampler, codeformer_net, device)
         result_image = Image.fromarray(result_image)
-    
-    # save result
+
+    # Save the result
     result_image.save(args.output_img)
-    print(f'Result saved successfully: {args.output_img}')
+    logging.info(f"Result saved to: {args.output_img}")
+
+
+if __name__ == "__main__":
+    main()