Skip to content

ryouchinsa/sam3-cpp-macos

Repository files navigation

Segment Anything Model 3 CPP Wrapper for macOS and Ubuntu GPU

This code runs Segment Anything Model 3 ONNX models from C++ and is used in the macOS app RectLabel.

We recommend working through this blog post side-by-side with the Google Colab notebook.

sam3_polygon.mp4

Install CUDA, cuDNN, PyTorch, and ONNX Runtime.

Install Segment Anything Model 3 CPP Wrapper.

git clone https://github.com/ryouchinsa/sam3-cpp-macos.git
pip install git+https://github.com/huggingface/transformers@2a61590a479d3b1f77059f75caee7cc22760019d

Install tokenizers-cpp.

For macOS, download tokenizers-cpp from Hugging Face.

For Ubuntu GPU, build tokenizers-cpp from source.

git clone --recursive https://github.com/mlc-ai/tokenizers-cpp.git

cp /content/sam3-cpp-macos/tokenizers-cpp/CMakeLists.txt .
cp /content/sam3-cpp-macos/tokenizers-cpp/msgpack/CMakeLists.txt msgpack
cp /content/sam3-cpp-macos/tokenizers-cpp/sentencepiece/CMakeLists.txt sentencepiece

apt update
apt install -y curl gcc make
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

import os
os.environ['CARGO_HOME'] = '/root/.cargo'
os.environ['PATH'] = f"{os.environ['CARGO_HOME']}/bin:{os.environ['PATH']}"

rustc --version
cargo --version

cd tokenizers-cpp/example
./build_and_run.sh
cd ..

mkdir lib
cp ./example/build/tokenizers/sentencepiece/src/libsentencepiece.a lib/
cp ./example/build/tokenizers/libtokenizers_c.a lib/
cp ./example/build/tokenizers/libtokenizers_cpp.a lib/
cd ..

Download the SAM 3 model from Hugging Face.

hf auth login
hf download facebook/sam3 model.safetensors tokenizer.json

Export ONNX models. This script originated from sam3-image.

Set --model-path to the directory where the Hugging Face model was downloaded.

cd sam3-cpp-macos

# macOS
python export_v2.py --all --model-path /Users/ryo/Downloads/sam3-model --output-dir sam3 --device cpu --image-height 1008 --image-width 1008 --quantize

# Ubuntu GPU
python export_v2.py --all --model-path /root/.cache/huggingface/hub/models--facebook--sam3/snapshots/3c879f39826c281e95690f02c7821c4de09afae7 --output-dir sam3 --image-height 1008 --image-width 1008

If you skip exporting, download the pre-exported SAM 3 ONNX models from Hugging Face.

Build and run.

# macOS
cmake -S . -B build -DONNXRUNTIME_ROOT_DIR=/Users/ryo/Downloads/onnxruntime-osx-universal2-1.23.2 -DTOKENIZERS_ROOT_DIR=/Users/ryo/Downloads/tokenizers-cpp

# Ubuntu GPU
cmake -S . -B build -DONNXRUNTIME_ROOT_DIR=/content/onnxruntime-linux-x64-gpu-1.23.2 -DTOKENIZERS_ROOT_DIR=/content/tokenizers-cpp

cmake --build build

# macOS
./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -text="zebra" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -boxes="pos:124,113,183,329" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -boxes="pos:124,113,183,329;neg:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -text="zebra" -boxes="pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -text="zebra,water" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -boxes="pos:0,0,364,187-pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -text="tree,zebra" -boxes="pos:0,0,364,187-pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cpu" -text="zebra,water,tree" -threshold=0.25

# Ubuntu GPU
./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -text="zebra" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -boxes="pos:124,113,183,329" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -boxes="pos:124,113,183,329;neg:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -text="zebra" -boxes="pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -text="zebra,water" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -boxes="pos:0,0,364,187-pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -text="tree,zebra" -boxes="pos:0,0,364,187-pos:379,454,329,297" -threshold=0.5

./build/sam3_cpp_test -vision_encoder="sam3/vision-encoder.onnx" -text_encoder="sam3/text-encoder.onnx" -decoder="sam3/decoder.onnx" -tokenizer="sam3/tokenizer.json" -image="david-tomaseti-Vw2HZQ1FGjU-unsplash.jpg" -device="cuda:0" -text="zebra,water,tree" -threshold=0.25

About

Segment Anything Model 3 CPP Wrapper for macOS and Ubuntu GPU

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors