feat(embeddings): support passthrough remote model ids #156
Open
sanikolaev wants to merge 1 commit into master from
Open
feat(embeddings): support passthrough remote model ids #156
sanikolaev wants to merge 1 commit into master from
sanikolaev wants to merge 1 commit into master from
Conversation
3d92e0e to
ecba6b3
Compare
Windows test results 5 files 5 suites 21m 18s ⏱️ For more details on these failures, see this check. Results for commit ecba6b3. ♻️ This comment has been updated with latest results. |
clt ❌ CLT tests in Failed tests:
🔧 Edit failed tests in UI:
test/clt-tests/mcl/auto-embeddings-json-api.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd $SEARCHD_FLAGS > /dev/null; if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log;fi
––– output –––
OK
––– input –––
apt-get install jq -y > /dev/null; echo $?
––– output –––
- debconf: delaying package configuration, since apt-utils is not installed
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/libjq1_1.7.1-3ubuntu0.24.04.1_amd64.deb Connection failed [IP: 185.125.190.82 80]
- 0
+ E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
+ 100
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_columnar (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_columnar" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 19: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_columnar WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_columnar","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_columnar","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 26: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_columnar","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 30: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_columnar WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_columnar (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_columnar; OPTIMIZE TABLE test_json_columnar OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_columnar\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 46: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE no_auto_embed (title TEXT, vec FLOAT_VECTOR KNN_TYPE='hnsw' KNN_DIMS='384' HNSW_SIMILARITY='l2') engine='columnar'"
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/insert -d "{\"index\":\"no_auto_embed\",\"id\":1,\"doc\":{\"title\":\"test\",\"vec\":[$VECTOR]}}" | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 51: jq: command not found
––– input –––
QUERY_VEC=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"no_auto_embed\",\"knn\":{\"field\":\"vec\",\"query_vector\":[$QUERY_VEC],\"k\":1}}" | jq -r '.hits.total // "0"'
––– output –––
- 1
+ bash: line 54: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_rowwise (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='rowwise'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_rowwise" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 66: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_rowwise WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_rowwise","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_rowwise","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 73: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 77: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_rowwise WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_rowwise (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_rowwise; OPTIMIZE TABLE test_json_rowwise OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_rowwise\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 93: jq: command not found |
1. Allow explicit provider-prefixed passthrough model ids for remote endpoints
   - keep the existing slash-prefixed forms (`openai/...`, `voyage/...`, `jina/...`) working as before
   - add explicit colon-prefixed forms (`openai:...`, `voyage:...`, `jina:...`)
   - when the colon form is used, pass the model id through after stripping only the provider prefix
   - this allows OpenAI-compatible custom endpoints to receive full upstream model ids unchanged, for example:
     - `openai:openai/text-embedding-ada-002`
     - `openai:jinaai/jina-embeddings-v3`
   - preserve strict built-in validation for default provider endpoints while allowing passthrough mode for custom `API_URL`-based setups

2. Allow CMake to pass optional cargo features to the embeddings crate
   - add `EMBEDDINGS_CARGO_FEATURE_ARGS` in `cmake/build_embeddings.cmake`
   - if `EMBEDDINGS_CARGO_FEATURES` is set, convert it to a valid cargo CLI fragment: `--features <value>`
   - this makes it possible to configure builds such as `download-ort` from the CMake side without hard-coding the flag in the build script

Additional remote-model adjustment:
   - cache inferred embedding dimensionality in remote providers so passthrough/custom models can learn their vector dimension from a successful response instead of requiring a built-in static mapping
   - apply that caching approach consistently across OpenAI, Voyage, and Jina
ecba6b3 to
2827b13
Compare
clt ❌ CLT tests in Failed tests:
🔧 Edit failed tests in UI:
test/clt-tests/mcl/auto-embeddings-backup-restore.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd $SEARCHD_FLAGS > /dev/null; if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log;fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_backup (
title TEXT,
content TEXT,
status INTEGER,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_backup (id, title, content, status) VALUES
(1, 'machine learning', 'neural networks', 1),
(2, 'deep learning', 'transformers', 1),
(3, 'computer vision', 'image processing', 2)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_backup; OPTIMIZE TABLE test_backup OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_backup WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title, content, KNN_DIST() as distance FROM test_backup WHERE KNN(vec, 3, 'artificial intelligence') ORDER BY distance"
––– output –––
OK
––– input –––
manticore-backup --version | grep -c "Manticore Backup"
––– output –––
OK
––– input –––
mkdir -p /tmp/backup && chmod 777 /tmp/backup; echo $?
––– output –––
OK
––– input –––
manticore-backup --backup-dir=/tmp/backup --tables=test_backup 2>&1 | grep -c "Backing up table"
––– output –––
OK
––– input –––
ls -d /tmp/backup/backup-* | wc -l
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FREEZE test_backup"
––– output –––
+-----------------------------------------------------+-----------------------------------------------------+
| file | normalized |
+-----------------------------------------------------+-----------------------------------------------------+
| /var/lib/manticore/test_backup/test_backup.0.spc | /var/lib/manticore/test_backup/test_backup.0.spc |
| /var/lib/manticore/test_backup/test_backup.0.spd | /var/lib/manticore/test_backup/test_backup.0.spd |
| /var/lib/manticore/test_backup/test_backup.0.spds | /var/lib/manticore/test_backup/test_backup.0.spds |
| /var/lib/manticore/test_backup/test_backup.0.spe | /var/lib/manticore/test_backup/test_backup.0.spe |
| /var/lib/manticore/test_backup/test_backup.0.sph | /var/lib/manticore/test_backup/test_backup.0.sph |
| /var/lib/manticore/test_backup/test_backup.0.sphi | /var/lib/manticore/test_backup/test_backup.0.sphi |
| /var/lib/manticore/test_backup/test_backup.0.spi | /var/lib/manticore/test_backup/test_backup.0.spi |
- | /var/lib/manticore/test_backup/test_backup.0.spidx | /var/lib/manticore/test_backup/test_backup.0.spidx |
+ | /var/lib/manticore/test_backup/test_backup.0.spknn | /var/lib/manticore/test_backup/test_backup.0.spknn |
- | /var/lib/manticore/test_backup/test_backup.0.spknn | /var/lib/manticore/test_backup/test_backup.0.spknn |
+ | /var/lib/manticore/test_backup/test_backup.0.spm | /var/lib/manticore/test_backup/test_backup.0.spm |
- | /var/lib/manticore/test_backup/test_backup.0.spm | /var/lib/manticore/test_backup/test_backup.0.spm |
+ | /var/lib/manticore/test_backup/test_backup.0.spp | /var/lib/manticore/test_backup/test_backup.0.spp |
- | /var/lib/manticore/test_backup/test_backup.0.spp | /var/lib/manticore/test_backup/test_backup.0.spp |
+ | /var/lib/manticore/test_backup/test_backup.0.spt | /var/lib/manticore/test_backup/test_backup.0.spt |
- | /var/lib/manticore/test_backup/test_backup.0.spt | /var/lib/manticore/test_backup/test_backup.0.spt |
+ | /var/lib/manticore/test_backup/test_backup.meta | /var/lib/manticore/test_backup/test_backup.meta |
- | /var/lib/manticore/test_backup/test_backup.meta | /var/lib/manticore/test_backup/test_backup.meta |
+ | /var/lib/manticore/test_backup/test_backup.settings | /var/lib/manticore/test_backup/test_backup.settings |
- | /var/lib/manticore/test_backup/test_backup.settings | /var/lib/manticore/test_backup/test_backup.settings |
+ +-----------------------------------------------------+-----------------------------------------------------+
- +-----------------------------------------------------+-----------------------------------------------------+
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_backup (id, title, content, status) VALUES (4, 'frozen insert', 'test data', 3)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "UNFREEZE test_backup"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
OK
––– input –––
mysqldump -h0 -P9306 manticore test_backup > /tmp/logical_backup.sql 2>/dev/null; echo $?
––– output –––
OK
––– input –––
grep -c "INSERT INTO" /tmp/logical_backup.sql
––– output –––
OK
––– input –––
searchd --stopwait > /dev/null 2>&1; echo $?
––– output –––
OK
––– input –––
rm -f /etc/manticoresearch/manticore.conf; rm -rf /var/lib/manticore/*; echo "Cleaned for restore"
––– output –––
OK
––– input –––
manticore-backup --backup-dir=/tmp/backup --restore 2>&1 | grep -c "backup-"
––– output –––
OK
––– input –––
BACKUP_NAME=$(manticore-backup --backup-dir=/tmp/backup --restore 2>&1 | grep backup- | awk '{print $1}' | head -1)
manticore-backup --backup-dir=/tmp/backup --restore=$BACKUP_NAME 2>&1 | grep -c "Starting to restore"
––– output –––
- 1
+ 0
––– input –––
searchd > /dev/null 2>&1; echo $?
––– output –––
- 0
+ 1
––– input –––
echo "Waiting for searchd to start"; sleep 3
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
- +----------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | count(*) |
- +----------+
- | 3 |
- +----------+
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_backup; OPTIMIZE TABLE test_backup OPTION sync=1, cutoff=1"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_backup WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- +------+
––– input –––
mysql -h0 -P9306 -e "ALTER TABLE test_backup ADD COLUMN new_field INTEGER"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "DESC test_backup" | grep "new_field"
––– output –––
- | new_field | uint | columnar fast_fetch |
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_copy (
title TEXT,
content TEXT,
status INTEGER,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_copy (id, title, content, status) VALUES
(1, 'machine learning', 'neural networks', 1),
(2, 'deep learning', 'transformers', 1),
(3, 'computer vision', 'image processing', 2),
(4, 'frozen insert', 'test data', 3)"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_copy"
––– output –––
- +----------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | count(*) |
- +----------+
- | 4 |
- +----------+
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_copy WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- | 4 |
- +------+
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_copy; OPTIMIZE TABLE test_copy OPTION sync=1, cutoff=1"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_copy WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- | 4 |
- +------+
test/clt-tests/mcl/auto-embeddings-endpoints.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
apt-get install jq -y > /dev/null; echo $?
––– output –––
- debconf: delaying package configuration, since apt-utils is not installed
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/libjq1_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 185.125.190.81 80]
- 0
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/jq_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 185.125.190.81 80]
+ E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
+ 100
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE emb_test (
id BIGINT,
title TEXT,
content TEXT,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO emb_test (id, title, content) VALUES
(1, 'machine learning', 'neural networks and deep learning'),
(2, 'computer vision', 'image recognition and processing'),
(3, 'natural language', 'text analysis and understanding')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK emb_test; OPTIMIZE TABLE emb_test OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM emb_test WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
OK
––– input –––
curl -s "http://localhost:9308/cli?select%20id,%20title%20from%20emb_test%20where%20knn(vec,%202,%20'artificial%20intelligence')" | grep -v 'rows in set'
––– output –––
OK
––– input –––
curl -s "http://localhost:9308/cli_json?select%20id,%20title,%20@knn_dist%20from%20emb_test%20where%20knn(vec,%201,%20'learning')" | jq -r '.[0].data[0] | "ID: \(.id)\nTitle: \(.title)\nDistance: \(.["@knn_dist"] | tostring)"'
––– output –––
- ID: 1
+ bash: line 31: jq: command not found
- Title: machine learning
- Distance: #!/1\.082[0-9]*/!#
––– input –––
curl -s -X POST "http://localhost:9308/sql?mode=raw" -d "select count(*) from emb_test where knn(vec, 2, 'neural networks')" | jq -r '.[0].data[0]."count(*)"'
––– output –––
- 3
+ bash: line 33: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"emb_test","id":10,"doc":{"title":"quantum computing","content":"quantum algorithms"}}' | jq -r '.created'
––– output –––
- true
+ bash: line 35: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/search -d '{"index":"emb_test","knn":{"field":"vec","query":"quantum","k":1}}' | jq -r '.hits.hits[0]._source.title'
––– output –––
- quantum computing
+ bash: line 37: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/search -d '{"index":"emb_test","knn":{"field":"vec","query_text":"quantum","k":1}}'
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE chunk_test (
id BIGINT,
title TEXT,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO chunk_test (id, title) VALUES
(1, 'machine learning'),
(2, 'deep learning'),
(3, 'reinforcement learning')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM chunk_test WHERE KNN(vec, 1, 'learning')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK chunk_test; OPTIMIZE TABLE chunk_test OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM chunk_test WHERE KNN(vec, 1, 'learning')"
––– output –––
OK
test/clt-tests/mcl/auto-embeddings-json-api.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd $SEARCHD_FLAGS > /dev/null; if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log;fi
––– output –––
OK
––– input –––
apt-get install jq -y > /dev/null; echo $?
––– output –––
- debconf: delaying package configuration, since apt-utils is not installed
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/libjq1_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.23 80]
- 0
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/jq_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.23 80]
+ E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
+ 100
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_columnar (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_columnar" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 19: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_columnar WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_columnar","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_columnar","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 26: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_columnar","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 30: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_columnar WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_columnar (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_columnar; OPTIMIZE TABLE test_json_columnar OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_columnar\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 46: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE no_auto_embed (title TEXT, vec FLOAT_VECTOR KNN_TYPE='hnsw' KNN_DIMS='384' HNSW_SIMILARITY='l2') engine='columnar'"
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/insert -d "{\"index\":\"no_auto_embed\",\"id\":1,\"doc\":{\"title\":\"test\",\"vec\":[$VECTOR]}}" | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 51: jq: command not found
––– input –––
QUERY_VEC=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"no_auto_embed\",\"knn\":{\"field\":\"vec\",\"query_vector\":[$QUERY_VEC],\"k\":1}}" | jq -r '.hits.total // "0"'
––– output –––
- 1
+ bash: line 54: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_rowwise (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='rowwise'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_rowwise" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 66: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_rowwise WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_rowwise","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_rowwise","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 73: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 77: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_rowwise WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_rowwise (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_rowwise; OPTIMIZE TABLE test_json_rowwise OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_rowwise\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 93: jq: command not found
test/clt-tests/mcl/auto-embeddings-syntax-check.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title, content' API_KEY='${VOYAGE_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_voyage_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_voyage_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title' API_KEY='${VOYAGE_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_voyage_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test__invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_voyage_no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "INSERT INTO test__no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test__no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_voyage_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$VOYAGE_API_KEY" ] && [ "$VOYAGE_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_voyage_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${VOYAGE_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_plain {
type = rt
path = /var/lib/manticore/test_voyage_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"voyage/voyage-3.5-lite","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_voyage_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_no_key {
type = rt
path = /var/lib/manticore/test_voyage_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"voyage/voyage-3.5-lite","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1|grep WARNING
––– output –––
- WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 11:38:03.675 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
test/clt-tests/mcl/auto-embeddings-voyage-remote.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title, content' API_KEY='${VOYAGE_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_voyage_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_voyage_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title' API_KEY='${VOYAGE_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_voyage_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test__invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_voyage_no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "INSERT INTO test__no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test__no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_voyage_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$VOYAGE_API_KEY" ] && [ "$VOYAGE_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_voyage_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${VOYAGE_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_plain {
type = rt
path = /var/lib/manticore/test_voyage_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"voyage/voyage-3.5-lite","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_voyage_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_no_key {
type = rt
path = /var/lib/manticore/test_voyage_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"voyage/voyage-3.5-lite","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1|grep WARNING
––– output –––
- WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 11:38:09.473 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
test/clt-tests/mcl/auto-embeddings-openai-remote.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title, content' API_KEY='${OPENAI_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_openai_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_openai_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title' API_KEY='${OPENAI_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_openai_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_openai_no_from'" | grep -q test_openai_no_from; then mysql -h0 -P9306 -e "INSERT INTO test_openai_no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_openai_no_from'" | grep -q test_openai_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_openai_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$OPENAI_API_KEY" ] && [ "$OPENAI_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_openai_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_openai_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${OPENAI_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_openai_plain {
type = rt
path = /var/lib/manticore/test_openai_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"openai/text-embedding-ada-002","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_openai_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_openai_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_openai_no_key {
type = rt
path = /var/lib/manticore/test_openai_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"openai/text-embedding-ada-002","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1|grep WARNING
––– output –––
- WARNING: table 'test_openai_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 11:37:02.940 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_openai_no_key': prealloc: Invalid API key for remote model - NOT SERVING
donhardman
approved these changes
Apr 29, 2026
clt❌ CLT tests in Failed tests:🔧 Edit failed tests in UI:
test/clt-tests/mcl/auto-embeddings-backup-restore.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd $SEARCHD_FLAGS > /dev/null; if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log;fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_backup (
title TEXT,
content TEXT,
status INTEGER,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_backup (id, title, content, status) VALUES
(1, 'machine learning', 'neural networks', 1),
(2, 'deep learning', 'transformers', 1),
(3, 'computer vision', 'image processing', 2)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_backup; OPTIMIZE TABLE test_backup OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_backup WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title, content, KNN_DIST() as distance FROM test_backup WHERE KNN(vec, 3, 'artificial intelligence') ORDER BY distance"
––– output –––
OK
––– input –––
manticore-backup --version | grep -c "Manticore Backup"
––– output –––
OK
––– input –––
mkdir -p /tmp/backup && chmod 777 /tmp/backup; echo $?
––– output –––
OK
––– input –––
manticore-backup --backup-dir=/tmp/backup --tables=test_backup 2>&1 | grep -c "Backing up table"
––– output –––
OK
––– input –––
ls -d /tmp/backup/backup-* | wc -l
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FREEZE test_backup"
––– output –––
+-----------------------------------------------------+-----------------------------------------------------+
| file | normalized |
+-----------------------------------------------------+-----------------------------------------------------+
| /var/lib/manticore/test_backup/test_backup.0.spc | /var/lib/manticore/test_backup/test_backup.0.spc |
| /var/lib/manticore/test_backup/test_backup.0.spd | /var/lib/manticore/test_backup/test_backup.0.spd |
| /var/lib/manticore/test_backup/test_backup.0.spds | /var/lib/manticore/test_backup/test_backup.0.spds |
| /var/lib/manticore/test_backup/test_backup.0.spe | /var/lib/manticore/test_backup/test_backup.0.spe |
| /var/lib/manticore/test_backup/test_backup.0.sph | /var/lib/manticore/test_backup/test_backup.0.sph |
| /var/lib/manticore/test_backup/test_backup.0.sphi | /var/lib/manticore/test_backup/test_backup.0.sphi |
| /var/lib/manticore/test_backup/test_backup.0.spi | /var/lib/manticore/test_backup/test_backup.0.spi |
- | /var/lib/manticore/test_backup/test_backup.0.spidx | /var/lib/manticore/test_backup/test_backup.0.spidx |
+ | /var/lib/manticore/test_backup/test_backup.0.spknn | /var/lib/manticore/test_backup/test_backup.0.spknn |
- | /var/lib/manticore/test_backup/test_backup.0.spknn | /var/lib/manticore/test_backup/test_backup.0.spknn |
+ | /var/lib/manticore/test_backup/test_backup.0.spm | /var/lib/manticore/test_backup/test_backup.0.spm |
- | /var/lib/manticore/test_backup/test_backup.0.spm | /var/lib/manticore/test_backup/test_backup.0.spm |
+ | /var/lib/manticore/test_backup/test_backup.0.spp | /var/lib/manticore/test_backup/test_backup.0.spp |
- | /var/lib/manticore/test_backup/test_backup.0.spp | /var/lib/manticore/test_backup/test_backup.0.spp |
+ | /var/lib/manticore/test_backup/test_backup.0.spt | /var/lib/manticore/test_backup/test_backup.0.spt |
- | /var/lib/manticore/test_backup/test_backup.0.spt | /var/lib/manticore/test_backup/test_backup.0.spt |
+ | /var/lib/manticore/test_backup/test_backup.meta | /var/lib/manticore/test_backup/test_backup.meta |
- | /var/lib/manticore/test_backup/test_backup.meta | /var/lib/manticore/test_backup/test_backup.meta |
+ | /var/lib/manticore/test_backup/test_backup.settings | /var/lib/manticore/test_backup/test_backup.settings |
- | /var/lib/manticore/test_backup/test_backup.settings | /var/lib/manticore/test_backup/test_backup.settings |
+ +-----------------------------------------------------+-----------------------------------------------------+
- +-----------------------------------------------------+-----------------------------------------------------+
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_backup (id, title, content, status) VALUES (4, 'frozen insert', 'test data', 3)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "UNFREEZE test_backup"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
OK
––– input –––
mysqldump -h0 -P9306 manticore test_backup > /tmp/logical_backup.sql 2>/dev/null; echo $?
––– output –––
OK
––– input –––
grep -c "INSERT INTO" /tmp/logical_backup.sql
––– output –––
OK
––– input –––
searchd --stopwait > /dev/null 2>&1; echo $?
––– output –––
OK
––– input –––
rm -f /etc/manticoresearch/manticore.conf; rm -rf /var/lib/manticore/*; echo "Cleaned for restore"
––– output –––
OK
––– input –––
manticore-backup --backup-dir=/tmp/backup --restore 2>&1 | grep -c "backup-"
––– output –––
OK
––– input –––
BACKUP_NAME=$(manticore-backup --backup-dir=/tmp/backup --restore 2>&1 | grep backup- | awk '{print $1}' | head -1)
manticore-backup --backup-dir=/tmp/backup --restore=$BACKUP_NAME 2>&1 | grep -c "Starting to restore"
––– output –––
- 1
+ 0
––– input –––
searchd > /dev/null 2>&1; echo $?
––– output –––
- 0
+ 1
––– input –––
echo "Waiting for searchd to start"; sleep 3
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_backup"
––– output –––
- +----------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | count(*) |
- +----------+
- | 3 |
- +----------+
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_backup; OPTIMIZE TABLE test_backup OPTION sync=1, cutoff=1"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_backup WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- +------+
––– input –––
mysql -h0 -P9306 -e "ALTER TABLE test_backup ADD COLUMN new_field INTEGER"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "DESC test_backup" | grep "new_field"
––– output –––
- | new_field | uint | columnar fast_fetch |
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_copy (
title TEXT,
content TEXT,
status INTEGER,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_copy (id, title, content, status) VALUES
(1, 'machine learning', 'neural networks', 1),
(2, 'deep learning', 'transformers', 1),
(3, 'computer vision', 'image processing', 2),
(4, 'frozen insert', 'test data', 3)"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_copy"
––– output –––
- +----------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | count(*) |
- +----------+
- | 4 |
- +----------+
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_copy WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- | 4 |
- +------+
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_copy; OPTIMIZE TABLE test_copy OPTION sync=1, cutoff=1"; echo $?
––– output –––
- 0
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_copy WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
- +------+
+ ERROR 2003 (HY000): Can't connect to MySQL server on '0:9306' (111)
- | id |
- +------+
- | 1 |
- | 3 |
- | 2 |
- | 4 |
- +------+
test/clt-tests/mcl/auto-embeddings-endpoints.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
apt-get install jq -y > /dev/null; echo $?
––– output –––
- debconf: delaying package configuration, since apt-utils is not installed
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/libjq1_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.23 80]
- 0
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/jq_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.23 80]
+ E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
+ 100
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE emb_test (
id BIGINT,
title TEXT,
content TEXT,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
)"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO emb_test (id, title, content) VALUES
(1, 'machine learning', 'neural networks and deep learning'),
(2, 'computer vision', 'image recognition and processing'),
(3, 'natural language', 'text analysis and understanding')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK emb_test; OPTIMIZE TABLE emb_test OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM emb_test WHERE KNN(vec, 2, 'artificial intelligence')"
––– output –––
OK
––– input –––
curl -s "http://localhost:9308/cli?select%20id,%20title%20from%20emb_test%20where%20knn(vec,%202,%20'artificial%20intelligence')" | grep -v 'rows in set'
––– output –––
OK
––– input –––
curl -s "http://localhost:9308/cli_json?select%20id,%20title,%20@knn_dist%20from%20emb_test%20where%20knn(vec,%201,%20'learning')" | jq -r '.[0].data[0] | "ID: \(.id)\nTitle: \(.title)\nDistance: \(.["@knn_dist"] | tostring)"'
––– output –––
- ID: 1
+ bash: line 31: jq: command not found
- Title: machine learning
- Distance: #!/1\.082[0-9]*/!#
––– input –––
curl -s -X POST "http://localhost:9308/sql?mode=raw" -d "select count(*) from emb_test where knn(vec, 2, 'neural networks')" | jq -r '.[0].data[0]."count(*)"'
––– output –––
- 3
+ bash: line 33: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"emb_test","id":10,"doc":{"title":"quantum computing","content":"quantum algorithms"}}' | jq -r '.created'
––– output –––
- true
+ bash: line 35: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/search -d '{"index":"emb_test","knn":{"field":"vec","query":"quantum","k":1}}' | jq -r '.hits.hits[0]._source.title'
––– output –––
- quantum computing
+ bash: line 37: jq: command not found
––– input –––
curl -s -X POST http://localhost:9308/search -d '{"index":"emb_test","knn":{"field":"vec","query_text":"quantum","k":1}}'
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE chunk_test (
id BIGINT,
title TEXT,
vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO chunk_test (id, title) VALUES
(1, 'machine learning'),
(2, 'deep learning'),
(3, 'reinforcement learning')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM chunk_test WHERE KNN(vec, 1, 'learning')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK chunk_test; OPTIMIZE TABLE chunk_test OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM chunk_test WHERE KNN(vec, 1, 'learning')"
––– output –––
OK
test/clt-tests/mcl/auto-embeddings-json-api.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd $SEARCHD_FLAGS > /dev/null; if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log;fi
––– output –––
OK
––– input –––
apt-get install jq -y > /dev/null; echo $?
––– output –––
- debconf: delaying package configuration, since apt-utils is not installed
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/libjq1_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.24 80]
- 0
+ E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/j/jq/jq_1.7.1-3ubuntu0.24.04.1_amd64.deb 404 Not Found [IP: 91.189.92.24 80]
+ E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?
+ 100
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_columnar (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='columnar'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_columnar" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 19: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_columnar WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_columnar","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_columnar","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 26: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_columnar","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 30: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_columnar WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_columnar (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_columnar","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_columnar WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_columnar"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_columnar; OPTIMIZE TABLE test_json_columnar OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_columnar\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 46: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE no_auto_embed (title TEXT, vec FLOAT_VECTOR KNN_TYPE='hnsw' KNN_DIMS='384' HNSW_SIMILARITY='l2') engine='columnar'"
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/insert -d "{\"index\":\"no_auto_embed\",\"id\":1,\"doc\":{\"title\":\"test\",\"vec\":[$VECTOR]}}" | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 51: jq: command not found
––– input –––
QUERY_VEC=$(python3 -c "print(','.join(['0.5']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"no_auto_embed\",\"knn\":{\"field\":\"vec\",\"query_vector\":[$QUERY_VEC],\"k\":1}}" | jq -r '.hits.total // "0"'
––– output –––
- 1
+ bash: line 54: jq: command not found
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_json_rowwise (
title TEXT,
content TEXT,
embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2'
MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2'
FROM='title, content'
) engine='rowwise'"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_json_rowwise" | grep -o "model_name='sentence-transformers/all-MiniLM-L6-v2'"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"machine learning","content":"neural networks"}}' | jq -r 'if ._id then ._id else "inserted" end'
––– output –––
- inserted
+ bash: line 66: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT id FROM test_json_rowwise WHERE KNN(embedding, 1, 'machine learning neural networks')"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/bulk -H "Content-Type: application/x-ndjson" -d '
{"insert":{"index":"test_json_rowwise","id":2,"doc":{"title":"computer vision","content":"image recognition"}}}
{"insert":{"index":"test_json_rowwise","id":3,"doc":{"title":"NLP","content":"text processing"}}}
' | jq '{created: .items[0].bulk.created}'
––– output –––
- {
+ bash: line 73: jq: command not found
- "created": 2
- }
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise WHERE id IN (2,3)"
––– output –––
OK
––– input –––
curl -s -X POST http://localhost:9308/replace -d '{"index":"test_json_rowwise","id":1,"doc":{"title":"updated ML","content":"updated networks"}}' | jq -r '.result'
––– output –––
- updated
+ bash: line 77: jq: command not found
––– input –––
mysql -h0 -P9306 -e "SELECT title FROM test_json_rowwise WHERE id=1 AND KNN(embedding, 1, 'updated ML networks')"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_json_rowwise (id, title, content) VALUES (100, 'test', 'data')";
curl -s -X POST http://localhost:9308/insert -d '{"index":"test_json_rowwise","id":101,"doc":{"title":"test","content":"data"}}' > /dev/null
––– output –––
OK
––– input –––
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=100" > /tmp/v1.txt
mysql -h0 -P9306 --batch --skip-column-names -e "SELECT embedding FROM test_json_rowwise WHERE id=101" > /tmp/v2.txt
diff -q /tmp/v1.txt /tmp/v2.txt > /dev/null && echo "Vectors identical" || echo "Vectors differ"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) FROM test_json_rowwise"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "FLUSH RAMCHUNK test_json_rowwise; OPTIMIZE TABLE test_json_rowwise OPTION sync=1, cutoff=1"; echo $?
––– output –––
OK
––– input –––
VECTOR=$(python3 -c "print(','.join(['0.01']*384))")
curl -s -X POST http://localhost:9308/search -d "{\"index\":\"test_json_rowwise\",\"knn\":{\"field\":\"embedding\",\"query_vector\":[$VECTOR],\"k\":2}}" | jq -r '.hits.total // "0"'
––– output –––
- 5
+ bash: line 93: jq: command not found
test/clt-tests/mcl/auto-embeddings-syntax-check.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title, content' API_KEY='${VOYAGE_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_voyage_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_voyage_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title' API_KEY='${VOYAGE_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_voyage_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test__invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_voyage_no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "INSERT INTO test__no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test__no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_voyage_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$VOYAGE_API_KEY" ] && [ "$VOYAGE_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_voyage_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${VOYAGE_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_plain {
type = rt
path = /var/lib/manticore/test_voyage_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"voyage/voyage-3.5-lite","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_voyage_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_no_key {
type = rt
path = /var/lib/manticore/test_voyage_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"voyage/voyage-3.5-lite","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1|grep WARNING
––– output –––
- WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 14:52:53.598 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
test/clt-tests/mcl/auto-embeddings-voyage-remote.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title, content' API_KEY='${VOYAGE_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_voyage_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_voyage_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_voyage_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/voyage-3.5-lite' FROM = 'title' API_KEY='${VOYAGE_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_voyage_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_voyage_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test__invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'voyage/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_voyage_no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "INSERT INTO test__no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test__no_from'" | grep -q test_voyage_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_voyage_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$VOYAGE_API_KEY" ] && [ "$VOYAGE_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_voyage_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${VOYAGE_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_plain {
type = rt
path = /var/lib/manticore/test_voyage_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"voyage/voyage-3.5-lite","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_voyage_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_voyage_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_voyage_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_voyage_no_key {
type = rt
path = /var/lib/manticore/test_voyage_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"voyage/voyage-3.5-lite","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1|grep WARNING
––– output –––
- WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 14:52:59.384 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_voyage_no_key': prealloc: Invalid API key for remote model - NOT SERVING
test/clt-tests/mcl/auto-embeddings-openai-remote.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title, content' API_KEY='${OPENAI_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW CREATE TABLE test_openai_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_openai_remote WHERE id=1"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'title' API_KEY='${OPENAI_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_openai_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_openai_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_openai_invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'openai/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_openai_no_from'" | grep -q test_openai_no_from; then mysql -h0 -P9306 -e "INSERT INTO test_openai_no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_openai_no_from'" | grep -q test_openai_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_openai_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$OPENAI_API_KEY" ] && [ "$OPENAI_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT id, knn_dist() FROM test_openai_remote WHERE knn(embedding, 3, 'machine learning and artificial intelligence')\G"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_openai_remote WHERE knn(embedding, 5, 'technology and AI') AND id > 0"
––– output –––
OK
––– input –––
API_KEY_VAL="${OPENAI_API_KEY}"; cat > /etc/manticoresearch/manticore.conf << CONFEOF
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_openai_plain {
type = rt
path = /var/lib/manticore/test_openai_plain
rt_field = title
rt_field = content
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","hnsw_m":16,"hnsw_ef_construction":200,"model_name":"openai/text-embedding-ada-002","from":"title,content","api_key":"${API_KEY_VAL}"}]}
}
CONFEOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; cat /var/log/manticore/searchd.log; fi  # waits up to 10s for searchd to report readiness; on failure dumps the log so the cause shows up in the test report
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SHOW TABLES"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_openai_plain (id, title, content) VALUES(1, 'bread', 'food item'), (2, 'cat', 'animal pet')"; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as count FROM test_openai_plain"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SELECT id, title FROM test_openai_plain WHERE knn(embedding, 2, 'dog')"
––– output –––
OK
––– input –––
cat > /etc/manticoresearch/manticore.conf << 'EOF'
searchd {
listen = 127.0.0.1:9306:mysql41
listen = 127.0.0.1:9308:http
log = /var/log/manticore/searchd.log
pid_file = /var/run/manticore/searchd.pid
}
table test_openai_no_key {
type = rt
path = /var/lib/manticore/test_openai_no_key
rt_field = title
rt_attr_float_vector = embedding
knn = {"attrs":[{"name":"embedding","type":"hnsw","hnsw_similarity":"L2","model_name":"openai/text-embedding-ada-002","from":"title"}]}
}
EOF
––– output –––
OK
––– input –––
searchd --stopwait --quiet
––– output –––
OK
––– input –––
searchd 2>&1 | grep -F "WARNING: table 'test_openai_no_key'"  # match only the warning for the table under test; unrelated startup warnings (e.g. secondary-library version mismatch) must not affect the recorded output
––– output –––
- WARNING: table 'test_openai_no_key': prealloc: Invalid API key for remote model - NOT SERVING
+ [Wed Apr 29 14:51:56.557 2026] [134] WARNING: Error initializing secondary index: daemon requires secondary library v19 (trying to load v20)
+ WARNING: table 'test_openai_no_key': prealloc: Invalid API key for remote model - NOT SERVING |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Additional remote-model adjustment:
Related issue #155