Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions mysql-test/main/vector_bulk.combinations
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[innodb]
innodb
default-storage-engine=innodb

[myisam]
default-storage-engine=myisam

[aria]
default-storage-engine=aria
44 changes: 44 additions & 0 deletions mysql-test/main/vector_bulk.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# Test memory budget fallback during MHNSW bulk insert
#
# 1. Normal bulk insert works when memory budget is large enough
create table t1 (id int auto_increment primary key, v vector(3) not null);
insert into t1 (v) values (x'000000000000000000000000'),
(x'0000803f0000000000000000'),
(x'000000000000803f00000000'),
(x'00000000000000000000803f');
alter table t1 add vector index (v);
show warnings;
Level Code Message
# Test search using the successfully bulk-built index
select id, vec_distance_euclidean(v, x'0000803f0000000000000000') d from t1 order by d limit 2;
id d
2 0
1 1
drop table t1;
# 2. Bulk insert falls back to normal insert when mhnsw_max_cache_size is small
set @old_cache_size= @@global.mhnsw_max_cache_size;
set global mhnsw_max_cache_size= 1048576;
set max_recursive_iterations= 300;
create table t1 (id int auto_increment primary key, v vector(3000) not null);
insert into t1 (v)
with recursive cte as (
select 1 as n
union all
select n + 1 from cte where n < 115
)
select repeat(x'00', 12000) from cte;
insert into t1 (id, v) values (999, concat(repeat(x'00', 11996), x'0000803f'));
# Adding index with small cache size should trigger fallback and show a warning
alter table t1 add vector index (v) m=200;
Warnings:
Note 1105 MHNSW: Bulk insert disabled because estimated memory usage (estimated_mem) exceeds mhnsw_max_cache_size (1048576). Falling back to normal insert.
show warnings;
Level Code Message
Note 1105 MHNSW: Bulk insert disabled because estimated memory usage (estimated_mem) exceeds mhnsw_max_cache_size (1048576). Falling back to normal insert.
# Test search using the fallback-built index to ensure it is healthy and correct
select id, vec_distance_euclidean(v, concat(repeat(x'00', 11996), x'0000803f')) d from t1 order by d limit 1;
id d
999 0
drop table t1;
set global mhnsw_max_cache_size= @old_cache_size;
51 changes: 51 additions & 0 deletions mysql-test/main/vector_bulk.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
--echo #
--echo # Test memory budget fallback during MHNSW bulk insert
--echo #

--echo # 1. Normal bulk insert works when memory budget is large enough
create table t1 (id int auto_increment primary key, v vector(3) not null);
insert into t1 (v) values (x'000000000000000000000000'),
(x'0000803f0000000000000000'),
(x'000000000000803f00000000'),
(x'00000000000000000000803f');
alter table t1 add vector index (v);
show warnings;

--echo # Test search using the successfully bulk-built index
--replace_regex /(\.\d{5})\d+/\1/
select id, vec_distance_euclidean(v, x'0000803f0000000000000000') d from t1 order by d limit 2;

drop table t1;

--echo # 2. Bulk insert falls back to normal insert when mhnsw_max_cache_size is small
set @old_cache_size= @@global.mhnsw_max_cache_size;
set global mhnsw_max_cache_size= 1048576;
set max_recursive_iterations= 300;

create table t1 (id int auto_increment primary key, v vector(3000) not null);

# Insert 115 rows of 3000-dimensional vectors.
# Total size: 115 * ~9288 bytes/row (with M=200) = ~1.06 MB, exceeding 1MB.
insert into t1 (v)
with recursive cte as (
select 1 as n
union all
select n + 1 from cte where n < 115
)
select repeat(x'00', 12000) from cte;

# Insert a unique search target vector with fixed ID 999 to guarantee deterministic results across all engines
insert into t1 (id, v) values (999, concat(repeat(x'00', 11996), x'0000803f'));

--echo # Adding index with small cache size should trigger fallback and show a warning
--replace_regex /usage \(\d+\)/usage (estimated_mem)/
alter table t1 add vector index (v) m=200;
--replace_regex /usage \(\d+\)/usage (estimated_mem)/
show warnings;

--echo # Test search using the fallback-built index to ensure it is healthy and correct
--replace_regex /(\.\d{5})\d+/\1/
select id, vec_distance_euclidean(v, concat(repeat(x'00', 11996), x'0000803f')) d from t1 order by d limit 1;

drop table t1;
set global mhnsw_max_cache_size= @old_cache_size;
47 changes: 42 additions & 5 deletions sql/sql_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10145,11 +10145,15 @@ int TABLE::unlock_hlindexes()

int TABLE::hlindexes_on_insert()
{
DBUG_ASSERT(s->hlindexes() == (hlindex != NULL));
if (hlindex && hlindex->in_use)
if (int err= mhnsw_insert(this, key_info + s->keys))
return err;
return 0;
DBUG_ASSERT(s->hlindexes() == (hlindex != NULL));
if (hlindex && hlindex->in_use)
{
if (hlindex->bulk_insert_active)
return mhnsw_bulk_insert_row(this, key_info + s->keys);
else
return mhnsw_insert(this, key_info + s->keys);
}
return 0;
Comment on lines +10148 to +10156

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The member bulk_insert_active was added to the TABLE struct in sql/table.h, but here it is accessed via hlindex->bulk_insert_active. Since hlindex is not of type TABLE*, this will cause a compilation error. It should be accessed directly as bulk_insert_active (or this->bulk_insert_active).

Additionally, please update the indentation to 2 spaces to match the repository's implicit style guide.

  DBUG_ASSERT(s->hlindexes() == (hlindex != NULL));
  if (hlindex && hlindex->in_use)
  {
    if (bulk_insert_active)
      return mhnsw_bulk_insert_row(this, key_info + s->keys);
    else
      return mhnsw_insert(this, key_info + s->keys);
  }
  return 0;

}

int TABLE::hlindexes_on_update()
Expand Down Expand Up @@ -10208,3 +10212,36 @@ int TABLE::hlindex_read_end()
{
return mhnsw_read_end(this);
}

int TABLE::hlindexes_bulk_insert_begin(ha_rows rows)
{
if (s->hlindexes())
{
if (!hlindex || !hlindex->in_use)
if (int err= open_hlindexes_for_write())
return err;

if (hlindex && hlindex->in_use)
{
int err= mhnsw_bulk_insert_begin(this, key_info + s->keys, rows);
if (err)
{
hlindex->bulk_insert_active= false;
return err;
}
if (hlindex->context)
hlindex->bulk_insert_active= true;
}
}
return 0;
}

int TABLE::hlindexes_bulk_insert_end()
{
if (hlindex && hlindex->in_use)
{
hlindex->bulk_insert_active= false;
return mhnsw_bulk_insert_end(this, key_info + s->keys);
}
return 0;
}
Comment on lines +10216 to +10247

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There are three issues here:

  1. bulk_insert_active is a member of TABLE, not hlindex, so accessing it via hlindex->bulk_insert_active will cause a compilation error.
  2. If mhnsw_bulk_insert_begin fails (returns a non-zero error code), bulk_insert_active should be reset to false so that subsequent inserts do not incorrectly attempt bulk insertion.
  3. The indentation should be updated to 2 spaces to match the repository's implicit style guide.
int TABLE::hlindexes_bulk_insert_begin(ha_rows rows)
{
  if (hlindex && hlindex->in_use)
  {
    bulk_insert_active= true;
    int err= mhnsw_bulk_insert_begin(this, key_info + s->keys, rows);
    if (err)
      bulk_insert_active= false;
    return err;
  }
  return 0;
}

int TABLE::hlindexes_bulk_insert_end()
{
  if (hlindex && hlindex->in_use)
  {
    bulk_insert_active= false;
    return mhnsw_bulk_insert_end(this, key_info + s->keys);
  }
  return 0;
}

26 changes: 22 additions & 4 deletions sql/sql_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12616,6 +12616,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
bool make_unversioned= from->versioned() && !to->versioned();
bool keep_versioned= from->versioned() && to->versioned();
bool bulk_insert_started= 0;
bool hlindex_bulk_started= 0;
Field *to_row_start= NULL, *to_row_end= NULL, *from_row_end= NULL;
MYSQL_TIME query_start;
DBUG_ENTER("copy_data_between_tables");
Expand Down Expand Up @@ -12662,11 +12663,20 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,

from->file->info(HA_STATUS_VARIABLE);
to->file->extra(HA_EXTRA_PREPARE_FOR_ALTER_TABLE);
if (!to->s->long_unique_table && !to->s->hlindexes())

if (!to->s->long_unique_table)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when you remove something, check in the git history why it was added. The check above was in the commit with the comment "Don't enable bulk insert when altering a table containing vector index. InnoDB can't handle situation when bulk insert is enabled for one table but disabled for another"

That is, you should first do hlindexes_bulk_insert_begin() and if it succeeds, then do ha_start_bulk_insert(). To avoid the case when bulk insert was only started partially.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked and actually talked about this in Zulip :)

but yeah I forgot to make sure we only enable it when hlindex_bulk_begin succeed

{
to->file->ha_start_bulk_insert(from->file->stats.records,
ignore ? 0 : HA_CREATE_UNIQUE_INDEX_BY_SORT);
bulk_insert_started= 1;
if (to->s->hlindexes())
{
if (to->hlindexes_bulk_insert_begin(from->file->stats.records) == 0)
hlindex_bulk_started= 1;
}
if (!to->s->hlindexes() || hlindex_bulk_started)
{
to->file->ha_start_bulk_insert(from->file->stats.records,
ignore ? 0 : HA_CREATE_UNIQUE_INDEX_BY_SORT);
bulk_insert_started= 1;
}
}
Comment on lines +12667 to 12680

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The return value of to->hlindexes_bulk_insert_begin is currently ignored. If the bulk insert initialization fails, we should handle the error, print it, and avoid setting hlindex_bulk_started to 1 so that we do not proceed with the bulk insert or attempt to end it later.

Additionally, please update the indentation to 2 spaces to match the repository's implicit style guide.

  if (!to->s->long_unique_table)
  {
    to->file->ha_start_bulk_insert(from->file->stats.records,
                                   ignore ? 0 : HA_CREATE_UNIQUE_INDEX_BY_SORT);
    bulk_insert_started= 1;

    if (to->s->hlindexes())
    {
      if ((error= to->hlindexes_bulk_insert_begin(from->file->stats.records)))
      {
        to->file->print_error(error, MYF(0));
      }
      else
      {
        hlindex_bulk_started= 1;
      }
    }
  }

mysql_stage_set_work_estimated(thd->m_stage_progress_psi, from->file->stats.records);
List_iterator<Create_field> it(alter_info->create_list);
Expand Down Expand Up @@ -12999,6 +13009,14 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
}

bulk_insert_started= 0;
if (hlindex_bulk_started && to->hlindexes_bulk_insert_end() && error <= 0)
{
if (!thd->is_error())
to->file->print_error(my_errno, MYF(0));
error= 1;
}
hlindex_bulk_started=0;
Comment on lines +13012 to +13018

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Please update the indentation to 2 spaces and add proper spacing around operators (e.g., hlindex_bulk_started = 0;) to match the repository's implicit style guide.

  if (hlindex_bulk_started && to->hlindexes_bulk_insert_end() && error <= 0)
  {
    if (!thd->is_error())
      to->file->print_error(my_errno, MYF(0));
    error= 1;
  }
  hlindex_bulk_started= 0;


if (error <= 0 && !to->s->hlindexes())
{
Abort_on_warning_instant_set save_abort_on_warning(thd, false);
Expand Down
3 changes: 3 additions & 0 deletions sql/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -1632,6 +1632,7 @@ struct TABLE
*/
bool alias_name_used; /* true if table_name is alias */
bool get_fields_in_item_tree; /* Signal to fix_field */
bool bulk_insert_active=false; /* mhnsw bulk_insert_started flag */
private:
bool m_needs_reopen;
bool created; /* For tmp tables. TRUE <=> tmp table was actually created.*/
Expand Down Expand Up @@ -1875,6 +1876,8 @@ struct TABLE
int hlindexes_on_update();
int hlindexes_on_delete(const uchar *buf);
int hlindexes_on_delete_all(bool truncate);
int hlindexes_bulk_insert_begin(ha_rows rows);
int hlindexes_bulk_insert_end();
int unlock_hlindexes();

void prepare_triggers_for_insert_stmt_or_event();
Expand Down
Loading
Loading