apache
diff --git a/‎processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HeapVectorGrouper.java‎
Lines changed: 220 additions & 0 deletions b/‎processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HeapVectorGrouper.java‎
Lines changed: 220 additions & 0 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java‎
Lines changed: 25 additions & 2 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java‎
Lines changed: 25 additions & 2 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/vector/DictionaryBuildingSingleValueStringTopNVectorColumnSelector.java‎
Lines changed: 98 additions & 0 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/vector/DictionaryBuildingSingleValueStringTopNVectorColumnSelector.java‎
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae;
+
+import it.unimi.dsi.fastutil.Hash;
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
+import it.unimi.dsi.fastutil.objects.Object2IntOpenCustomHashMap;
+import org.apache.datasketches.memory.Memory;
+import org.apache.druid.java.util.common.parsers.CloseableIterator;
+import org.apache.druid.query.aggregation.AggregatorAdapters;
+import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * On-heap {@link VectorGrouper} backed by a Java hash map and a growable on-heap {@link ByteBuffer} for
+ * aggregator state. Unlike {@link HashVectorGrouper}, this grouper never fails with "hash table full" — it
+ * grows its state buffer on demand — which makes it suitable for callers that cannot tolerate partial
+ * aggregation (e.g. topN, where truncating groups mid-aggregation would produce wrong results).
+ *
+ * Vectorized reads and writes (selector vector reads, batched aggregator writes) are preserved; only the
+ * hash table itself is heap-backed.
+ *
+ * Aggregator state lives in a single {@link ByteBuffer}, so the total state size is bounded by the largest
+ * array the JVM can allocate. Exceeding that bound raises an {@link IllegalStateException} rather than
+ * returning a partial {@link AggregateResult}.
+ */
+public class HeapVectorGrouper implements VectorGrouper
+{
+  /**
+   * Hashes and compares {@code byte[]} keys by content rather than by identity.
+   */
+  private static final Hash.Strategy<byte[]> BYTE_ARRAY_HASH_STRATEGY = new Hash.Strategy<byte[]>()
+  {
+    @Override
+    public int hashCode(byte[] o)
+    {
+      return Arrays.hashCode(o);
+    }
+
+    @Override
+    public boolean equals(byte[] a, byte[] b)
+    {
+      return Arrays.equals(a, b);
+    }
+  };
+
+  private static final int MIN_INITIAL_STATE_BUFFER_SIZE = 4096;
+
+  /**
+   * Largest state buffer we will try to allocate. Some VMs reserve header words in arrays, so a request for
+   * exactly {@link Integer#MAX_VALUE} bytes fails even when heap is available; this mirrors the soft limit
+   * used by JDK collections.
+   */
+  private static final int MAX_STATE_BUFFER_SIZE = Integer.MAX_VALUE - 8;
+
+  private final AggregatorAdapters aggregators;
+  private final int keySize;
+  private final int aggStateSize;
+  // Maps each distinct key (by content) to the offset of its aggregator state within aggStateBuffer.
+  private final Object2IntOpenCustomHashMap<byte[]> keyToOffset;
+
+  private boolean initialized;
+  private ByteBuffer aggStateBuffer;
+  // Offset one past the last byte of aggregator state in use; also the offset for the next new group.
+  private int aggStateEnd;
+
+  private int[] vAggregationPositions;
+  private int[] vAggregationRows;
+  private byte[] keyScratch;
+
+  /**
+   * @param aggregators aggregators whose state is stored per group; {@code spaceNeeded()} determines the
+   *                    per-group state size
+   * @param keySize     size in bytes of each grouping key
+   */
+  public HeapVectorGrouper(final AggregatorAdapters aggregators, final int keySize)
+  {
+    this.aggregators = aggregators;
+    this.keySize = keySize;
+    this.aggStateSize = aggregators.spaceNeeded();
+    this.keyToOffset = new Object2IntOpenCustomHashMap<>(BYTE_ARRAY_HASH_STRATEGY);
+    // -1 is never a valid offset, so it doubles as the "key not present" marker.
+    this.keyToOffset.defaultReturnValue(-1);
+  }
+
+  /**
+   * Allocates the working arrays and the initial state buffer. Calling this again while already initialized
+   * is a no-op; calling it after {@link #close()} re-allocates everything.
+   *
+   * @param maxVectorSize maximum number of rows passed to a single {@link #aggregateVector} call
+   */
+  @Override
+  public void initVectorized(final int maxVectorSize)
+  {
+    if (initialized) {
+      return;
+    }
+    this.aggStateBuffer = ByteBuffer.allocate(MIN_INITIAL_STATE_BUFFER_SIZE);
+    this.vAggregationPositions = new int[maxVectorSize];
+    this.vAggregationRows = new int[maxVectorSize];
+    this.keyScratch = new byte[keySize];
+    this.aggStateEnd = 0;
+    this.initialized = true;
+  }
+
+  /**
+   * Aggregates rows {@code [startRow, endRow)}. Keys are read from {@code keySpace} starting at byte 0,
+   * {@code keySize} bytes per row. New keys get freshly initialized aggregator state, growing the state
+   * buffer if necessary.
+   *
+   * @return always {@link AggregateResult#ok()}
+   *
+   * @throws IllegalStateException if the aggregator state can no longer fit in a single on-heap buffer
+   */
+  @Override
+  public AggregateResult aggregateVector(final Memory keySpace, final int startRow, final int endRow)
+  {
+    final int numRows = endRow - startRow;
+
+    for (int i = 0; i < numRows; i++) {
+      keySpace.getByteArray((long) i * keySize, keyScratch, 0, keySize);
+      int offset = keyToOffset.getInt(keyScratch);
+      if (offset == -1) {
+        // Grow only when a new group actually needs state; avoids worst-case pre-allocation.
+        final long neededCapacity = (long) aggStateEnd + aggStateSize;
+        if (neededCapacity > aggStateBuffer.capacity()) {
+          growBuffer(neededCapacity);
+        }
+        offset = aggStateEnd;
+        // keyScratch is reused for every row, so the map must own a private copy of the key.
+        final byte[] keyCopy = Arrays.copyOf(keyScratch, keySize);
+        keyToOffset.put(keyCopy, offset);
+        aggregators.init(aggStateBuffer, offset);
+        aggStateEnd += aggStateSize;
+      }
+      vAggregationPositions[i] = offset;
+    }
+
+    aggregators.aggregateVector(
+        aggStateBuffer,
+        numRows,
+        vAggregationPositions,
+        Groupers.writeAggregationRows(vAggregationRows, startRow, endRow)
+    );
+
+    return AggregateResult.ok();
+  }
+
+  /**
+   * Replaces the state buffer with one of at least {@code neededCapacity} bytes, doubling the capacity until
+   * it fits (capped at {@link #MAX_STATE_BUFFER_SIZE}), and moves existing aggregator state across at
+   * unchanged offsets.
+   *
+   * @throws IllegalStateException if {@code neededCapacity} exceeds {@link #MAX_STATE_BUFFER_SIZE}
+   */
+  private void growBuffer(final long neededCapacity)
+  {
+    if (neededCapacity > MAX_STATE_BUFFER_SIZE) {
+      // Fail loudly here: continuing with a too-small buffer would only surface later as an out-of-bounds write.
+      throw new IllegalStateException(
+          "Cannot grow aggregation state buffer to [" + neededCapacity + "] bytes; maximum is ["
+          + MAX_STATE_BUFFER_SIZE + "] bytes"
+      );
+    }
+
+    // Work in long so doubling cannot overflow before the cap is applied.
+    long newCapacity = Math.max(aggStateBuffer.capacity(), MIN_INITIAL_STATE_BUFFER_SIZE);
+    while (newCapacity < neededCapacity) {
+      newCapacity = Math.min(newCapacity * 2, MAX_STATE_BUFFER_SIZE);
+    }
+
+    final ByteBuffer oldBuffer = aggStateBuffer;
+    final ByteBuffer newBuffer = ByteBuffer.allocate((int) newCapacity);
+
+    // Copy existing aggregator state bytes. Positions remain valid in the new buffer since we copy to
+    // the same offsets. The copy goes through duplicates so that neither buffer's own position/limit is
+    // disturbed before they are handed to the aggregators.
+    final ByteBuffer source = oldBuffer.duplicate();
+    source.position(0);
+    source.limit(aggStateEnd);
+    newBuffer.duplicate().put(source);
+
+    // Inform aggregators that their state bytes moved to a different buffer. Positions are unchanged.
+    for (int pos = 0; pos < aggStateEnd; pos += aggStateSize) {
+      aggregators.relocate(pos, pos, oldBuffer, newBuffer);
+    }
+
+    this.aggStateBuffer = newBuffer;
+  }
+
+  /**
+   * Discards all groups. The state buffer is kept and reused from offset 0.
+   */
+  @Override
+  public void reset()
+  {
+    aggregators.reset();
+    keyToOffset.clear();
+    aggStateEnd = 0;
+  }
+
+  /**
+   * Discards all groups and releases the state buffer and working arrays. The grouper returns to its
+   * uninitialized state, so {@link #initVectorized} must be called again before further aggregation.
+   */
+  @Override
+  public void close()
+  {
+    aggregators.reset();
+    keyToOffset.clear();
+    aggStateBuffer = null;
+    vAggregationPositions = null;
+    vAggregationRows = null;
+    keyScratch = null;
+    aggStateEnd = 0;
+    // Without this, a later initVectorized() would be a no-op and leave the fields above null.
+    initialized = false;
+  }
+
+  /**
+   * Iterates over all groups in hash-map order. The returned entry, its key pointer and its values array are
+   * reused across calls to {@code next()}, so callers must copy anything they need to retain.
+   */
+  @Override
+  public CloseableIterator<Grouper.Entry<MemoryPointer>> iterator()
+  {
+    final Iterator<Object2IntMap.Entry<byte[]>> mapIter =
+        keyToOffset.object2IntEntrySet().fastIterator();
+
+    return new CloseableIterator<>()
+    {
+      final MemoryPointer reusableKey = new MemoryPointer();
+      final ReusableEntry<MemoryPointer> reusableEntry =
+          new ReusableEntry<>(reusableKey, new Object[aggregators.size()]);
+      // Each key is copied into this holder so that keyMemory can stay wrapped around one fixed array.
+      final byte[] keyBytesHolder = new byte[keySize];
+      final Memory keyMemory = Memory.wrap(keyBytesHolder, 0, keySize, ByteOrder.nativeOrder());
+
+      @Override
+      public boolean hasNext()
+      {
+        return mapIter.hasNext();
+      }
+
+      @Override
+      public Grouper.Entry<MemoryPointer> next()
+      {
+        final Object2IntMap.Entry<byte[]> mapEntry = mapIter.next();
+        System.arraycopy(mapEntry.getKey(), 0, keyBytesHolder, 0, keySize);
+        reusableKey.set(keyMemory, 0);
+
+        final int position = mapEntry.getIntValue();
+        final Object[] values = reusableEntry.getValues();
+        for (int i = 0; i < values.length; i++) {
+          values[i] = aggregators.get(aggStateBuffer, position, i);
+        }
+        return reusableEntry;
+      }
+
+      @Override
+      public void close()
+      {
+        // Nothing to close.
+      }
+    };
+  }
+}
@@ -26,6 +26,7 @@
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.java.util.common.guava.Sequences;
+import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.query.ColumnSelectorPlus;
 import org.apache.druid.query.CursorGranularizer;
 import org.apache.druid.query.QueryContexts;
@@ -35,6 +36,7 @@
 import org.apache.druid.query.extraction.ExtractionFn;
 import org.apache.druid.query.topn.types.TopNColumnAggregatesProcessor;
 import org.apache.druid.query.topn.types.TopNColumnAggregatesProcessorFactory;
+import org.apache.druid.query.topn.vector.VectorTopNEngine;
 import org.apache.druid.segment.ColumnSelectorFactory;
 import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
@@ -96,13 +98,34 @@ public Sequence<Result<TopNResultValue>> query(
       if (cursorHolder.isPreAggregated()) {
         query = query.withAggregatorSpecs(Preconditions.checkNotNull(cursorHolder.getAggregatorsForPreAggregated()));
       }
+
+      final TimeBoundaryInspector timeBoundaryInspector = segment.as(TimeBoundaryInspector.class);
+
+      final boolean canVectorize = cursorHolder.canVectorize()
+                                   && VectorTopNEngine.canVectorize(query, cursorFactory);
+      final boolean shouldVectorize = query.context().getVectorize().shouldVectorize(canVectorize);
+
+      if (shouldVectorize) {
+        final ResourceHolder<ByteBuffer> bufHolder = bufferPool.take();
+        try {
+          final Closer resourceCloser = Closer.create();
+          resourceCloser.register(bufHolder);
+          resourceCloser.register(cursorHolder);
+          return Sequences.filter(
+              VectorTopNEngine.process(query, timeBoundaryInspector, cursorHolder, bufHolder.get()),
+              Predicates.notNull()
+          ).withBaggage(resourceCloser);
+        }
+        catch (Throwable t) {
+          throw CloseableUtils.closeAndWrapInCatch(t, bufHolder);
+        }
+      }
+
       final Cursor cursor = cursorHolder.asCursor();
       if (cursor == null) {
         return Sequences.withBaggage(Sequences.empty(), cursorHolder);
       }
 
-      final TimeBoundaryInspector timeBoundaryInspector = segment.as(TimeBoundaryInspector.class);
-
       final ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
 
       final ColumnSelectorPlus<TopNColumnAggregatesProcessor<?>> selectorPlus =
 
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.topn.vector;
+
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
+import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
+import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link TopNVectorColumnSelector} for single-valued STRING columns that have no native dictionary encoding
+ * (for example, expression virtual columns). A local dictionary is built as values are encountered, and each
+ * grouping key is the 4-byte dictionary ID of the row's value, which is the same key layout used by
+ * {@link SingleValueStringTopNVectorColumnSelector}.
+ */
+public class DictionaryBuildingSingleValueStringTopNVectorColumnSelector implements TopNVectorColumnSelector
+{
+  private final VectorObjectSelector selector;
+  // dictionary maps ID -> value; reverseDictionary maps value -> ID, with -1 meaning "not yet assigned".
+  private final List<String> dictionary = new ArrayList<>();
+  private final Object2IntMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
+
+  DictionaryBuildingSingleValueStringTopNVectorColumnSelector(final VectorObjectSelector selector)
+  {
+    reverseDictionary.defaultReturnValue(-1);
+    this.selector = selector;
+  }
+
+  /**
+   * Keys are a single int dictionary ID.
+   */
+  @Override
+  public int getGroupingKeySize()
+  {
+    return Integer.BYTES;
+  }
+
+  /**
+   * Writes one dictionary ID per row in {@code [startRow, endRow)} into {@code keySpace}. The first ID goes
+   * at {@code keyOffset} and each subsequent one {@code keySize} bytes further on. Values seen for the first
+   * time are appended to the dictionary.
+   *
+   * @return the summed estimated footprint of the dictionary entries added by this call
+   */
+  @Override
+  public int writeKeys(
+      final WritableMemory keySpace,
+      final int keySize,
+      final int keyOffset,
+      final int startRow,
+      final int endRow
+  )
+  {
+    final Object[] rowValues = selector.getObjectVector();
+    int footprintDelta = 0;
+    int keyPosition = keyOffset;
+
+    for (int row = startRow; row < endRow; row++) {
+      final String value = DimensionHandlerUtils.convertObjectToString(rowValues[row]);
+      final int knownId = reverseDictionary.getInt(value);
+      final int dictId;
+
+      if (knownId >= 0) {
+        dictId = knownId;
+      } else {
+        // First sighting of this value: it takes the next free ID.
+        dictId = dictionary.size();
+        dictionary.add(value);
+        reverseDictionary.put(value, dictId);
+
+        final int valueBytes = value == null ? 0 : value.length() * Character.BYTES;
+        footprintDelta += DictionaryBuildingUtils.estimateEntryFootprint(valueBytes);
+      }
+
+      keySpace.putInt(keyPosition, dictId);
+      keyPosition += keySize;
+    }
+
+    return footprintDelta;
+  }
+
+  /**
+   * Reads the dictionary ID stored at {@code keyOffset} within the key and returns the corresponding value.
+   */
+  @Override
+  @Nullable
+  public Object getDimensionValue(final MemoryPointer keyMemory, final int keyOffset)
+  {
+    final int dictId = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
+    return dictionary.get(dictId);
+  }
+
+  /**
+   * Empties the dictionary; IDs handed out earlier are no longer resolvable.
+   */
+  @Override
+  public void reset()
+  {
+    reverseDictionary.clear();
+    dictionary.clear();
+  }
+}