Skip to content

Commit c3f95dd

Browse files
committed
feat: [ArcadeData#3099] vector function enhancements, aliases, renames and new helpers
Follow-up to ArcadeData#4640 implementing the non-bug review items from issue ArcadeData#3099.

Quick wins:
- vectorLInfNorm: simplify loop to Math.max(maxAbs, Math.abs(v))
- vectorStdDev: reuse shared SQLFunctionVectorVariance.variance() helper
- add vectorL2Norm alias for vectorMagnitude and vectorClamp alias for vectorClip

Multi-alias framework:
- add Function.getAliases() and an extra-alias constructor on SQLFunctionVectorAbstract (auto-derives dotted + camelCase forms)
- SQLFunctionFactoryTemplate registers all aliases for instance and class-based registrations

Naming:
- rename misnamed classes to the SQLFunctionVector* convention (MultiVectorScore, DenseVectorToSparse, SparseVectorCreate/Dot/ToDense); SQL function names are unchanged.

New small features:
- vector.hasNull(): detect genuine NULL elements (distinct from NaN)
- scalar broadcasting in vector.add (commutative) and vector.subtract (order-preserving)
- vector.scoreTransform: add LN (synonym of LOG) and TANH
- vector.sparsity: optional threshold defaulting to sqrt(eps), plus FRACTION/L0/GMEAN modes

Adds SQLFunctionVectorEnhancementsTest covering all of the above.
1 parent e2a4840 commit c3f95dd

22 files changed

Lines changed: 595 additions & 149 deletions

engine/src/main/java/com/arcadedb/function/Function.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,20 @@ default String getAlias() {
9696
return null;
9797
}
9898

99+
/**
100+
* Returns additional alias names beyond the primary {@link #getAlias()}.
101+
* <p>
102+
* A function may expose several backward-compatible or synonymous names (e.g.
103+
* {@code vector.magnitude} is also registered as {@code vector.l2Norm} / {@code vectorL2Norm}).
104+
* Each name returned here is registered as an extra lookup key for the same function instance.
105+
* </p>
106+
*
107+
* @return an unmodifiable list of additional names, or an empty list if none
108+
*/
109+
default java.util.List<String> getAliases() {
110+
return java.util.List.of();
111+
}
112+
99113
/**
100114
* Validates the arguments before execution.
101115
* Default implementation checks argument count against min/max bounds.

engine/src/main/java/com/arcadedb/function/sql/DefaultSQLFunctionFactory.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,23 +118,20 @@
118118
import com.arcadedb.function.sql.time.SQLFunctionTsFirst;
119119
import com.arcadedb.function.sql.time.SQLFunctionTsLast;
120120
import com.arcadedb.function.sql.time.SQLFunctionPromQL;
121-
import com.arcadedb.function.sql.vector.SQLFunctionDenseVectorToSparse;
122-
import com.arcadedb.function.sql.vector.SQLFunctionMultiVectorScore;
123-
import com.arcadedb.function.sql.vector.SQLFunctionSparseVectorCreate;
124-
import com.arcadedb.function.sql.vector.SQLFunctionSparseVectorDot;
125-
import com.arcadedb.function.sql.vector.SQLFunctionSparseVectorToDense;
126121
import com.arcadedb.function.sql.vector.SQLFunctionVectorAdd;
127122
import com.arcadedb.function.sql.vector.SQLFunctionVectorApproxDistance;
128123
import com.arcadedb.function.sql.vector.SQLFunctionVectorAvg;
129124
import com.arcadedb.function.sql.vector.SQLFunctionVectorBoost;
130125
import com.arcadedb.function.sql.vector.SQLFunctionVectorClip;
131126
import com.arcadedb.function.sql.vector.SQLFunctionVectorCosineSimilarity;
127+
import com.arcadedb.function.sql.vector.SQLFunctionVectorDenseToSparse;
132128
import com.arcadedb.function.sql.vector.SQLFunctionVectorDequantizeInt8;
133129
import com.arcadedb.function.sql.vector.SQLFunctionVectorDimension;
134130
import com.arcadedb.function.sql.vector.SQLFunctionVectorDiscover;
135131
import com.arcadedb.function.sql.vector.SQLFunctionVectorDotProduct;
136132
import com.arcadedb.function.sql.vector.SQLFunctionVectorHasInf;
137133
import com.arcadedb.function.sql.vector.SQLFunctionVectorHasNaN;
134+
import com.arcadedb.function.sql.vector.SQLFunctionVectorHasNull;
138135
import com.arcadedb.function.sql.vector.SQLFunctionVectorHybridScore;
139136
import com.arcadedb.function.sql.vector.SQLFunctionVectorIsNormalized;
140137
import com.arcadedb.function.sql.vector.SQLFunctionVectorL1Distance;
@@ -145,6 +142,7 @@
145142
import com.arcadedb.function.sql.vector.SQLFunctionVectorMax;
146143
import com.arcadedb.function.sql.vector.SQLFunctionVectorMin;
147144
import com.arcadedb.function.sql.vector.SQLFunctionVectorMmr;
145+
import com.arcadedb.function.sql.vector.SQLFunctionVectorMultiScore;
148146
import com.arcadedb.function.sql.vector.SQLFunctionVectorMultiply;
149147
import com.arcadedb.function.sql.vector.SQLFunctionVectorFuse;
150148
import com.arcadedb.function.sql.vector.SQLFunctionVectorNeighbors;
@@ -158,6 +156,9 @@
158156
import com.arcadedb.function.sql.vector.SQLFunctionVectorRerank;
159157
import com.arcadedb.function.sql.vector.SQLFunctionVectorScale;
160158
import com.arcadedb.function.sql.vector.SQLFunctionVectorScoreTransform;
159+
import com.arcadedb.function.sql.vector.SQLFunctionVectorSparseCreate;
160+
import com.arcadedb.function.sql.vector.SQLFunctionVectorSparseDot;
161+
import com.arcadedb.function.sql.vector.SQLFunctionVectorSparseToDense;
161162
import com.arcadedb.function.sql.vector.SQLFunctionVectorSparsity;
162163
import com.arcadedb.function.sql.vector.SQLFunctionVectorStdDev;
163164
import com.arcadedb.function.sql.vector.SQLFunctionVectorSubtract;
@@ -338,12 +339,12 @@ private DefaultSQLFunctionFactory() {
338339
register(SQLFunctionVectorHybridScore.NAME, new SQLFunctionVectorHybridScore());
339340
register(SQLFunctionVectorScoreTransform.NAME, new SQLFunctionVectorScoreTransform());
340341
// Sparse Vectors
341-
register(SQLFunctionSparseVectorCreate.NAME, new SQLFunctionSparseVectorCreate());
342-
register(SQLFunctionSparseVectorDot.NAME, new SQLFunctionSparseVectorDot());
343-
register(SQLFunctionSparseVectorToDense.NAME, new SQLFunctionSparseVectorToDense());
344-
register(SQLFunctionDenseVectorToSparse.NAME, new SQLFunctionDenseVectorToSparse());
342+
register(SQLFunctionVectorSparseCreate.NAME, new SQLFunctionVectorSparseCreate());
343+
register(SQLFunctionVectorSparseDot.NAME, new SQLFunctionVectorSparseDot());
344+
register(SQLFunctionVectorSparseToDense.NAME, new SQLFunctionVectorSparseToDense());
345+
register(SQLFunctionVectorDenseToSparse.NAME, new SQLFunctionVectorDenseToSparse());
345346
// Multi-Vector Operations
346-
register(SQLFunctionMultiVectorScore.NAME, new SQLFunctionMultiVectorScore());
347+
register(SQLFunctionVectorMultiScore.NAME, new SQLFunctionVectorMultiScore());
347348
// Quantization & Optimization
348349
register(SQLFunctionVectorQuantizeInt8.NAME, new SQLFunctionVectorQuantizeInt8());
349350
register(SQLFunctionVectorQuantizeBinary.NAME, new SQLFunctionVectorQuantizeBinary());
@@ -359,6 +360,7 @@ private DefaultSQLFunctionFactory() {
359360
register(SQLFunctionVectorIsNormalized.NAME, new SQLFunctionVectorIsNormalized());
360361
register(SQLFunctionVectorHasNaN.NAME, new SQLFunctionVectorHasNaN());
361362
register(SQLFunctionVectorHasInf.NAME, new SQLFunctionVectorHasInf());
363+
register(SQLFunctionVectorHasNull.NAME, new SQLFunctionVectorHasNull());
362364
register(SQLFunctionVectorClip.NAME, new SQLFunctionVectorClip());
363365
register(SQLFunctionVectorToString.NAME, new SQLFunctionVectorToString());
364366
// Existing

engine/src/main/java/com/arcadedb/function/sql/SQLFunctionFactoryTemplate.java

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,8 @@ public SQLFunctionFactoryTemplate() {
5555
*/
5656
public void register(final SQLFunction function) {
5757
functions.put(function.getName().toLowerCase(Locale.ENGLISH), function);
58-
// Register alias if provided (for backward compatibility)
59-
final String alias = function.getAlias();
60-
if (alias != null) {
61-
functions.put(alias.toLowerCase(Locale.ENGLISH), function);
62-
}
58+
// Register the primary alias and any additional aliases (for backward compatibility / synonyms)
59+
registerAliasKeys(function, function);
6360
// Also register in the unified FunctionRegistry for cross-engine access
6461
FunctionRegistry.register(function);
6562
}
@@ -81,34 +78,42 @@ public void unregister(final String name) {
8178
*/
8279
public void register(final String name, final Object function) {
8380
functions.put(name.toLowerCase(Locale.ENGLISH), function);
84-
// If it's an instance (not a class), also register in unified registry and handle alias
81+
// If it's an instance (not a class), also register in unified registry and handle aliases
8582
if (function instanceof SQLFunction sqlFunction) {
86-
// Register alias if provided (for backward compatibility)
87-
final String alias = sqlFunction.getAlias();
88-
if (alias != null) {
89-
functions.put(alias.toLowerCase(Locale.ENGLISH), function);
90-
}
83+
registerAliasKeys(sqlFunction, function);
9184
FunctionRegistry.register(sqlFunction);
9285
} else if (function instanceof Class<?> clazz && SQLFunction.class.isAssignableFrom(clazz)) {
9386
// Class-based (stateful) registration: getFunctionInstance() creates a fresh instance per call, so we
94-
// do not register in the unified FunctionRegistry. We still honor the function's alias by mapping the
95-
// alias name to the same class, otherwise backward-compatible names (e.g. vectorSum -> vector.sum)
87+
// do not register in the unified FunctionRegistry. We still honor the function's alias(es) by mapping
88+
// those names to the same class, otherwise backward-compatible names (e.g. vectorSum -> vector.sum)
9689
// would stop resolving once a stateful function moves from instance to class registration.
97-
final String alias = aliasOfFunctionClass(clazz);
98-
if (alias != null) {
99-
functions.put(alias.toLowerCase(Locale.ENGLISH), function);
100-
}
90+
final SQLFunction probe = probeFunctionInstance(clazz);
91+
if (probe != null)
92+
registerAliasKeys(probe, function);
10193
}
10294
}
10395

10496
/**
105-
* Probes a function class for its declared alias by instantiating it via the no-arg constructor (the same
106-
* constructor {@link #getFunctionInstance(String)} relies on). Returns {@code null} when the function has no
107-
* alias or cannot be instantiated, in which case only the primary name is registered.
97+
* Maps the primary {@link SQLFunction#getAlias()} and every {@link SQLFunction#getAliases()} entry to the
98+
* given map value (an instance or a class). The value, not the probe, is stored so class-based stateful
99+
* functions still get a fresh instance per call via {@link #getFunctionInstance(String)}.
100+
*/
101+
private void registerAliasKeys(final SQLFunction probe, final Object value) {
102+
final String alias = probe.getAlias();
103+
if (alias != null)
104+
functions.put(alias.toLowerCase(Locale.ENGLISH), value);
105+
for (final String extra : probe.getAliases())
106+
functions.put(extra.toLowerCase(Locale.ENGLISH), value);
107+
}
108+
109+
/**
110+
* Probes a function class by instantiating it via the no-arg constructor (the same constructor
111+
* {@link #getFunctionInstance(String)} relies on). Returns {@code null} when it cannot be instantiated, in
112+
* which case only the primary name is registered.
108113
*/
109-
private static String aliasOfFunctionClass(final Class<?> clazz) {
114+
private static SQLFunction probeFunctionInstance(final Class<?> clazz) {
110115
try {
111-
return ((SQLFunction) clazz.getConstructor().newInstance()).getAlias();
116+
return (SQLFunction) clazz.getConstructor().newInstance();
112117
} catch (final Exception e) {
113118
return null;
114119
}

engine/src/main/java/com/arcadedb/function/sql/vector/SQLFunctionVectorAbstract.java

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import com.arcadedb.query.sql.executor.Result;
2929
import com.arcadedb.utility.IntHashSet;
3030

31+
import java.util.ArrayList;
3132
import java.util.HashMap;
3233
import java.util.HashSet;
3334
import java.util.List;
@@ -47,24 +48,58 @@
4748
*/
4849
public abstract class SQLFunctionVectorAbstract extends SQLFunctionAbstract {
4950
private static final String VECTOR_PREFIX = "vector.";
50-
private final String alias;
51+
private final String alias;
52+
private final List<String> aliases;
5153

5254
protected SQLFunctionVectorAbstract(final String name) {
55+
this(name, (String[]) null);
56+
}
57+
58+
/**
59+
* Registers the function under {@code name} plus optional synonym names. Each synonym (given in
60+
* dotted {@code vector.xxx} form) is exposed both as-is and as its camelCase variant, e.g. passing
61+
* {@code "vector.l2Norm"} makes the function reachable as {@code vector.l2Norm} and {@code vectorL2Norm}
62+
* in addition to the primary {@code vector.magnitude} / {@code vectorMagnitude} names.
63+
*
64+
* @param name the primary (dotted) function name
65+
* @param extraDottedNames optional additional dotted names to expose as aliases
66+
*/
67+
protected SQLFunctionVectorAbstract(final String name, final String... extraDottedNames) {
5368
super(name);
5469
// Auto-generate alias for backward compatibility: vector.xxx -> vectorXxx
55-
if (name.startsWith(VECTOR_PREFIX)) {
56-
final String suffix = name.substring(VECTOR_PREFIX.length());
57-
this.alias = "vector" + Character.toUpperCase(suffix.charAt(0)) + suffix.substring(1);
70+
this.alias = camelCaseAlias(name);
71+
if (extraDottedNames == null || extraDottedNames.length == 0) {
72+
this.aliases = List.of();
5873
} else {
59-
this.alias = null;
74+
final List<String> a = new ArrayList<>(extraDottedNames.length * 2);
75+
for (final String extra : extraDottedNames) {
76+
a.add(extra);
77+
final String camel = camelCaseAlias(extra);
78+
if (camel != null && !camel.equals(extra))
79+
a.add(camel);
80+
}
81+
this.aliases = List.copyOf(a);
82+
}
83+
}
84+
85+
private static String camelCaseAlias(final String name) {
86+
if (name != null && name.startsWith(VECTOR_PREFIX)) {
87+
final String suffix = name.substring(VECTOR_PREFIX.length());
88+
return "vector" + Character.toUpperCase(suffix.charAt(0)) + suffix.substring(1);
6089
}
90+
return null;
6191
}
6292

6393
@Override
6494
public String getAlias() {
6595
return alias;
6696
}
6797

98+
@Override
99+
public List<String> getAliases() {
100+
return aliases;
101+
}
102+
68103
/**
69104
* Validates that two vectors have the same dimension.
70105
*

engine/src/main/java/com/arcadedb/function/sql/vector/SQLFunctionVectorAdd.java

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
package com.arcadedb.function.sql.vector;
2020

2121
import com.arcadedb.database.Identifiable;
22+
import com.arcadedb.exception.CommandSQLParsingException;
2223
import com.arcadedb.query.sql.executor.CommandContext;
2324

2425
/**
25-
* Performs element-wise vector addition.
26-
* Returns a new vector where each component is the sum of corresponding components.
26+
* Performs element-wise vector addition, or scalar broadcasting.
27+
* With two vectors, returns the element-wise sum. With a vector and a scalar (in either order), the
28+
* scalar is added to every element, e.g. {@code vectorAdd([1,2,3], 4) = [5,6,7]}.
2729
* <p>
2830
* Uses scalar implementation which is 7-11x faster than JVector for typical vector sizes (< 1024).
2931
* JVector overhead from object allocation and conversion dominates actual computation cost.
@@ -43,19 +45,40 @@ public Object execute(final Object self, final Identifiable currentRecord, final
4345
validateNotNull(params[0], "Vector1");
4446
validateNotNull(params[1], "Vector2");
4547

46-
final float[] v1 = toFloatArray(params[0]);
47-
final float[] v2 = toFloatArray(params[1]);
48+
final Object a = params[0];
49+
final Object b = params[1];
50+
final boolean aScalar = a instanceof Number;
51+
final boolean bScalar = b instanceof Number;
52+
53+
if (aScalar && bScalar)
54+
throw new CommandSQLParsingException("At least one argument of " + NAME + " must be a vector");
55+
56+
// vector + scalar (broadcast)
57+
if (bScalar)
58+
return broadcast(toFloatArray(a), ((Number) b).floatValue());
59+
// scalar + vector (broadcast) - addition is commutative
60+
if (aScalar)
61+
return broadcast(toFloatArray(b), ((Number) a).floatValue());
62+
63+
// vector + vector (element-wise)
64+
final float[] v1 = toFloatArray(a);
65+
final float[] v2 = toFloatArray(b);
4866
validateSameDimension(v1, v2);
4967

50-
// Scalar implementation - significantly faster than JVector for typical sizes
5168
final float[] result = new float[v1.length];
52-
for (int i = 0; i < v1.length; i++) {
69+
for (int i = 0; i < v1.length; i++)
5370
result[i] = v1[i] + v2[i];
54-
}
71+
return result;
72+
}
73+
74+
private static float[] broadcast(final float[] v, final float scalar) {
75+
final float[] result = new float[v.length];
76+
for (int i = 0; i < v.length; i++)
77+
result[i] = v[i] + scalar;
5578
return result;
5679
}
5780

5881
public String getSyntax() {
59-
return NAME + "(<vector1>, <vector2>)";
82+
return NAME + "(<vector1>, <vector2> | <scalar>)";
6083
}
6184
}

engine/src/main/java/com/arcadedb/function/sql/vector/SQLFunctionVectorClip.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
/**
2626
* Clips (clamps) vector elements to a specified range.
2727
* Any value below min becomes min, any value above max becomes max.
28+
* <p>
29+
* Also available as {@code vector.clamp} / {@code vectorClamp} (the standard Java/Math term).
2830
*
2931
* Formula: clipped[i] = max(min, min(max, value[i]))
3032
*
@@ -36,7 +38,7 @@ public class SQLFunctionVectorClip extends SQLFunctionVectorAbstract {
3638
public static final String NAME = "vector.clip";
3739

3840
public SQLFunctionVectorClip() {
39-
super(NAME);
41+
super(NAME, "vector.clamp");
4042
}
4143

4244
@Override

engine/src/main/java/com/arcadedb/function/sql/vector/SQLFunctionDenseVectorToSparse.java renamed to engine/src/main/java/com/arcadedb/function/sql/vector/SQLFunctionVectorDenseToSparse.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@
3737
*
3838
* @author Luca Garulli (l.garulli--(at)--arcadedata.com)
3939
*/
40-
public class SQLFunctionDenseVectorToSparse extends SQLFunctionVectorAbstract {
40+
public class SQLFunctionVectorDenseToSparse extends SQLFunctionVectorAbstract {
4141
public static final String NAME = "vector.denseToSparse";
4242

43-
public SQLFunctionDenseVectorToSparse() {
43+
public SQLFunctionVectorDenseToSparse() {
4444
super(NAME);
4545
}
4646

0 commit comments

Comments
 (0)