From f6d1e86dec64b495e51fce4c42f2f183a92635e0 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Thu, 14 Aug 2025 01:58:09 +0800 Subject: [PATCH 01/22] feat(java): support object stream serialization for graalvm (#2464) ## What does this PR do? ## Related issues Closes #2460 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- docs/guide/graalvm_guide.md | 4 ++ .../java/org/apache/fory/graalvm/Main.java | 1 + .../fory/graalvm/ObjectStreamExample.java | 58 +++++++++++++++++ .../graalvm_tests/native-image.properties | 1 + .../apache/fory/AbstractThreadSafeFory.java | 9 +++ .../main/java/org/apache/fory/BaseFory.java | 9 +++ .../src/main/java/org/apache/fory/Fory.java | 5 ++ .../org/apache/fory/builder/CodecUtils.java | 64 ++++++++++++++++--- .../apache/fory/resolver/ClassResolver.java | 35 ++++++++++ .../fory-core/native-image.properties | 4 ++ 10 files changed, 180 insertions(+), 10 deletions(-) create mode 100644 integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/ObjectStreamExample.java diff --git a/docs/guide/graalvm_guide.md b/docs/guide/graalvm_guide.md index e9bef16a81..7a1dc7998c 100644 --- a/docs/guide/graalvm_guide.md +++ b/docs/guide/graalvm_guide.md @@ -70,6 +70,8 @@ public class Example { fory = Fory.builder().build(); // register and generate serializer code. fory.register(Record.class, true); + // ensure lazy initialized serializers being compiled by fory. + fory.ensureSerializersCompiled(); } public static void main(String[] args) { @@ -115,6 +117,8 @@ public class ThreadSafeExample { Fory f = Fory.builder().build(); // register and generate serializer code. f.register(Foo.class, true); + // ensure lazy initialized serializers being compiled by fory. 
+ f.ensureSerializersCompiled(); return f; }); } diff --git a/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/Main.java b/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/Main.java index e042682a32..48469e9e72 100644 --- a/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/Main.java +++ b/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/Main.java @@ -35,6 +35,7 @@ public static void main(String[] args) throws Throwable { ThreadSafeExample.main(args); CompatibleThreadSafeExample.main(args); ProxyExample.main(args); + ObjectStreamExample.main(args); Benchmark.main(args); CollectionExample.main(args); } diff --git a/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/ObjectStreamExample.java b/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/ObjectStreamExample.java new file mode 100644 index 0000000000..a73c90bdf5 --- /dev/null +++ b/integration_tests/graalvm_tests/src/main/java/org/apache/fory/graalvm/ObjectStreamExample.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + package org.apache.fory.graalvm; + +import org.apache.fory.Fory; + +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +public class ObjectStreamExample extends AbstractMap { + private static final Fory FORY = Fory.builder() + .withName(ObjectStreamExample.class.getName()) + .registerGuavaTypes(false) + .build(); + + static { + FORY.register(ObjectStreamExample.class, true); + FORY.ensureSerializersCompiled(); + } + + final int[] ints = new int[10]; + + public static void main(String[] args) { + FORY.reset(); + byte[] bytes = FORY.serialize(new ObjectStreamExample()); + FORY.reset(); + ObjectStreamExample o = (ObjectStreamExample) FORY.deserialize(bytes); + System.out.println(Arrays.toString(o.ints)); + } + + @Override + public Set> entrySet() { + HashSet> set = new HashSet<>(); + for (int i = 0; i < ints.length; i++) { + set.add(new AbstractMap.SimpleEntry<>(i, ints[i])); + } + return set; + } +} \ No newline at end of file diff --git a/integration_tests/graalvm_tests/src/main/resources/META-INF/native-image/org.apache.fory/graalvm_tests/native-image.properties b/integration_tests/graalvm_tests/src/main/resources/META-INF/native-image/org.apache.fory/graalvm_tests/native-image.properties index 27cd0385fc..35d26127a2 100644 --- a/integration_tests/graalvm_tests/src/main/resources/META-INF/native-image/org.apache.fory/graalvm_tests/native-image.properties +++ b/integration_tests/graalvm_tests/src/main/resources/META-INF/native-image/org.apache.fory/graalvm_tests/native-image.properties @@ -27,5 +27,6 @@ Args=-H:+ReportExceptionStackTraces \ org.apache.fory.graalvm.ThreadSafeExample,\ org.apache.fory.graalvm.CompatibleThreadSafeExample,\ org.apache.fory.graalvm.ProxyExample,\ + org.apache.fory.graalvm.ObjectStreamExample,\ org.apache.fory.graalvm.CollectionExample,\ org.apache.fory.graalvm.Benchmark diff --git a/java/fory-core/src/main/java/org/apache/fory/AbstractThreadSafeFory.java 
b/java/fory-core/src/main/java/org/apache/fory/AbstractThreadSafeFory.java index 8c8405ca36..a438ec5cbb 100644 --- a/java/fory-core/src/main/java/org/apache/fory/AbstractThreadSafeFory.java +++ b/java/fory-core/src/main/java/org/apache/fory/AbstractThreadSafeFory.java @@ -82,6 +82,15 @@ public void setClassChecker(ClassChecker classChecker) { registerCallback(fory -> fory.getClassResolver().setClassChecker(classChecker)); } + @Override + public void ensureSerializersCompiled() { + execute( + fory -> { + fory.ensureSerializersCompiled(); + return null; + }); + } + @Internal public abstract void registerCallback(Consumer callback); } diff --git a/java/fory-core/src/main/java/org/apache/fory/BaseFory.java b/java/fory-core/src/main/java/org/apache/fory/BaseFory.java index 3f27ecdade..ff0b68c71c 100644 --- a/java/fory-core/src/main/java/org/apache/fory/BaseFory.java +++ b/java/fory-core/src/main/java/org/apache/fory/BaseFory.java @@ -105,6 +105,15 @@ public interface BaseFory { void setSerializerFactory(SerializerFactory serializerFactory); + /** + * Ensure all compilation for serializers and accessors even for lazy initialized serializers. + * This method will block until all compilation is done. + * + *

This method is mainly used at GraalVM native image build time, and to trigger compilation + * ahead of time for online services to avoid a cold start. + */ + void ensureSerializersCompiled(); + /** Return serialized obj as a byte array. */ byte[] serialize(Object obj); diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java b/java/fory-core/src/main/java/org/apache/fory/Fory.java index 99eed4bbf0..5bacf507f4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/Fory.java +++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java @@ -1620,6 +1620,11 @@ private void throwDepthDeserializationException() { method)); } + @Override + public void ensureSerializersCompiled() { + classResolver.ensureSerializersCompiled(); + } + public JITContext getJITContext() { return jitContext; } diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/CodecUtils.java b/java/fory-core/src/main/java/org/apache/fory/builder/CodecUtils.java index 9f96a889d0..c23cec2030 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/CodecUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/CodecUtils.java @@ -20,19 +20,26 @@ package org.apache.fory.builder; import java.util.Collections; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; import org.apache.fory.Fory; import org.apache.fory.codegen.CodeGenerator; import org.apache.fory.codegen.CompileUnit; +import org.apache.fory.collection.Tuple2; import org.apache.fory.meta.ClassDef; import org.apache.fory.reflect.TypeRef; import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.FieldResolver; import org.apache.fory.serializer.Serializer; import org.apache.fory.util.ClassLoaderUtils; +import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; /** Codec util to create and load jit serializer class. 
*/ +@SuppressWarnings("rawtypes") public class CodecUtils { + private static ConcurrentHashMap>, Class> graalvmSerializers = + new ConcurrentHashMap<>(); // TODO(chaokunyang) how to uninstall org.apache.fory.codegen/builder classes for graalvm build // time @@ -40,30 +47,47 @@ public class CodecUtils { public static Class> loadOrGenObjectCodecClass( Class cls, Fory fory) { Preconditions.checkNotNull(fory); - BaseObjectCodecBuilder codecBuilder = new ObjectCodecBuilder(cls, fory); - return loadOrGenCodecClass(cls, fory, codecBuilder); + return loadSerializer( + "loadOrGenObjectCodecClass", + cls, + () -> loadOrGenCodecClass(cls, fory, new ObjectCodecBuilder(cls, fory))); } public static Class> loadOrGenMetaSharedCodecClass( Fory fory, Class cls, ClassDef classDef) { Preconditions.checkNotNull(fory); - MetaSharedCodecBuilder codecBuilder = - new MetaSharedCodecBuilder(TypeRef.of(cls), fory, classDef); - return loadOrGenCodecClass(cls, fory, codecBuilder); + return loadSerializer( + "loadOrGenMetaSharedCodecClass", + cls, + () -> + loadOrGenCodecClass( + cls, fory, new MetaSharedCodecBuilder(TypeRef.of(cls), fory, classDef))); } public static Class> loadOrGenCompatibleCodecClass( Class cls, Fory fory) { - FieldResolver resolver = FieldResolver.of(fory, cls, true, false); - return loadOrGenCompatibleCodecClass(cls, fory, resolver, Generated.GeneratedSerializer.class); + return loadSerializer( + "loadOrGenCompatibleCodecClass", + cls, + () -> { + FieldResolver resolver = FieldResolver.of(fory, cls, true, false); + return loadOrGenCompatibleCodecClass( + cls, fory, resolver, Generated.GeneratedSerializer.class); + }); } public static Class> loadOrGenCompatibleCodecClass( Class cls, Fory fory, FieldResolver fieldResolver, Class parentSerializerClass) { Preconditions.checkNotNull(fory); - BaseObjectCodecBuilder codecBuilder = - new CompatibleCodecBuilder(TypeRef.of(cls), fory, fieldResolver, parentSerializerClass); - return loadOrGenCodecClass(cls, fory, codecBuilder); + 
return loadSerializer( + "loadOrGenCompatibleCodecClass", + cls, + () -> { + BaseObjectCodecBuilder codecBuilder = + new CompatibleCodecBuilder( + TypeRef.of(cls), fory, fieldResolver, parentSerializerClass); + return loadOrGenCodecClass(cls, fory, codecBuilder); + }); } @SuppressWarnings("unchecked") @@ -126,4 +150,24 @@ private static CodeGenerator getCodeGenerator( } return codeGenerator; } + + private static Class> loadSerializer( + String name, Class cls, Callable>> func) { + if (GraalvmSupport.IN_GRAALVM_NATIVE_IMAGE) { + Tuple2> key = Tuple2.of(name, cls); + Class serializerClass = graalvmSerializers.get(key); + if (serializerClass != null) { + return serializerClass; + } + } + try { + Class serializerClass = func.call(); + if (GraalvmSupport.IN_GRAALVM_NATIVE_IMAGE) { + graalvmSerializers.putIfAbsent(Tuple2.of(name, cls), serializerClass); + } + return serializerClass; + } catch (Exception e) { + throw new RuntimeException(e); + } + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index 0e62c54d40..54895c5929 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -274,6 +274,7 @@ private static class ExtRegistry { private final IdentityMap genericTypes = new IdentityMap<>(); private final Map> classGenericTypes = new HashMap<>(); private final Map, CodeGenerator> codeGeneratorMap = new HashMap<>(); + private final Set initialClassInfos = new HashSet<>(); } public ClassResolver(Fory fory) { @@ -331,6 +332,14 @@ public void initialize() { addDefaultSerializers(); shimDispatcher.initialize(); innerEndClassId = extRegistry.classIdGenerator; + if (GraalvmSupport.isGraalBuildtime()) { + classInfoMap.forEach( + (cls, classInfo) -> { + if (classInfo.serializer != null) { + extRegistry.initialClassInfos.add(classInfo); + } + }); + } } 
private void addDefaultSerializers() { @@ -2197,6 +2206,32 @@ public Fory getFory() { return fory; } + /** + * Ensure all compilation for serializers and accessors even for lazy initialized serializers. + * This method will block until all compilation is done. + */ + public void ensureSerializersCompiled() { + try { + classInfoMap.forEach( + (cls, classInfo) -> { + if (classInfo.serializer == null) { + getSerializer(classInfo.cls, isSerializable(classInfo.cls)); + } + }); + if (GraalvmSupport.isGraalBuildtime()) { + classInfoMap.forEach( + (cls, classInfo) -> { + if (classInfo.serializer != null + && !extRegistry.initialClassInfos.contains(classInfo)) { + classInfo.serializer = null; + } + }); + } + } finally { + fory.getJITContext().unlock(); + } + } + private static final ConcurrentMap GRAALVM_REGISTRY = new ConcurrentHashMap<>(); diff --git a/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties b/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties index 319193bfaf..50f599329b 100644 --- a/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties +++ b/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties @@ -193,6 +193,7 @@ Args=--initialize-at-build-time=org.apache.fory.memory.MemoryBuffer,\ org.apache.fory.builder.AccessorHelper,\ org.apache.fory.builder.JITContext,\ org.apache.fory.builder.ObjectCodecBuilder,\ + org.apache.fory.builder.CodecUtils,\ org.apache.fory.codegen.CodeGenerator$DefineState,\ org.apache.fory.codegen.CodeGenerator,\ org.apache.fory.codegen.CodegenContext,\ @@ -300,6 +301,7 @@ Args=--initialize-at-build-time=org.apache.fory.memory.MemoryBuffer,\ org.apache.fory.serializer.collection.SubListSerializers,\ org.apache.fory.serializer.collection.SubListSerializers$SubListViewSerializer,\ 
org.apache.fory.serializer.collection.SubListSerializers$SubListSerializer,\ + org.apache.fory.serializer.collection.MapSerializers$DefaultJavaMapSerializer,\ org.apache.fory.serializer.JavaSerializer$1,\ org.apache.fory.serializer.JavaSerializer$2,\ org.apache.fory.serializer.JavaSerializer$3,\ @@ -312,6 +314,7 @@ Args=--initialize-at-build-time=org.apache.fory.memory.MemoryBuffer,\ org.apache.fory.serializer.LocaleSerializer,\ org.apache.fory.serializer.LazySerializer,\ org.apache.fory.serializer.LazySerializer$LazyObjectSerializer,\ + org.apache.fory.serializer.CodegenSerializer$LazyInitBeanSerializer,\ org.apache.fory.serializer.NoneSerializer,\ org.apache.fory.serializer.NonexistentClassSerializers$ClassFieldsInfo,\ org.apache.fory.serializer.NonexistentClassSerializers$NonexistentClassSerializer,\ @@ -332,6 +335,7 @@ Args=--initialize-at-build-time=org.apache.fory.memory.MemoryBuffer,\ org.apache.fory.serializer.ReplaceResolveSerializer$1,\ org.apache.fory.serializer.ReplaceResolveSerializer$ReplaceStub,\ org.apache.fory.serializer.ReplaceResolveSerializer,\ + org.apache.fory.serializer.ReplaceResolveSerializer$ReplaceResolveInfo,\ org.apache.fory.serializer.Serializers$AtomicBooleanSerializer,\ org.apache.fory.serializer.Serializers$AtomicIntegerSerializer,\ org.apache.fory.serializer.Serializers$AtomicLongSerializer,\ From 80ffbebc302ad48ac7dc513d5abcaa14eea3136a Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Thu, 14 Aug 2025 16:12:53 +0800 Subject: [PATCH 02/22] fix(java): fix map/list element type same with collection type jit error (#2465) ## What does this PR do? ## Related issues Closes #2454 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? 
## Benchmark --- .../fory/builder/BaseObjectCodecBuilder.java | 37 +++++++++++++++---- .../ChildContainerSerializersTest.java | 20 ++++++++++ 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java index 2f8efe1ec3..0d965a5ff4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java @@ -64,7 +64,6 @@ import static org.apache.fory.type.TypeUtils.PRIMITIVE_LONG_TYPE; import static org.apache.fory.type.TypeUtils.PRIMITIVE_VOID_TYPE; import static org.apache.fory.type.TypeUtils.SET_TYPE; -import static org.apache.fory.type.TypeUtils.getElementType; import static org.apache.fory.type.TypeUtils.getRawType; import static org.apache.fory.type.TypeUtils.isBoxed; import static org.apache.fory.type.TypeUtils.isPrimitive; @@ -484,19 +483,19 @@ private Expression serializeForNotNull( } protected boolean useCollectionSerialization(TypeRef typeRef) { - return fory(f -> f.getClassResolver().isCollection(TypeUtils.getRawType(typeRef))); + return useCollectionSerialization(TypeUtils.getRawType(typeRef)); } protected boolean useCollectionSerialization(Class type) { - return fory(f -> f.getClassResolver().isCollection(TypeUtils.getRawType(type))); + return fory(f -> f.getClassResolver().isCollection(type)); } protected boolean useMapSerialization(TypeRef typeRef) { - return fory(f -> f.getClassResolver().isMap(TypeUtils.getRawType(typeRef))); + return useMapSerialization(TypeUtils.getRawType(typeRef)); } protected boolean useMapSerialization(Class type) { - return fory(f -> f.getClassResolver().isMap(TypeUtils.getRawType(type))); + return fory(f -> f.getClassResolver().isMap(type)); } /** @@ -834,12 +833,13 @@ protected Expression serializeForCollection( serializer = cast(serializer, 
TypeRef.of(AbstractCollectionSerializer.class), "colSerializer"); } + TypeRef elementType = getElementType(typeRef); // write collection data. ListExpression actions = new ListExpression(); Expression write = new If( inlineInvoke(serializer, "supportCodegenHook", PRIMITIVE_BOOLEAN_TYPE), - writeCollectionData(buffer, collection, serializer, getElementType(typeRef)), + writeCollectionData(buffer, collection, serializer, elementType), new Invoke(serializer, "write", buffer, collection)); actions.add(write); if (generateNewMethod) { @@ -849,6 +849,14 @@ protected Expression serializeForCollection( return actions; } + private TypeRef getElementType(TypeRef typeRef) { + TypeRef elementType = TypeUtils.getElementType(typeRef); + if (elementType.equals(typeRef)) { + elementType = OBJECT_TYPE; + } + return elementType; + } + protected Expression writeCollectionData( Expression buffer, Expression collection, Expression serializer, TypeRef elementType) { Invoke onCollectionWrite = @@ -1146,9 +1154,22 @@ protected Expression serializeForMap( return write; } + private Tuple2, TypeRef> getMapKeyValueType(TypeRef typeRef) { + Tuple2, TypeRef> keyValueType = TypeUtils.getMapKeyValueType(typeRef); + TypeRef keyType = keyValueType.f0; + TypeRef valueType = keyValueType.f1; + if (keyType.equals(typeRef)) { + keyType = OBJECT_TYPE; + } + if (valueType.equals(typeRef)) { + valueType = OBJECT_TYPE; + } + return Tuple2.of(keyType, valueType); + } + private Expression jitWriteMap( Expression buffer, Expression map, Expression serializer, TypeRef typeRef) { - Tuple2, TypeRef> keyValueType = TypeUtils.getMapKeyValueType(typeRef); + Tuple2, TypeRef> keyValueType = getMapKeyValueType(typeRef); TypeRef keyType = keyValueType.f0; TypeRef valueType = keyValueType.f1; map = new Invoke(serializer, "onMapWrite", TypeUtils.mapOf(keyType, valueType), buffer, map); @@ -1867,7 +1888,7 @@ private Expression readContainerElement( */ protected Expression deserializeForMap( Expression buffer, TypeRef 
typeRef, Expression serializer, InvokeHint invokeHint) { - Tuple2, TypeRef> keyValueType = TypeUtils.getMapKeyValueType(typeRef); + Tuple2, TypeRef> keyValueType = getMapKeyValueType(typeRef); TypeRef keyType = keyValueType.f0; TypeRef valueType = keyValueType.f1; if (serializer == null) { diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/collection/ChildContainerSerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/collection/ChildContainerSerializersTest.java index a54d7e3ed4..86e9891e67 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/collection/ChildContainerSerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/collection/ChildContainerSerializersTest.java @@ -259,4 +259,24 @@ public void testSerializeCustomPrivateMap(boolean enableCodegen) { .build(); serDeMetaShared(fory, outerDO); } + + public static class ChildLinkedListElemList extends LinkedList {} + + public static class ChildLinkedListElemListStruct { + public ChildLinkedListElemList list; + } + + @Test + public void testElemTypeSameWithCollection() { + Fory fory = builder().withRefTracking(true).build(); + ChildLinkedListElemList list = new ChildLinkedListElemList(); + list.add(list); + ChildLinkedListElemList list1 = serDe(fory, list); + Assert.assertSame(list1.get(0), list1); + + ChildLinkedListElemListStruct struct = new ChildLinkedListElemListStruct(); + struct.list = list; + ChildLinkedListElemListStruct struct1 = serDe(fory, struct); + Assert.assertSame(struct1.list.get(0), struct1.list); + } } From 19180e95b410ba08cc6df03f3e925b4c683949e9 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Thu, 14 Aug 2025 16:35:50 +0800 Subject: [PATCH 03/22] refactor(java): rename abstract collection/map serializers to Map/ListLikeSerializer (#2466) ## What does this PR do? ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? 
- [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- docs/guide/java_serialization_guide.md | 20 ++++----- docs/specification/java_serialization_spec.md | 4 +- .../specification/xlang_serialization_spec.md | 2 +- .../fory/builder/BaseObjectCodecBuilder.java | 43 +++++++++---------- .../apache/fory/resolver/FieldResolver.java | 24 +++++------ .../apache/fory/resolver/XtypeResolver.java | 12 +++--- .../fory/serializer/CompatibleSerializer.java | 14 +++--- ...zer.java => CollectionLikeSerializer.java} | 8 ++-- .../collection/CollectionSerializer.java | 2 +- .../collection/CollectionSerializers.java | 14 +++--- ...Serializer.java => MapLikeSerializer.java} | 9 ++-- .../serializer/collection/MapSerializer.java | 2 +- .../serializer/collection/MapSerializers.java | 4 +- .../scala/SingletonCollectionSerializer.java | 4 +- .../scala/SingletonMapSerializer.java | 4 +- .../serializer/kotlin/CollectionSerializer.kt | 4 +- .../scala/CollectionSerializer.scala | 4 +- .../fory/serializer/scala/MapSerializer.scala | 4 +- .../serializer/scala/RangeSerializer.scala | 6 +-- 19 files changed, 90 insertions(+), 94 deletions(-) rename java/fory-core/src/main/java/org/apache/fory/serializer/collection/{AbstractCollectionSerializer.java => CollectionLikeSerializer.java} (98%) rename java/fory-core/src/main/java/org/apache/fory/serializer/collection/{AbstractMapSerializer.java => MapLikeSerializer.java} (99%) diff --git a/docs/guide/java_serialization_guide.md b/docs/guide/java_serialization_guide.md index 4498022778..a4f7da2b8f 100644 --- a/docs/guide/java_serialization_guide.md +++ b/docs/guide/java_serialization_guide.md @@ -308,8 +308,8 @@ fory.registerSerializer(Foo.class, new FooSerializer(fory)); ### Implement Collection Serializer -Similar to maps, when implementing a serializer for a custom Collection type, you must extend `CollectionSerializer` or `AbstractCollectionSerializer`. 
-The key difference between these two is that `AbstractCollectionSerializer` can serialize a class which has a collection-like structure but is not a java Collection subtype. +Similar to maps, when implementing a serializer for a custom Collection type, you must extend `CollectionSerializer` or `CollectionLikeSerializer`. +The key difference between these two is that `CollectionLikeSerializer` can serialize a class which has a collection-like structure but is not a java Collection subtype. For collection serializer, this is a special parameter `supportCodegenHook` needs be configured: @@ -433,7 +433,7 @@ class IntList extends AbstractCollection { } } -class IntListSerializer extends AbstractCollectionSerializer { +class IntListSerializer extends CollectionLikeSerializer { public IntListSerializer(Fory fory) { // Disable JIT since we're handling serialization directly super(fory, IntList.class, false); @@ -547,7 +547,7 @@ Sometimes you may want to implement a serializer for a type that behaves like a The key principles for collection-like type serialization are: -1. Extend `AbstractCollectionSerializer` for custom collection-like types +1. Extend `CollectionLikeSerializer` for custom collection-like types 2. Enable JIT optimization with `supportCodegenHook` 3. Provide efficient element access through views 4. Maintain proper size tracking @@ -643,7 +643,7 @@ class CollectionView extends AbstractCollection { } } -class CustomCollectionSerializer extends AbstractCollectionSerializer { +class CustomCollectionSerializer extends CollectionLikeSerializer { public CustomCollectionSerializer(Fory fory) { super(fory, CustomCollectionLike.class, true); } @@ -699,7 +699,7 @@ Note that this implementation provides better performance at the cost of flexibi ### Implement Map Serializer -When implementing a serializer for a custom Map type, you must extend `MapSerializer` or `AbstractMapSerializer`. 
The key difference between these two is that `AbstractMapSerializer` can serialize a class which has a map-like structure but is not a java Map subtype. +When implementing a serializer for a custom Map type, you must extend `MapSerializer` or `MapLikeSerializer`. The key difference between these two is that `MapLikeSerializer` can serialize a class which has a map-like structure but is not a java Map subtype. Similar to collection serializer, this is a special parameter `supportCodegenHook` needs be configured: @@ -797,7 +797,7 @@ class FixedValueMap extends AbstractMap { } } -class FixedValueMapSerializer extends AbstractMapSerializer { +class FixedValueMapSerializer extends MapLikeSerializer { public FixedValueMapSerializer(Fory fory) { // Disable codegen since we're handling serialization directly super(fory, FixedValueMap.class, false); @@ -900,7 +900,7 @@ Sometimes you may want to implement a serializer for a type that behaves like a The key principles for map-like type serialization are: -1. Extend `AbstractMapSerializer` for custom collection-like types +1. Extend `MapLikeSerializer` for custom map-like types 2. Enable JIT optimization with `supportCodegenHook` 3. Provide efficient element access through views 4. Maintain proper size tracking @@ -1028,7 +1028,7 @@ class MapView extends AbstractMap { } } -class CustomMapLikeSerializer extends AbstractMapSerializer { +class CustomMapLikeSerializer extends MapLikeSerializer { public CustomMapLikeSerializer(Fory fory) { super(fory, CustomMapLike.class, true); } @@ -1082,7 +1082,7 @@ fory.registerSerializer(CustomCollection.class, new CustomCollectionSerializer<> Note that when implementing custom map or collection serializers: -1. Always extend the appropriate base class (`MapSerializer`/`AbstractMapSerializer` for maps, `CollectionSerializer`/`AbstractCollectionSerializer` for collections) +1. 
Always extend the appropriate base class (`MapSerializer`/`MapLikeSerializer` for maps, `CollectionSerializer`/`CollectionLikeSerializer` for collections) 2. Consider the impact of `supportCodegenHook` on performance and functionality 3. Properly handle reference tracking if needed 4. Implement proper size management using `setNumElements` and `getAndClearNumElements` when `supportCodegenHook` is `true` diff --git a/docs/specification/java_serialization_spec.md b/docs/specification/java_serialization_spec.md index 2aa12b5f25..51fa304801 100644 --- a/docs/specification/java_serialization_spec.md +++ b/docs/specification/java_serialization_spec.md @@ -369,7 +369,7 @@ Which encoding to choose: ### Collection -> All collection serializers must extend `AbstractCollectionSerializer`. +> All collection serializers must extend `CollectionLikeSerializer`. Format: @@ -425,7 +425,7 @@ type. ### Map -> All Map serializers must extend `AbstractMapSerializer`. +> All Map serializers must extend `MapLikeSerializer`. Format: diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index 66ced93199..2fcbab1db7 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -661,7 +661,7 @@ else: fory.write_value(buffer, elem) ``` -[`CollectionSerializer#writeElements`](https://github.com/apache/fory/blob/20a1a78b17a75a123a6f5b7094c06ff77defc0fe/java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractCollectionSerializer.java#L302) +[`CollectionSerializer#writeElements`](https://github.com/apache/fory/blob/20a1a78b17a75a123a6f5b7094c06ff77defc0fe/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionLikeSerializer.java#L302) can be taken as an example. 
### array diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java index 0d965a5ff4..859dd594f5 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java @@ -46,11 +46,11 @@ import static org.apache.fory.codegen.ExpressionUtils.uninline; import static org.apache.fory.collection.Collections.ofHashSet; import static org.apache.fory.serializer.CodegenSerializer.LazyInitBeanSerializer; -import static org.apache.fory.serializer.collection.AbstractMapSerializer.MAX_CHUNK_SIZE; import static org.apache.fory.serializer.collection.MapFlags.KEY_DECL_TYPE; import static org.apache.fory.serializer.collection.MapFlags.TRACKING_KEY_REF; import static org.apache.fory.serializer.collection.MapFlags.TRACKING_VALUE_REF; import static org.apache.fory.serializer.collection.MapFlags.VALUE_DECL_TYPE; +import static org.apache.fory.serializer.collection.MapLikeSerializer.MAX_CHUNK_SIZE; import static org.apache.fory.type.TypeUtils.CLASS_TYPE; import static org.apache.fory.type.TypeUtils.COLLECTION_TYPE; import static org.apache.fory.type.TypeUtils.ITERATOR_TYPE; @@ -115,9 +115,9 @@ import org.apache.fory.serializer.PrimitiveSerializers.LongSerializer; import org.apache.fory.serializer.Serializer; import org.apache.fory.serializer.StringSerializer; -import org.apache.fory.serializer.collection.AbstractCollectionSerializer; -import org.apache.fory.serializer.collection.AbstractMapSerializer; import org.apache.fory.serializer.collection.CollectionFlags; +import org.apache.fory.serializer.collection.CollectionLikeSerializer; +import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.GraalvmSupport; @@ -140,8 +140,8 @@ public abstract class 
BaseObjectCodecBuilder extends CodecBuilder { private static final TypeRef STRING_SERIALIZER_TYPE_TOKEN = TypeRef.of(StringSerializer.class); private static final TypeRef SERIALIZER_TYPE = TypeRef.of(Serializer.class); private static final TypeRef COLLECTION_SERIALIZER_TYPE = - TypeRef.of(AbstractCollectionSerializer.class); - private static final TypeRef MAP_SERIALIZER_TYPE = TypeRef.of(AbstractMapSerializer.class); + TypeRef.of(CollectionLikeSerializer.class); + private static final TypeRef MAP_SERIALIZER_TYPE = TypeRef.of(MapLikeSerializer.class); private static final TypeRef GENERIC_TYPE = TypeRef.of(GenericType.class); protected final Reference refResolverRef; @@ -330,7 +330,7 @@ protected void addCommonImports() { ctx.addImports(LazyInitBeanSerializer.class, EnumSerializer.class); ctx.addImports(Serializer.class, StringSerializer.class); ctx.addImports(ObjectSerializer.class, CompatibleSerializer.class); - ctx.addImports(AbstractCollectionSerializer.class, AbstractMapSerializer.class); + ctx.addImports(CollectionLikeSerializer.class, MapLikeSerializer.class); } /** @@ -618,11 +618,11 @@ protected Expression getOrCreateSerializer(Class cls) { } } if (useCollectionSerialization(cls) - && !AbstractCollectionSerializer.class.isAssignableFrom(serializerClass)) { - serializerClass = AbstractCollectionSerializer.class; + && !CollectionLikeSerializer.class.isAssignableFrom(serializerClass)) { + serializerClass = CollectionLikeSerializer.class; } else if (useMapSerialization(cls) - && !AbstractMapSerializer.class.isAssignableFrom(serializerClass)) { - serializerClass = AbstractMapSerializer.class; + && !MapLikeSerializer.class.isAssignableFrom(serializerClass)) { + serializerClass = MapLikeSerializer.class; } TypeRef serializerTypeRef = TypeRef.of(serializerClass); Expression fieldTypeExpr = getClassExpr(cls); @@ -829,9 +829,8 @@ protected Expression serializeForCollection( "writeCollectionClassInfo", false); } - } else if 
(!TypeRef.of(AbstractCollectionSerializer.class).isSupertypeOf(serializer.type())) { - serializer = - cast(serializer, TypeRef.of(AbstractCollectionSerializer.class), "colSerializer"); + } else if (!TypeRef.of(CollectionLikeSerializer.class).isSupertypeOf(serializer.type())) { + serializer = cast(serializer, TypeRef.of(CollectionLikeSerializer.class), "colSerializer"); } TypeRef elementType = getElementType(typeRef); // write collection data. @@ -954,7 +953,7 @@ protected Expression writeCollectionData( /** * Write collection elements header: flags and maybe elements classinfo. Keep this consistent with - * `AbstractCollectionSerializer#writeElementsHeader`. + * `CollectionLikeSerializer#writeElementsHeader`. * * @return Tuple(flags, Nullable ( element serializer)) */ @@ -1140,8 +1139,8 @@ protected Expression serializeForMap( invokeGenerated( ctx, ofHashSet(buffer, map), writeClassAction, "writeMapClassInfo", false); } - } else if (!AbstractMapSerializer.class.isAssignableFrom(serializer.type().getRawType())) { - serializer = cast(serializer, TypeRef.of(AbstractMapSerializer.class), "mapSerializer"); + } else if (!MapLikeSerializer.class.isAssignableFrom(serializer.type().getRawType())) { + serializer = cast(serializer, TypeRef.of(MapLikeSerializer.class), "mapSerializer"); } Expression write = new If( @@ -1472,7 +1471,7 @@ protected Expression writeChunk( if (!inline) { expressions.add(new Return(entry)); // method too big, spilt it into a new method. 
- // Generate similar signature as `AbstractMapSerializer.writeJavaChunk`( + // Generate similar signature as `MapLikeSerializer.writeJavaChunk`( // MemoryBuffer buffer, // Entry entry, // Iterator> iterator, @@ -1679,8 +1678,8 @@ protected Expression deserializeForCollection( } } else { checkArgument( - AbstractCollectionSerializer.class.isAssignableFrom(serializer.type().getRawType()), - "Expected AbstractCollectionSerializer but got %s", + CollectionLikeSerializer.class.isAssignableFrom(serializer.type().getRawType()), + "Expected CollectionLikeSerializer but got %s", serializer.type()); } Invoke supportHook = inlineInvoke(serializer, "supportCodegenHook", PRIMITIVE_BOOLEAN_TYPE); @@ -1901,8 +1900,8 @@ protected Expression deserializeForMap( } } else { checkArgument( - AbstractMapSerializer.class.isAssignableFrom(serializer.type().getRawType()), - "Expected AbstractMapSerializer but got %s", + MapLikeSerializer.class.isAssignableFrom(serializer.type().getRawType()), + "Expected MapLikeSerializer but got %s", serializer.type()); } Expression mapSerializer = serializer; @@ -2103,7 +2102,7 @@ private Expression readChunk( new Return(ofInt(0))); expressions.add(returnSizeAndHeader); // method too big, spilt it into a new method. 
- // Generate similar signature as `AbstractMapSerializer.writeJavaChunk`( + // Generate similar signature as `MapLikeSerializer.writeJavaChunk`( // MemoryBuffer buffer, // long size, // int chunkHeader, diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/FieldResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/FieldResolver.java index aab4f570e4..b7d75dfce4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/FieldResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/FieldResolver.java @@ -53,8 +53,8 @@ import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.reflect.TypeRef; import org.apache.fory.serializer.PrimitiveSerializers; -import org.apache.fory.serializer.collection.AbstractCollectionSerializer; -import org.apache.fory.serializer.collection.AbstractMapSerializer; +import org.apache.fory.serializer.collection.CollectionLikeSerializer; +import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.type.Descriptor; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.MurmurHash3; @@ -580,8 +580,8 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType) { if (fieldType == FieldTypes.COLLECTION_ELEMENT_FINAL) { ClassInfo elementClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, classInfoHolder); - AbstractCollectionSerializer collectionSerializer = - (AbstractCollectionSerializer) classInfo.getSerializer(); + CollectionLikeSerializer collectionSerializer = + (CollectionLikeSerializer) classInfo.getSerializer(); try { collectionSerializer.setElementSerializer(elementClassInfo.getSerializer()); o = collectionSerializer.read(buffer); @@ -594,7 +594,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType) { ClassInfo keyClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo valueClassInfo = 
classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, classInfoHolder); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); @@ -608,7 +608,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType) { } else if (fieldType == FieldTypes.MAP_KEY_FINAL) { ClassInfo keyClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, classInfoHolder); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); o = mapSerializer.read(buffer); @@ -621,7 +621,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType) { Preconditions.checkArgument(fieldType == FieldTypes.MAP_VALUE_FINAL); ClassInfo valueClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, classInfoHolder); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); o = mapSerializer.read(buffer); @@ -639,8 +639,8 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType, FieldInf if (fieldType == FieldTypes.COLLECTION_ELEMENT_FINAL) { ClassInfo elementClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, fieldInfo.getClassInfoHolder()); - AbstractCollectionSerializer collectionSerializer = - 
(AbstractCollectionSerializer) classInfo.getSerializer(); + CollectionLikeSerializer collectionSerializer = + (CollectionLikeSerializer) classInfo.getSerializer(); try { collectionSerializer.setElementSerializer(elementClassInfo.getSerializer()); o = collectionSerializer.read(buffer); @@ -653,7 +653,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType, FieldInf ClassInfo keyClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo valueClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, fieldInfo.getClassInfoHolder()); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); @@ -667,7 +667,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType, FieldInf } else if (fieldType == FieldTypes.MAP_KEY_FINAL) { ClassInfo keyClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, fieldInfo.getClassInfoHolder()); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); o = mapSerializer.read(buffer); @@ -680,7 +680,7 @@ private Object readObjectWithFinal(MemoryBuffer buffer, byte fieldType, FieldInf Preconditions.checkArgument(fieldType == FieldTypes.MAP_VALUE_FINAL); ClassInfo valueClassInfo = classResolver.readClassInfo(buffer, classInfoHolder); ClassInfo classInfo = classResolver.readClassInfo(buffer, fieldInfo.getClassInfoHolder()); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer 
mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); o = mapSerializer.read(buffer); diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java index 4cd2bfdf05..dcdf71e5b0 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java @@ -74,11 +74,11 @@ import org.apache.fory.serializer.SerializationUtils; import org.apache.fory.serializer.Serializer; import org.apache.fory.serializer.Serializers; -import org.apache.fory.serializer.collection.AbstractCollectionSerializer; -import org.apache.fory.serializer.collection.AbstractMapSerializer; +import org.apache.fory.serializer.collection.CollectionLikeSerializer; import org.apache.fory.serializer.collection.CollectionSerializer; import org.apache.fory.serializer.collection.CollectionSerializers.ArrayListSerializer; import org.apache.fory.serializer.collection.CollectionSerializers.HashSetSerializer; +import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.serializer.collection.MapSerializer; import org.apache.fory.type.GenericType; import org.apache.fory.type.Generics; @@ -441,8 +441,8 @@ private ClassInfo buildClassInfo(Class cls) { } else { ClassInfo classInfo = classResolver.getClassInfo(cls, false); if (classInfo != null && classInfo.serializer != null) { - if (classInfo.serializer instanceof AbstractMapSerializer - && ((AbstractMapSerializer) classInfo.serializer).supportCodegenHook()) { + if (classInfo.serializer instanceof MapLikeSerializer + && ((MapLikeSerializer) classInfo.serializer).supportCodegenHook()) { serializer = classInfo.serializer; } else { serializer = new MapSerializer(fory, cls); @@ -469,8 +469,8 @@ private ClassInfo buildClassInfo(Class cls) { private Serializer 
getCollectionSerializer(Class cls) { ClassInfo classInfo = classResolver.getClassInfo(cls, false); if (classInfo != null && classInfo.serializer != null) { - if (classInfo.serializer instanceof AbstractCollectionSerializer - && ((AbstractCollectionSerializer) (classInfo.serializer)).supportCodegenHook()) { + if (classInfo.serializer instanceof CollectionLikeSerializer + && ((CollectionLikeSerializer) (classInfo.serializer)).supportCodegenHook()) { return classInfo.serializer; } } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/CompatibleSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/CompatibleSerializer.java index a302d2b666..eb32422399 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/CompatibleSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/CompatibleSerializer.java @@ -31,8 +31,8 @@ import org.apache.fory.resolver.ClassInfo; import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.FieldResolver; -import org.apache.fory.serializer.collection.AbstractCollectionSerializer; -import org.apache.fory.serializer.collection.AbstractMapSerializer; +import org.apache.fory.serializer.collection.CollectionLikeSerializer; +import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.util.Preconditions; import org.apache.fory.util.record.RecordInfo; import org.apache.fory.util.record.RecordUtils; @@ -232,8 +232,8 @@ private void writeCollectionField( // following write is consistent with `BaseSeqCodecBuilder.serializeForCollection` ClassInfo classInfo = fieldInfo.getClassInfo(fieldValue.getClass()); classResolver.writeClassInfo(buffer, classInfo); - AbstractCollectionSerializer collectionSerializer = - (AbstractCollectionSerializer) classInfo.getSerializer(); + CollectionLikeSerializer collectionSerializer = + (CollectionLikeSerializer) classInfo.getSerializer(); try { 
collectionSerializer.setElementSerializer(elementClassInfo.getSerializer()); collectionSerializer.write(buffer, fieldValue); @@ -253,7 +253,7 @@ private void writeMapKVFinal( // following write is consistent with `BaseSeqCodecBuilder.serializeForMap` ClassInfo classInfo = fieldInfo.getClassInfo(fieldValue.getClass()); classResolver.writeClassInfo(buffer, classInfo); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); @@ -273,7 +273,7 @@ private void writeMapKeyFinal( // following write is consistent with `BaseSeqCodecBuilder.serializeForMap` ClassInfo classInfo = fieldInfo.getClassInfo(fieldValue.getClass()); classResolver.writeClassInfo(buffer, classInfo); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setKeySerializer(keyClassInfo.getSerializer()); mapSerializer.write(buffer, fieldValue); @@ -291,7 +291,7 @@ private void writeMapValueFinal( // following write is consistent with `BaseSeqCodecBuilder.serializeForMap` ClassInfo classInfo = fieldInfo.getClassInfo(fieldValue.getClass()); classResolver.writeClassInfo(buffer, classInfo); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); try { mapSerializer.setValueSerializer(valueClassInfo.getSerializer()); mapSerializer.write(buffer, fieldValue); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractCollectionSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionLikeSerializer.java similarity index 98% rename from 
java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractCollectionSerializer.java rename to java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionLikeSerializer.java index 561f5ac985..29568d4d14 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractCollectionSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionLikeSerializer.java @@ -39,7 +39,7 @@ * Serializer for all collection like object. All collection serializer should extend this class. */ @SuppressWarnings({"unchecked", "rawtypes"}) -public abstract class AbstractCollectionSerializer extends Serializer { +public abstract class CollectionLikeSerializer extends Serializer { private MethodHandle constructor; private int numElements; protected final boolean supportCodegenHook; @@ -58,11 +58,11 @@ public abstract class AbstractCollectionSerializer extends Serializer { // interpreter and jit mode although it seems unnecessary. // With elements header, we can write this element class only once, the cost won't be too much. 
- public AbstractCollectionSerializer(Fory fory, Class cls) { + public CollectionLikeSerializer(Fory fory, Class cls) { this(fory, cls, !ReflectionUtils.isDynamicGeneratedCLass(cls)); } - public AbstractCollectionSerializer(Fory fory, Class cls, boolean supportCodegenHook) { + public CollectionLikeSerializer(Fory fory, Class cls, boolean supportCodegenHook) { super(fory, cls); this.supportCodegenHook = supportCodegenHook; elementClassInfoHolder = fory.getClassResolver().nilClassInfoHolder(); @@ -70,7 +70,7 @@ public AbstractCollectionSerializer(Fory fory, Class cls, boolean supportCode binding = SerializationBinding.createBinding(fory); } - public AbstractCollectionSerializer( + public CollectionLikeSerializer( Fory fory, Class cls, boolean supportCodegenHook, boolean immutable) { super(fory, cls, immutable); this.supportCodegenHook = supportCodegenHook; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializer.java index 97496aa0a3..5175775067 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializer.java @@ -26,7 +26,7 @@ /** Base serializer for all java collections. 
*/ @SuppressWarnings({"unchecked", "rawtypes"}) -public class CollectionSerializer extends AbstractCollectionSerializer { +public class CollectionSerializer extends CollectionLikeSerializer { public CollectionSerializer(Fory fory, Class type) { super(fory, type); } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializers.java index c77d01b569..fa0b2ce7c0 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/CollectionSerializers.java @@ -457,8 +457,7 @@ public SetFromMapSerializer(Fory fory, Class> type) { @Override public Collection newCollection(MemoryBuffer buffer) { final ClassInfo mapClassInfo = fory.getClassResolver().readClassInfo(buffer); - final AbstractMapSerializer mapSerializer = - (AbstractMapSerializer) mapClassInfo.getSerializer(); + final MapLikeSerializer mapSerializer = (MapLikeSerializer) mapClassInfo.getSerializer(); RefResolver refResolver = fory.getRefResolver(); // It's possible that elements or nested fields has circular ref to set. 
int refId = refResolver.lastPreservedRefId(); @@ -487,8 +486,8 @@ public Collection newCollection(Collection originCollection) { assert !fory.isCrossLanguage(); Map map = (Map) Platform.getObject(originCollection, MAP_FIELD_OFFSET); - AbstractMapSerializer mapSerializer = - (AbstractMapSerializer) fory.getClassResolver().getSerializer(map.getClass()); + MapLikeSerializer mapSerializer = + (MapLikeSerializer) fory.getClassResolver().getSerializer(map.getClass()); Map newMap = mapSerializer.newMap(map); return Collections.newSetFromMap(newMap); } @@ -497,7 +496,7 @@ public Collection newCollection(Collection originCollection) { public Collection onCollectionWrite(MemoryBuffer buffer, Set value) { final Map map = (Map) Platform.getObject(value, MAP_FIELD_OFFSET); final ClassInfo classInfo = fory.getClassResolver().getClassInfo(map.getClass()); - AbstractMapSerializer mapSerializer = (AbstractMapSerializer) classInfo.getSerializer(); + MapLikeSerializer mapSerializer = (MapLikeSerializer) classInfo.getSerializer(); fory.getClassResolver().writeClassInfo(buffer, classInfo); if (mapSerializer.supportCodegenHook) { buffer.writeBoolean(true); @@ -686,8 +685,7 @@ public PriorityQueue newCollection(MemoryBuffer buffer) { * serializer won't use element generics and doesn't support JIT, performance won't be the best, * but the correctness can be ensured. */ - public static final class DefaultJavaCollectionSerializer - extends AbstractCollectionSerializer { + public static final class DefaultJavaCollectionSerializer extends CollectionLikeSerializer { private Serializer dataSerializer; public DefaultJavaCollectionSerializer(Fory fory, Class cls) { @@ -734,7 +732,7 @@ public T read(MemoryBuffer buffer) { /** Collection serializer for class with JDK custom serialization methods defined. 
*/ public static final class JDKCompatibleCollectionSerializer - extends AbstractCollectionSerializer { + extends CollectionLikeSerializer { private final Serializer serializer; public JDKCompatibleCollectionSerializer(Fory fory, Class cls) { diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractMapSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapLikeSerializer.java similarity index 99% rename from java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractMapSerializer.java rename to java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapLikeSerializer.java index c3866bf88b..c7aa2c59d4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/AbstractMapSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapLikeSerializer.java @@ -55,7 +55,7 @@ /** Serializer for all map-like objects. */ @SuppressWarnings({"unchecked", "rawtypes"}) -public abstract class AbstractMapSerializer extends Serializer { +public abstract class MapLikeSerializer extends Serializer { public static final int MAX_CHUNK_SIZE = 255; protected MethodHandle constructor; @@ -83,16 +83,15 @@ public abstract class AbstractMapSerializer extends Serializer { private final TypeResolver typeResolver; protected final SerializationBinding binding; - public AbstractMapSerializer(Fory fory, Class cls) { + public MapLikeSerializer(Fory fory, Class cls) { this(fory, cls, !ReflectionUtils.isDynamicGeneratedCLass(cls)); } - public AbstractMapSerializer(Fory fory, Class cls, boolean supportCodegenHook) { + public MapLikeSerializer(Fory fory, Class cls, boolean supportCodegenHook) { this(fory, cls, supportCodegenHook, false); } - public AbstractMapSerializer( - Fory fory, Class cls, boolean supportCodegenHook, boolean immutable) { + public MapLikeSerializer(Fory fory, Class cls, boolean supportCodegenHook, boolean immutable) { super(fory, cls, 
immutable); this.typeResolver = fory.isCrossLanguage() ? fory.getXtypeResolver() : fory.getClassResolver(); this.supportCodegenHook = supportCodegenHook; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializer.java index ab3010ff4f..8a648f5d8a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializer.java @@ -25,7 +25,7 @@ /** Base serializer for all java maps. */ @SuppressWarnings({"unchecked", "rawtypes"}) -public class MapSerializer extends AbstractMapSerializer { +public class MapSerializer extends MapLikeSerializer { public MapSerializer(Fory fory, Class cls) { super(fory, cls); } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializers.java index 520be2d8cc..1b71128138 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/collection/MapSerializers.java @@ -377,7 +377,7 @@ protected void copyEntry(Map originMap, Map newMap) { * won't use element generics and doesn't support JIT, performance won't be the best, but the * correctness can be ensured. */ - public static final class DefaultJavaMapSerializer extends AbstractMapSerializer { + public static final class DefaultJavaMapSerializer extends MapLikeSerializer { private Serializer dataSerializer; public DefaultJavaMapSerializer(Fory fory, Class cls) { @@ -427,7 +427,7 @@ public T read(MemoryBuffer buffer) { } /** Map serializer for class with JDK custom serialization methods defined. 
*/ - public static class JDKCompatibleMapSerializer extends AbstractMapSerializer { + public static class JDKCompatibleMapSerializer extends MapLikeSerializer { private final Serializer serializer; public JDKCompatibleMapSerializer(Fory fory, Class cls) { diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonCollectionSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonCollectionSerializer.java index 7b4c5a612d..548ab6521e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonCollectionSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonCollectionSerializer.java @@ -24,7 +24,7 @@ import org.apache.fory.Fory; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.Platform; -import org.apache.fory.serializer.collection.AbstractCollectionSerializer; +import org.apache.fory.serializer.collection.CollectionLikeSerializer; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -34,7 +34,7 @@ * an object. 
*/ @SuppressWarnings("rawtypes") -public class SingletonCollectionSerializer extends AbstractCollectionSerializer { +public class SingletonCollectionSerializer extends CollectionLikeSerializer { private final Field field; private Object base = null; private long offset = -1; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonMapSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonMapSerializer.java index 35a3803d22..07b954f959 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonMapSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/scala/SingletonMapSerializer.java @@ -24,7 +24,7 @@ import org.apache.fory.Fory; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.Platform; -import org.apache.fory.serializer.collection.AbstractMapSerializer; +import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -34,7 +34,7 @@ * object. 
*/ @SuppressWarnings("rawtypes") -public class SingletonMapSerializer extends AbstractMapSerializer { +public class SingletonMapSerializer extends MapLikeSerializer { private final Field field; private Object base = null; private long offset = -1; diff --git a/kotlin/src/main/kotlin/org/apache/fory/serializer/kotlin/CollectionSerializer.kt b/kotlin/src/main/kotlin/org/apache/fory/serializer/kotlin/CollectionSerializer.kt index 328273785d..6eb8b91753 100644 --- a/kotlin/src/main/kotlin/org/apache/fory/serializer/kotlin/CollectionSerializer.kt +++ b/kotlin/src/main/kotlin/org/apache/fory/serializer/kotlin/CollectionSerializer.kt @@ -21,14 +21,14 @@ package org.apache.fory.serializer.kotlin import org.apache.fory.Fory import org.apache.fory.memory.MemoryBuffer -import org.apache.fory.serializer.collection.AbstractCollectionSerializer +import org.apache.fory.serializer.collection.CollectionLikeSerializer /** Serializer for kotlin collections. */ @Suppress("UNCHECKED_CAST") public abstract class AbstractKotlinCollectionSerializer>( fory: Fory, cls: Class -) : AbstractCollectionSerializer(fory, cls) { +) : CollectionLikeSerializer(fory, cls) { abstract override fun onCollectionWrite(buffer: MemoryBuffer, value: T): Collection override fun read(buffer: MemoryBuffer): T { diff --git a/scala/src/main/scala/org/apache/fory/serializer/scala/CollectionSerializer.scala b/scala/src/main/scala/org/apache/fory/serializer/scala/CollectionSerializer.scala index cbae7546de..b9274a1ec3 100644 --- a/scala/src/main/scala/org/apache/fory/serializer/scala/CollectionSerializer.scala +++ b/scala/src/main/scala/org/apache/fory/serializer/scala/CollectionSerializer.scala @@ -21,7 +21,7 @@ package org.apache.fory.serializer.scala import org.apache.fory.Fory import org.apache.fory.memory.MemoryBuffer -import org.apache.fory.serializer.collection.AbstractCollectionSerializer +import org.apache.fory.serializer.collection.CollectionLikeSerializer import java.util import scala.collection.{Factory, 
Iterable, mutable} @@ -44,7 +44,7 @@ import scala.collection.{Factory, Iterable, mutable} *
  • `onCollectionRead`: create scala collection from builder.
  • */ abstract class AbstractScalaCollectionSerializer[A, T <: Iterable[A]](fory: Fory, cls: Class[T]) - extends AbstractCollectionSerializer[T](fory, cls) { + extends CollectionLikeSerializer[T](fory, cls) { override def onCollectionWrite(buffer: MemoryBuffer, value: T): util.Collection[_] override def read(buffer: MemoryBuffer): T = { diff --git a/scala/src/main/scala/org/apache/fory/serializer/scala/MapSerializer.scala b/scala/src/main/scala/org/apache/fory/serializer/scala/MapSerializer.scala index b8f5778258..b528ade55c 100644 --- a/scala/src/main/scala/org/apache/fory/serializer/scala/MapSerializer.scala +++ b/scala/src/main/scala/org/apache/fory/serializer/scala/MapSerializer.scala @@ -22,7 +22,7 @@ package org.apache.fory.serializer.scala import org.apache.fory.Fory import org.apache.fory.collection.MapEntry import org.apache.fory.memory.MemoryBuffer -import org.apache.fory.serializer.collection.AbstractMapSerializer +import org.apache.fory.serializer.collection.MapLikeSerializer import java.util import scala.collection.{Factory, mutable} @@ -45,7 +45,7 @@ import scala.collection.{Factory, mutable} *
  • `onMapRead`: create scala map from builder.
  • */ abstract class AbstractScalaMapSerializer[K, V, T](fory: Fory, cls: Class[T]) - extends AbstractMapSerializer[T](fory, cls) { + extends MapLikeSerializer[T](fory, cls) { def onMapWrite(buffer: MemoryBuffer, value: T): util.Map[_, _] override def read(buffer: MemoryBuffer): T = { diff --git a/scala/src/main/scala/org/apache/fory/serializer/scala/RangeSerializer.scala b/scala/src/main/scala/org/apache/fory/serializer/scala/RangeSerializer.scala index 5425a518a2..32b6722777 100644 --- a/scala/src/main/scala/org/apache/fory/serializer/scala/RangeSerializer.scala +++ b/scala/src/main/scala/org/apache/fory/serializer/scala/RangeSerializer.scala @@ -23,7 +23,7 @@ import org.apache.fory.Fory import org.apache.fory.memory.MemoryBuffer import org.apache.fory.reflect.FieldAccessor import org.apache.fory.serializer.Serializer -import org.apache.fory.serializer.collection.AbstractCollectionSerializer +import org.apache.fory.serializer.collection.CollectionLikeSerializer import org.apache.fory.util.unsafe._JDKAccess import java.lang.invoke.{MethodHandle, MethodHandles} @@ -31,7 +31,7 @@ import java.util import scala.collection.immutable.NumericRange class RangeSerializer[T <: Range](fory: Fory, cls: Class[T]) - extends AbstractCollectionSerializer[T](fory, cls, false) { + extends CollectionLikeSerializer[T](fory, cls, false) { override def write(buffer: MemoryBuffer, value: T): Unit = { buffer.writeVarInt32(value.start) @@ -66,7 +66,7 @@ private object RangeUtils { class NumericRangeSerializer[A, T <: NumericRange[A]](fory: Fory, cls: Class[T]) - extends AbstractCollectionSerializer[T](fory, cls, false) { + extends CollectionLikeSerializer[T](fory, cls, false) { private val ctr = RangeUtils.lookupCache.get(cls) private val getter = FieldAccessor.createAccessor(cls.getDeclaredFields.find(f => f.getType == classOf[Integral[?]]).get) From 461d5f8fe4d5006d0155e2a424dc6c616c4887db Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Thu, 14 Aug 2025 17:13:21 +0800 Subject: [PATCH 
04/22] fix(python): fix gh action pypi publish (#2468) ## What does this PR do? ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- .github/workflows/release.yaml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7061a3436b..25cbba7688 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -22,6 +22,9 @@ on: tags: - "v*" +permissions: + contents: read + jobs: build-wheels: name: Build Wheels @@ -98,6 +101,9 @@ jobs: name: Publish Wheels runs-on: ubuntu-latest needs: build-wheels + permissions: + contents: read + id-token: write steps: - name: Download Wheel Artifacts uses: actions/download-artifact@v4 @@ -106,9 +112,16 @@ jobs: merge-multiple: true - name: Display structure of downloaded files run: ls -R downloaded_wheels - - name: Publish Wheels to PyPI + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ startsWith(github.ref, 'refs/tags/') && contains(github.ref, '-') }} + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + packages-dir: downloaded_wheels + - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: downloaded_wheels/ + skip-existing: true + packages-dir: downloaded_wheels From 373e712b35503d743683fd2911a3d83cb184e0f0 Mon Sep 17 00:00:00 2001 From: adri Date: Thu, 14 Aug 2025 12:57:35 +0200 Subject: [PATCH 05/22] feat(memory): add customizable MemoryAllocator interface (#2467) ## What does this PR do? This PR introduces a new `MemoryAllocator` interface that allows customisation of memory allocation strategies in `MemoryBuffer`. 
This enables users to implement custom allocation policies. ## Related issues - Closes https://github.com/apache/fory/issues/2459 - https://github.com/apache/fory/pull/2457 - Closes https://github.com/apache/fory/issues/2350. ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? --------- Co-authored-by: Shawn Yang --- docs/guide/java_serialization_guide.md | 78 +++++++++ .../apache/fory/memory/MemoryAllocator.java | 41 +++++ .../org/apache/fory/memory/MemoryBuffer.java | 72 +++++++-- .../fory/memory/MemoryAllocatorTest.java | 153 ++++++++++++++++++ 4 files changed, 331 insertions(+), 13 deletions(-) create mode 100644 java/fory-core/src/main/java/org/apache/fory/memory/MemoryAllocator.java create mode 100644 java/fory-core/src/test/java/org/apache/fory/memory/MemoryAllocatorTest.java diff --git a/docs/guide/java_serialization_guide.md b/docs/guide/java_serialization_guide.md index a4f7da2b8f..7f1f7a2aff 100644 --- a/docs/guide/java_serialization_guide.md +++ b/docs/guide/java_serialization_guide.md @@ -1089,6 +1089,84 @@ Note that when implementing custom map or collection serializers: Besides registering serializes, one can also implement `java.io.Externalizable` for a class to customize serialization logic, such type will be serialized by fory `ExternalizableSerializer`. +### Memory Allocation Customization + +Fory provides a `MemoryAllocator` interface that allows you to customize how memory buffers are allocated and grown during serialization operations. This can be useful for performance optimization, memory pooling, or debugging memory usage. + +#### MemoryAllocator Interface + +The `MemoryAllocator` interface defines two key methods: + +```java +public interface MemoryAllocator { + /** + * Allocates a new MemoryBuffer with the specified initial capacity. 
+ */ + MemoryBuffer allocate(int initialCapacity); + + /** + * Grows an existing buffer to accommodate the new capacity. + * The implementation must grow the buffer in-place by modifying + * the existing buffer instance. + */ + MemoryBuffer grow(MemoryBuffer buffer, int newCapacity); +} +``` + +#### Using Custom Memory Allocators + +You can set a global memory allocator that will be used by all `MemoryBuffer` instances: + +```java +// Create a custom allocator +MemoryAllocator customAllocator = new MemoryAllocator() { + @Override + public MemoryBuffer allocate(int initialCapacity) { + // Add extra capacity for debugging or pooling + return MemoryBuffer.fromByteArray(new byte[initialCapacity + 100]); + } + + @Override + public MemoryBuffer grow(MemoryBuffer buffer, int newCapacity) { + if (newCapacity <= buffer.size()) { + return buffer; + } + + // Custom growth strategy - add 100% extra capacity + int newSize = (int) (newCapacity * 2); + byte[] data = new byte[newSize]; + buffer.copyToUnsafe(0, data, Platform.BYTE_ARRAY_OFFSET, buffer.size()); + buffer.initHeapBuffer(data, 0, data.length); + return buffer; + } +}; + +// Set the custom allocator globally +MemoryBuffer.setGlobalAllocator(customAllocator); + +// All subsequent MemoryBuffer allocations will use your custom allocator +Fory fory = Fory.builder().withLanguage(Language.JAVA).build(); +byte[] bytes = fory.serialize(someObject); // Uses custom allocator +``` + +#### Default Memory Allocator Behavior + +The default allocator uses the following growth strategy: + +- For buffers smaller than `BUFFER_GROW_STEP_THRESHOLD` (100MB): multiply capacity by 2 +- For larger buffers: multiply capacity by 1.5 (capped at `Integer.MAX_VALUE - 8`) + +This provides a balance between avoiding frequent reallocations and preventing excessive memory usage. 
+ +#### Use Cases + +Custom memory allocators are useful for: + +- **Memory Pooling**: Reuse allocated buffers to reduce GC pressure +- **Performance Tuning**: Use different growth strategies based on your workload +- **Debugging**: Add logging or tracking to monitor memory usage +- **Off-heap Memory**: Integrate with off-heap memory management systems + ### Security & Class Registration `ForyBuilder#requireClassRegistration` can be used to disable class registration, this will allow to deserialize objects diff --git a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryAllocator.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryAllocator.java new file mode 100644 index 0000000000..ebbc6098c3 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryAllocator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.memory; + +/** Interface for customizing memory allocation strategies in MemoryBuffer. */ +public interface MemoryAllocator { + /** + * Allocates a new MemoryBuffer with the specified initial capacity. 
+ * + * @param initialCapacity the initial capacity for the buffer + * @return a new MemoryBuffer instance + */ + MemoryBuffer allocate(int initialCapacity); + + /** + * Grows an existing buffer to accommodate the new capacity. The implementation must grow the + * buffer in-place by modifying the existing buffer instance. + * + * @param buffer the existing buffer to grow + * @param newCapacity the required new capacity + * @return the same MemoryBuffer instance with at least the new capacity + */ + MemoryBuffer grow(MemoryBuffer buffer, int newCapacity); +} diff --git a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java index fe7704ca9a..97f1fa0d4a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java @@ -64,6 +64,9 @@ public final class MemoryBuffer { private static final Unsafe UNSAFE = Platform.UNSAFE; private static final boolean LITTLE_ENDIAN = (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN); + // Global allocator instance that can be customized + private static volatile MemoryAllocator globalAllocator = new DefaultMemoryAllocator(); + // If the data in on the heap, `heapMemory` will be non-null, and its' the object relative to // which we access the memory. // If we have this buffer, we must never void this reference, or the memory buffer will point @@ -1233,27 +1236,17 @@ public void writePrimitiveArray(Object arr, int offset, int numBytes) { public void grow(int neededSize) { int length = writerIndex + neededSize; if (length > size) { - growBuffer(length); + globalAllocator.grow(this, length); } } /** For off-heap buffer, this will make a heap buffer internally. 
*/ public void ensure(int length) { if (length > size) { - growBuffer(length); + globalAllocator.grow(this, length); } } - private void growBuffer(int length) { - int newSize = - length < BUFFER_GROW_STEP_THRESHOLD - ? length << 2 - : (int) Math.min(length * 1.5d, Integer.MAX_VALUE - 8); - byte[] data = new byte[newSize]; - copyToUnsafe(0, data, Platform.BYTE_ARRAY_OFFSET, size()); - initHeapBuffer(data, 0, data.length); - } - // ------------------------------------------------------------------------- // Read Methods // ------------------------------------------------------------------------- @@ -2607,6 +2600,59 @@ public String toString() { + '}'; } + // ------------------------------------------------------------------------ + // Memory Allocator Support + // ------------------------------------------------------------------------ + + /** Default memory allocator that uses the original heap-based allocation strategy. */ + private static final class DefaultMemoryAllocator implements MemoryAllocator { + @Override + public MemoryBuffer allocate(int initialSize) { + return fromByteArray(new byte[initialSize]); + } + + @Override + public MemoryBuffer grow(MemoryBuffer buffer, int newCapacity) { + if (newCapacity <= buffer.size()) { + return buffer; + } + + int newSize = + newCapacity < BUFFER_GROW_STEP_THRESHOLD + ? newCapacity << 1 + : (int) Math.min(newCapacity * 1.5d, Integer.MAX_VALUE - 8); + + byte[] data = new byte[newSize]; + buffer.copyToUnsafe(0, data, Platform.BYTE_ARRAY_OFFSET, buffer.size()); + buffer.initHeapBuffer(data, 0, data.length); + + return buffer; + } + } + + /** + * Sets the global memory allocator. This affects all new MemoryBuffer allocations and growth + * operations. 
+ * + * @param allocator the new global allocator to use + * @throws NullPointerException if allocator is null + */ + public static void setGlobalAllocator(MemoryAllocator allocator) { + if (allocator == null) { + throw new NullPointerException("Memory allocator cannot be null"); + } + globalAllocator = allocator; + } + + /** + * Gets the current global memory allocator. + * + * @return the current global allocator + */ + public static MemoryAllocator getGlobalAllocator() { + return globalAllocator; + } + /** Point this buffer to a new byte array. */ public void pointTo(byte[] buffer, int offset, int length) { initHeapBuffer(buffer, offset, length); @@ -2663,6 +2709,6 @@ public static MemoryBuffer fromNativeAddress(long address, int size) { * enough. */ public static MemoryBuffer newHeapBuffer(int initialSize) { - return fromByteArray(new byte[initialSize]); + return globalAllocator.allocate(initialSize); } } diff --git a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryAllocatorTest.java b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryAllocatorTest.java new file mode 100644 index 0000000000..68c3e613de --- /dev/null +++ b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryAllocatorTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.memory; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class MemoryAllocatorTest { + + private MemoryAllocator originalAllocator; + + @BeforeMethod + public void setUp() { + // Save the original allocator before each test + originalAllocator = MemoryBuffer.getGlobalAllocator(); + } + + @AfterMethod + public void tearDown() { + // Restore the original allocator after each test + MemoryBuffer.setGlobalAllocator(originalAllocator); + } + + @Test + public void testDefaultMemoryAllocator() { + MemoryAllocator defaultAllocator = MemoryBuffer.getGlobalAllocator(); + + MemoryBuffer buffer = defaultAllocator.allocate(100); + assertEquals(buffer.size(), 100); + assertFalse(buffer.isOffHeap()); + + // Test growth below BUFFER_GROW_STEP_THRESHOLD (should multiply by 2) + defaultAllocator.grow(buffer, 200); + assertEquals(buffer.size(), 200 << 1); + + // Test growth above BUFFER_GROW_STEP_THRESHOLD + buffer = defaultAllocator.allocate(100); + int largeCapacity = MemoryBuffer.BUFFER_GROW_STEP_THRESHOLD + 1000; + defaultAllocator.grow(buffer, largeCapacity); + int expectedSize = (int) Math.min(largeCapacity * 1.5d, Integer.MAX_VALUE - 8); + assertEquals(buffer.size(), expectedSize); + } + + @Test + public void testDefaultMemoryAllocatorDataPreservation() { + MemoryAllocator defaultAllocator = MemoryBuffer.getGlobalAllocator(); + MemoryBuffer buffer = defaultAllocator.allocate(100); + + // Write some test data + byte[] testData = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + buffer.writeBytes(testData); + buffer.writeInt32(42); + buffer.writeInt64(123456789L); + + int 
writerIndexBeforeGrowth = buffer.writerIndex(); + + // Grow the buffer + defaultAllocator.grow(buffer, 500); + + // Verify data is preserved + buffer.readerIndex(0); + byte[] readData = new byte[testData.length]; + buffer.readBytes(readData); + for (int i = 0; i < testData.length; i++) { + assertEquals(readData[i], testData[i]); + } + + assertEquals(buffer.readInt32(), 42); + assertEquals(buffer.readInt64(), 123456789L); + assertEquals(buffer.writerIndex(), writerIndexBeforeGrowth); + } + + @Test + public void testDefaultMemoryAllocatorGrowthSameInstance() { + MemoryAllocator defaultAllocator = MemoryBuffer.getGlobalAllocator(); + MemoryBuffer buffer = defaultAllocator.allocate(100); + + // Growth should return the same instance + MemoryBuffer grownBuffer = defaultAllocator.grow(buffer, 200); + assertSame(buffer, grownBuffer); + } + + @Test + public void testCustomAllocator() { + // Create a custom allocator that adds a marker + MemoryAllocator customAllocator = + new MemoryAllocator() { + @Override + public MemoryBuffer allocate(int initialCapacity) { + // Use larger capacity as a marker + return MemoryBuffer.fromByteArray(new byte[initialCapacity + 10]); + } + + @Override + public MemoryBuffer grow(MemoryBuffer buffer, int newCapacity) { + if (newCapacity <= buffer.size()) { + return buffer; + } + + // Use default grow logic but with custom marker + int newSize = newCapacity + 10; // Add 10 as marker + byte[] data = new byte[newSize]; + buffer.copyToUnsafe(0, data, Platform.BYTE_ARRAY_OFFSET, buffer.size()); + buffer.initHeapBuffer(data, 0, data.length); + return buffer; + } + }; + + // Set the custom allocator + MemoryBuffer.setGlobalAllocator(customAllocator); + assertSame(MemoryBuffer.getGlobalAllocator(), customAllocator); + + // Test allocation + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(100); + assertEquals(buffer.size(), 110); // 100 + 10 marker + + // Test growth + buffer.writerIndex(50); + buffer.readerIndex(10); + buffer.ensure(200); // This 
should trigger growth + assertEquals(buffer.writerIndex(), 50); + assertEquals(buffer.readerIndex(), 10); + assertTrue(buffer.size() >= 210); // Should be at least 200 + 10 marker + } + + @Test(expectedExceptions = NullPointerException.class) + public void testSetNullAllocator() { + MemoryBuffer.setGlobalAllocator(null); + } +} From 9a246c594acf98fc335649afd662f4e5ce93085e Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Thu, 14 Aug 2025 19:08:03 +0800 Subject: [PATCH 06/22] fix(java): fix row encoder for private struct (#2469) ## What does this PR do? ## Related issues Closes #2439 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- .../java/org/apache/fory/type/TypeUtils.java | 63 +++++++++---------- .../format/encoder/RowEncoderBuilder.java | 2 +- .../fory/format/encoder/RowEncoderTest.java | 17 +++++ 3 files changed, 47 insertions(+), 35 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index a4af2479fe..5d35b5b5c4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -619,43 +619,38 @@ public static boolean isBean(TypeRef typeRef, TypeResolutionContext ctx) { || ctx.getCustomTypeRegistry().isExtraSupportedType(typeRef)) { return false; } - // since we need to access class in generated code in our package, the class must be public // if ReflectionUtils.hasNoArgConstructor(cls) return false, we use Unsafe to create object. 
- if (Modifier.isPublic(cls.getModifiers())) { - // bean class can be static nested class, but can't be not a non-static inner class - if (cls.getEnclosingClass() != null && !Modifier.isStatic(cls.getModifiers())) { - return false; - } - TypeResolutionContext newTypePath = ctx.appendTypePath(typeRef); - if (cls == Object.class) { - // return false for typeToken that point to un-specialized generic type. - return false; - } - boolean maybe = - !SUPPORTED_TYPES.contains(typeRef) - && !typeRef.isArray() - && !cls.isEnum() - && !ITERABLE_TYPE.isSupertypeOf(typeRef) - && !MAP_TYPE.isSupertypeOf(typeRef); - if (maybe) { - for (Descriptor d : Descriptor.getDescriptors(cls)) { - TypeRef t = d.getTypeRef(); - // do field modifiers and getter/setter validation here, not in getDescriptors. - // If Modifier.isFinal(d.getModifiers()), use reflection - // private field that doesn't have getter/setter will be handled by reflection. - TypeRef replacementType = - ctx.getCustomTypeRegistry().replacementTypeFor(cls, t.getRawType()); - if (replacementType != null) { - t = replacementType; - } - if (!isSupported(t, newTypePath)) { - return false; - } + // bean class can be a static nested class, but can't be a non-static inner class + if (cls.getEnclosingClass() != null && !Modifier.isStatic(cls.getModifiers())) { + return false; + } + TypeResolutionContext newTypePath = ctx.appendTypePath(typeRef); + if (cls == Object.class) { + // return false for typeToken that point to un-specialized generic type. + return false; + } + boolean maybe = + !SUPPORTED_TYPES.contains(typeRef) + && !typeRef.isArray() + && !cls.isEnum() + && !ITERABLE_TYPE.isSupertypeOf(typeRef) + && !MAP_TYPE.isSupertypeOf(typeRef); + if (maybe) { + for (Descriptor d : Descriptor.getDescriptors(cls)) { + TypeRef t = d.getTypeRef(); + // do field modifiers and getter/setter validation here, not in getDescriptors.
+ // If Modifier.isFinal(d.getModifiers()), use reflection + // private field that doesn't have getter/setter will be handled by reflection. + TypeRef replacementType = + ctx.getCustomTypeRegistry().replacementTypeFor(cls, t.getRawType()); + if (replacementType != null) { + t = replacementType; + } + if (!isSupported(t, newTypePath)) { + return false; } - return true; - } else { - return false; } + return true; } else { return false; } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java index f8c16a38c3..45f65fff93 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java @@ -184,7 +184,7 @@ public String genCode() { @Override public Expression buildEncodeExpression() { Reference inputObject = new Reference(ROOT_OBJECT_NAME, TypeUtils.OBJECT_TYPE, false); - Expression bean = new Expression.Cast(inputObject, beanType, ctx.newName(beanClass)); + Expression bean = tryCastIfPublic(inputObject, beanType); Reference writer = new Reference(ROOT_ROW_WRITER_NAME, rowWriterTypeToken, false); Reference schemaExpr = new Reference(SCHEMA_NAME, schemaTypeToken, false); diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/RowEncoderTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/RowEncoderTest.java index dd6832e9a6..4842964568 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/RowEncoderTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/RowEncoderTest.java @@ -19,6 +19,7 @@ package org.apache.fory.format.encoder; +import static org.apache.fory.collection.Collections.ofHashMap; import static org.apache.fory.format.encoder.CodecBuilderTest.testStreamingEncode; import com.google.common.collect.ImmutableMap; @@ -101,4 +102,20 
@@ public void testImportInnerClass() { Foo deserializedFoo = encoder.fromRow(row); Assert.assertEquals(foo, deserializedFoo); } + + private static class PrivateStruct { + java.util.Map f1; + java.util.Map f2; + } + + @Test + public void testPrivateBean() { + RowEncoder encoder = Encoders.bean(PrivateStruct.class); + PrivateStruct s = new PrivateStruct(); + s.f1 = ofHashMap(10L, 100L); + s.f2 = ofHashMap("k", "v"); + PrivateStruct s1 = encoder.decode(encoder.encode(s)); + Assert.assertEquals(s1.f1, s.f1); + Assert.assertEquals(s1.f2, s.f2); + } } From 9004a03b7db1a2d1e803238f401b6a671859cbfa Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Fri, 15 Aug 2025 02:28:48 +0800 Subject: [PATCH 07/22] fix(python): fix pyfory pypi release (#2473) ## What does this PR do? ## Related issues #2381 #2472 Closes #2471 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- .github/workflows/release.yaml | 21 ++++++++++++----- ci/deploy.sh | 43 ++++++++++++++++------------------ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 25cbba7688..919d326fa7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -69,19 +69,26 @@ jobs: DOCKER_IMAGE="${{ env.manylinux_aarch64_image }}" PLAT="manylinux_2_28_aarch64" fi - docker run --rm -e PLAT=$PLAT \ + PY_VERSION=${{ matrix.python-version }} + echo "PY_VERSION: $PY_VERSION" + PY_VERSION=${PY_VERSION//./} + echo "PY_VERSION without dots: $PY_VERSION" + docker run --rm -e PY_VERSION="$PY_VERSION" -e PLAT="$PLAT" \ -v ${{ github.workspace }}:/work \ -w /work "$DOCKER_IMAGE" \ bash -c " set -e - # Install build dependencies inside the container yum install -y git sudo wget git config --global --add safe.directory /work - - # Install Bazel inside the container + ls -alh /opt/python + echo \"PY_VERSION: 
\$PY_VERSION\" + ls /opt/python/cp\${PY_VERSION}-cp\${PY_VERSION} + ls /opt/python/cp\${PY_VERSION}-cp\${PY_VERSION}/bin + export PATH=/opt/python/cp\${PY_VERSION}-cp\${PY_VERSION}/bin:\$PATH + echo \"PATH: \$PATH\" + echo \"Using Python from: \$(which python)\" + echo \"Python version: \$(python -V)\" bash ci/run_ci.sh install_bazel - - # Build the wheel inside the container bash ci/deploy.sh build_pyfory " @@ -118,10 +125,12 @@ jobs: with: repository-url: https://test.pypi.org/legacy/ skip-existing: true + verify-metadata: false packages-dir: downloaded_wheels - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} with: skip-existing: true + verify-metadata: false packages-dir: downloaded_wheels diff --git a/ci/deploy.sh b/ci/deploy.sh index 6cce344b0a..7de5af5a31 100755 --- a/ci/deploy.sh +++ b/ci/deploy.sh @@ -72,40 +72,37 @@ build_pyfory() { # Fix strange installed deps not found pip install setuptools -U - # Detect host architecture and only pass x86_64 config when appropriate - ARCH=$(uname -m) - if [[ "$ARCH" == "x86_64" || "$ARCH" == "amd64" ]]; then - bazel build --config=x86_64 //:cp_fory_so - else - bazel build //:cp_fory_so - fi - python setup.py bdist_wheel --dist-dir=../dist + ls -l ../dist if [ -n "$PLAT" ]; then # In manylinux container, repair the wheel to embed shared libraries # and rename the wheel with the manylinux tag. 
PYARROW_LIB_DIR=$(python -c 'import pyarrow; print(":".join(pyarrow.get_library_dirs()))') export LD_LIBRARY_PATH="$PYARROW_LIB_DIR:$LD_LIBRARY_PATH" - auditwheel repair ../dist/pyfory-*-linux_*.whl --plat "$PLAT" -w ../dist/ + auditwheel repair ../dist/pyfory-*-linux_*.whl --plat "$PLAT" --exclude '*arrow*' --exclude '*parquet*' --exclude '*numpy*' -w ../dist/ rm ../dist/pyfory-*-linux_*.whl elif [[ "$OSTYPE" == "darwin"* ]]; then - # macOS: use delocate to bundle dependencies and fix wheel tags - pip install delocate - mkdir -p ../dist_repaired - delocate-wheel -w ../dist_repaired/ ../dist/pyfory-*-macosx*.whl - rm ../dist/pyfory-*-macosx*.whl - mv ../dist_repaired/* ../dist/ - rmdir ../dist_repaired + # Check macOS version + MACOS_VERSION=$(sw_vers -productVersion | cut -d. -f1-2) + if [[ "$MACOS_VERSION" == "13"* ]]; then + # Check if wheel ends with x86_64.whl + for wheel in ../dist/pyfory-*-macosx*.whl; do + if [[ "$wheel" == *"x86_64.whl" ]]; then + echo "Fixing wheel tags for x86_64 wheel: $wheel" + wheel tags --platform-tag macosx_12_0_x86_64 "$wheel" + else + echo "Skipping wheel tags for non-x86_64 wheel: $wheel" + fi + done + else + # Other macOS versions: skip wheel repair + echo "Skipping wheel repair for macOS $MACOS_VERSION" + fi elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then - # Windows: use delvewheel to bundle dependencies - pip install delvewheel - mkdir -p ../dist_repaired - delvewheel repair ../dist/pyfory-*-win*.whl -w ../dist_repaired/ - rm ../dist/pyfory-*-win*.whl - mv ../dist_repaired/* ../dist/ - rmdir ../dist_repaired + echo "Skip windows wheel repair" fi + ls -l ../dist popd } From 1e01b9a4b5c8a3453479db97903af065114bd591 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Fri, 15 Aug 2025 11:51:24 +0800 Subject: [PATCH 08/22] chore(python): disable pyfory.format import warning (#2476) ## What does this PR do? ## Related issues Closes #2475 ## Does this PR introduce any user-facing change? 
- [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- python/pyfory/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index caea3efbe4..a5aa69f149 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -52,8 +52,12 @@ ) from pyfory._util import Buffer # noqa: F401 # pylint: disable=unused-import +import warnings + try: - from pyfory.format import * # noqa: F401,F403 # pylint: disable=unused-import + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=RuntimeWarning) + from pyfory.format import * # noqa: F401,F403 # pylint: disable=unused-import except (AttributeError, ImportError): pass From 68b9f08ea070725ad6f5c56566616e5627d8d23d Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Fri, 15 Aug 2025 16:46:05 +0800 Subject: [PATCH 09/22] fix(python): fix py release on macos 13 (#2478) ## What does this PR do? ## Related issues #2474 #2473 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? 
## Benchmark --- .github/workflows/release.yaml | 1 + ci/deploy.sh | 31 ++++++++++++++----------------- python/pyproject.toml | 2 +- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 919d326fa7..0d3728421b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -125,6 +125,7 @@ jobs: with: repository-url: https://test.pypi.org/legacy/ skip-existing: true + verbose: true verify-metadata: false packages-dir: downloaded_wheels - name: Publish to PyPI diff --git a/ci/deploy.sh b/ci/deploy.sh index 7de5af5a31..9a71fceb9d 100755 --- a/ci/deploy.sh +++ b/ci/deploy.sh @@ -72,7 +72,19 @@ build_pyfory() { # Fix strange installed deps not found pip install setuptools -U - python setup.py bdist_wheel --dist-dir=../dist + if [[ "$OSTYPE" == "darwin"* ]]; then + MACOS_VERSION=$(sw_vers -productVersion | cut -d. -f1-2) + echo "MACOS_VERSION: $MACOS_VERSION" + if [[ "$MACOS_VERSION" == "13"* ]]; then + export MACOSX_DEPLOYMENT_TARGET=10.13 + python setup.py bdist_wheel --plat-name macosx_10_13_x86_64 --dist-dir=../dist + else + python setup.py bdist_wheel --dist-dir=../dist + fi + else + python setup.py bdist_wheel --dist-dir=../dist + fi + ls -l ../dist if [ -n "$PLAT" ]; then @@ -83,22 +95,7 @@ build_pyfory() { auditwheel repair ../dist/pyfory-*-linux_*.whl --plat "$PLAT" --exclude '*arrow*' --exclude '*parquet*' --exclude '*numpy*' -w ../dist/ rm ../dist/pyfory-*-linux_*.whl elif [[ "$OSTYPE" == "darwin"* ]]; then - # Check macOS version - MACOS_VERSION=$(sw_vers -productVersion | cut -d. 
-f1-2) - if [[ "$MACOS_VERSION" == "13"* ]]; then - # Check if wheel ends with x86_64.whl - for wheel in ../dist/pyfory-*-macosx*.whl; do - if [[ "$wheel" == *"x86_64.whl" ]]; then - echo "Fixing wheel tags for x86_64 wheel: $wheel" - wheel tags --platform-tag macosx_12_0_x86_64 "$wheel" - else - echo "Skipping wheel tags for non-x86_64 wheel: $wheel" - fi - done - else - # Other macOS versions: skip wheel repair - echo "Skipping wheel repair for macOS $MACOS_VERSION" - fi + echo "Skip macos wheel repair" elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then echo "Skip windows wheel repair" fi diff --git a/python/pyproject.toml b/python/pyproject.toml index a18aa7b615..58b81674e3 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -59,7 +59,7 @@ all = ["pyarrow"] dev = ["ruff"] [tool.setuptools] -packages = ["pyfory", "pyfory.format", "pyfory.lib", "pyfory.meta"] +packages = ["pyfory", "pyfory.format", "pyfory.lib", "pyfory.lib.mmh3", "pyfory.meta"] include-package-data = true zip-safe = false From bef364a1e46afa5fcd6a2135335c0d0b19f2d864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emre=20=C5=9Eafak?= <3928300+esafak@users.noreply.github.com> Date: Sun, 17 Aug 2025 10:41:38 -0400 Subject: [PATCH 10/22] feat: Chain wheel test/build and release workflows (#2483) ## What does this PR do? This commit refactors the Python wheel CI and release process to use a chained workflow model, ensuring that the exact same test and build process is run for both CI checks and releases. A new reusable workflow, `.github/workflows/build-and-test-core.yml`, is introduced. This workflow is triggered on `push` and `pull_request` for CI purposes, and can also be called by other workflows via `workflow_call`. It contains the full logic for building, testing, and packaging the Python wheel across a matrix of operating systems and Python versions. The `release.yaml` workflow is refactored to be an orchestrator. 
On a new tag, it now calls the `build-and-test-core.yml` workflow to run all tests. If the tests pass, it proceeds to a separate job to download the wheel artifacts produced by the test run and publish them to PyPI. This architecture ensures that every release is automatically and thoroughly tested in the exact same manner as pull requests, just before publication. ## Related issues #2472 #2480 ## Does this PR introduce any user-facing change? No --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Shawn Yang --- .github/workflows/build-wheels-for-pr.yaml | 47 ++++++ .../workflows/build-wheels-for-release.yaml | 33 ++++ .github/workflows/build-wheels.yaml | 94 +++++++++++ .github/workflows/ci.yml | 26 +-- .github/workflows/release-java-snapshot.yaml | 2 +- .github/workflows/release-python.yaml | 63 +++++++ .github/workflows/release.yaml | 137 --------------- .github/workflows/sync.yml | 2 +- ci/build_manylinux_wheel.sh | 157 ++++++++++++++++++ 9 files changed, 409 insertions(+), 152 deletions(-) create mode 100644 .github/workflows/build-wheels-for-pr.yaml create mode 100644 .github/workflows/build-wheels-for-release.yaml create mode 100644 .github/workflows/build-wheels.yaml create mode 100644 .github/workflows/release-python.yaml delete mode 100644 .github/workflows/release.yaml create mode 100755 ci/build_manylinux_wheel.sh diff --git a/.github/workflows/build-wheels-for-pr.yaml b/.github/workflows/build-wheels-for-pr.yaml new file mode 100644 index 0000000000..ec85455f88 --- /dev/null +++ b/.github/workflows/build-wheels-for-pr.yaml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: build wheels for pull request +on: + push: + branches: + - main + paths: + - 'python/**' + - 'cpp/**' + - 'bazel/**' + - 'BUILD' + - 'WORKSPACE' + - '.github/workflows/build-wheels*.yml' + pull_request: + paths: + - 'python/**' + - 'cpp/**' + - 'bazel/**' + - 'BUILD' + - 'WORKSPACE' + - '.github/workflows/build-wheels*.yml' +jobs: + build-wheels: + uses: ./.github/workflows/build-wheels.yaml + strategy: + matrix: + os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest] + python-version: ['3.8', '3.13'] + with: + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/build-wheels-for-release.yaml b/.github/workflows/build-wheels-for-release.yaml new file mode 100644 index 0000000000..e1799e6de5 --- /dev/null +++ b/.github/workflows/build-wheels-for-release.yaml @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: build wheels for release +on: + push: + tags: ["v*"] + +jobs: + build-wheels: + uses: ./.github/workflows/build-wheels.yaml + strategy: + matrix: + os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, macos-latest, windows-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + with: + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} + bump-version: true diff --git a/.github/workflows/build-wheels.yaml b/.github/workflows/build-wheels.yaml new file mode 100644 index 0000000000..554f4a0065 --- /dev/null +++ b/.github/workflows/build-wheels.yaml @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Build Wheels + +on: + workflow_call: + inputs: + os: + required: true + type: string + python-version: + required: true + type: string + bump-version: + description: 'Whether to bump the version in setup.py' + required: false + type: boolean + default: false + +permissions: + contents: read + actions: write + +jobs: + build_and_test: + name: Build and Test + runs-on: ${{ inputs.os }} + + steps: + - uses: actions/checkout@v5 + + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Install bazel + if: "runner.os != 'Windows'" + run: ./ci/run_ci.sh install_bazel + + - name: Install bazel + if: "runner.os == 'Windows'" + run: ./ci/run_ci.sh install_bazel_windows + shell: bash + + - name: Update version in setup.py + if: "inputs.bump-version" + run: ./ci/deploy.sh bump_py_version + + - name: Build a binary wheel (Linux, manylinux) + if: "runner.os == 'Linux'" + env: + manylinux_x86_64_image: ${{ env.manylinux_x86_64_image }} + manylinux_aarch64_image: ${{ env.manylinux_aarch64_image }} + GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + ./ci/build_manylinux_wheel.sh --os "${{ runner.os }}" \ + --arch "${{ runner.arch }}" \ + --python "${{ inputs.python-version }}" \ + --workspace "${GITHUB_WORKSPACE}" + + - name: Build a binary wheel (native) + if: "runner.os != 'Linux'" + run: ./ci/deploy.sh build_pyfory + shell: bash + + - name: Install and verify wheel + shell: bash + run: | + python -m pip install --upgrade pip + pip install dist/*.whl + python -c "import pyfory; print(pyfory.__version__)" + + - name: Upload wheel +# if: ${{ inputs.bump-version }} + uses: actions/upload-artifact@v4 + with: + name: pyfory-wheels-${{ inputs.os }}-${{ inputs.python-version }}${{ inputs.bump-version && '-tagged' || github.sha }} + path: dist/*.whl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11c92b32b4..db261c1a6e 100644 --- a/.github/workflows/ci.yml 
+++ b/.github/workflows/ci.yml @@ -48,7 +48,7 @@ jobs: matrix: java-version: ["8", "11", "17", "21", "24"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK ${{ matrix.java-version }} uses: actions/setup-java@v4 with: @@ -81,7 +81,7 @@ jobs: # String in openj9 1.8 share byte array by offset, fory doesn't allow it. java-version: ["21"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK ${{ matrix.java-version }} uses: actions/setup-java@v4 with: @@ -107,7 +107,7 @@ jobs: matrix: java-version: ["21"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK ${{ matrix.java-version }} uses: actions/setup-java@v4 with: @@ -128,7 +128,7 @@ jobs: matrix: java-version: ["17", "21", "23"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: graalvm/setup-graalvm@v1 with: java-version: ${{ matrix.java-version }} @@ -152,7 +152,7 @@ jobs: matrix: java-version: ["8", "11", "17", "21"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK ${{ matrix.java-version }} uses: actions/setup-java@v4 with: @@ -171,7 +171,7 @@ jobs: name: Scala CI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK8 uses: actions/setup-java@v4 with: @@ -189,7 +189,7 @@ jobs: name: Integration Tests runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK8 uses: actions/setup-java@v4 with: @@ -210,7 +210,7 @@ jobs: os: [ubuntu-latest, macos-13, windows-2022] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v4 with: @@ -237,7 +237,7 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 45 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python 3.11 uses: actions/setup-python@v5 with: @@ -252,7 +252,7 @@ 
jobs: os: [ubuntu-latest, macos-13, macos-14, windows-2022] # macos-13: x86, macos-14: arm64 runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python 3.11 uses: actions/setup-python@v5 with: @@ -267,7 +267,7 @@ jobs: python-version: [3.8, 3.12, 3.13.3] os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, windows-2022] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -286,7 +286,7 @@ jobs: matrix: go-version: ["1.13", "1.18"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Setup Go ${{ matrix.go-version }} uses: actions/setup-go@v4 with: @@ -308,7 +308,7 @@ jobs: name: Code Style Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK ${{ matrix.java-version }} uses: actions/setup-java@v4 with: diff --git a/.github/workflows/release-java-snapshot.yaml b/.github/workflows/release-java-snapshot.yaml index 1292ccc1f3..631975aac4 100644 --- a/.github/workflows/release-java-snapshot.yaml +++ b/.github/workflows/release-java-snapshot.yaml @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'apache/fory' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Maven Central Repository uses: actions/setup-java@v4 with: diff --git a/.github/workflows/release-python.yaml b/.github/workflows/release-python.yaml new file mode 100644 index 0000000000..938ef2e9a4 --- /dev/null +++ b/.github/workflows/release-python.yaml @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Publish Python + +on: + workflow_run: + workflows: ["build wheels for release"] + types: [completed] + +permissions: + contents: read + id-token: write + +jobs: + publish-wheels: + name: Publish Wheels + if: ${{ github.event.workflow_run.conclusion == 'success' }} + runs-on: ubuntu-latest + steps: + - name: Download all wheel artifacts + uses: actions/download-artifact@v5 + with: + path: downloaded_wheels + + - name: Move wheels to a single directory + shell: bash + run: | + mkdir dist + find downloaded_wheels -type f -name "*.whl" -exec mv {} dist/ \; + ls -R dist + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + if: startsWith(github.ref, 'refs/tags/') && contains(github.ref, '-') + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + verbose: true + verify-metadata: false + packages-dir: dist + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + if: startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') + with: + skip-existing: true + verify-metadata: false + packages-dir: dist diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index 0d3728421b..0000000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,137 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Publish Fory - -on: - push: - tags: - - "v*" - -permissions: - contents: read - -jobs: - build-wheels: - name: Build Wheels - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.8, 3.9, "3.10", 3.11, 3.12, 3.13] - os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, windows-2022] # macos-13: x86, macos-14: arm64 - env: - manylinux_x86_64_image: quay.io/pypa/manylinux_2_28_x86_64 - manylinux_aarch64_image: quay.io/pypa/manylinux_2_28_aarch64 - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install bazel (for macOS and Windows) - if: "!startsWith(matrix.os, 'ubuntu')" - shell: bash - run: | - if [ "$RUNNER_OS" == "Windows" ]; then - ./ci/run_ci.sh install_bazel_windows - else - ./ci/run_ci.sh install_bazel - fi - - name: Update version in setup.py - shell: bash - run: ci/deploy.sh bump_py_version - # --------- Use manylinux for Linux wheels --------- - - name: Build a binary wheel (Linux, manylinux) - if: startsWith(matrix.os, 'ubuntu') - shell: bash - run: | - DOCKER_IMAGE="" - PLAT="" - if [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then - DOCKER_IMAGE="${{ env.manylinux_x86_64_image }}" - PLAT="manylinux_2_28_x86_64" 
- elif [[ "${{ matrix.os }}" == "ubuntu-24.04-arm" ]]; then - DOCKER_IMAGE="${{ env.manylinux_aarch64_image }}" - PLAT="manylinux_2_28_aarch64" - fi - PY_VERSION=${{ matrix.python-version }} - echo "PY_VERSION: $PY_VERSION" - PY_VERSION=${PY_VERSION//./} - echo "PY_VERSION without dots: $PY_VERSION" - docker run --rm -e PY_VERSION="$PY_VERSION" -e PLAT="$PLAT" \ - -v ${{ github.workspace }}:/work \ - -w /work "$DOCKER_IMAGE" \ - bash -c " - set -e - yum install -y git sudo wget - git config --global --add safe.directory /work - ls -alh /opt/python - echo \"PY_VERSION: \$PY_VERSION\" - ls /opt/python/cp\${PY_VERSION}-cp\${PY_VERSION} - ls /opt/python/cp\${PY_VERSION}-cp\${PY_VERSION}/bin - export PATH=/opt/python/cp\${PY_VERSION}-cp\${PY_VERSION}/bin:\$PATH - echo \"PATH: \$PATH\" - echo \"Using Python from: \$(which python)\" - echo \"Python version: \$(python -V)\" - bash ci/run_ci.sh install_bazel - bash ci/deploy.sh build_pyfory - " - - # --------- Native (not in container) for macOS and Windows --------- - - name: Build a binary wheel (native) - if: "!startsWith(matrix.os, 'ubuntu')" - shell: bash - run: | - ci/deploy.sh build_pyfory - - name: Upload Wheel Artifact - uses: actions/upload-artifact@v4 - with: - name: pyfory-wheels-${{ matrix.os }}-${{ matrix.python-version }} - path: dist/*.whl - - publish-wheels: - name: Publish Wheels - runs-on: ubuntu-latest - needs: build-wheels - permissions: - contents: read - id-token: write - steps: - - name: Download Wheel Artifacts - uses: actions/download-artifact@v4 - with: - path: downloaded_wheels/ - merge-multiple: true - - name: Display structure of downloaded files - run: ls -R downloaded_wheels - - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - if: ${{ startsWith(github.ref, 'refs/tags/') && contains(github.ref, '-') }} - with: - repository-url: https://test.pypi.org/legacy/ - skip-existing: true - verbose: true - verify-metadata: false - packages-dir: downloaded_wheels - - name: 
Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} - with: - skip-existing: true - verify-metadata: false - packages-dir: downloaded_wheels diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 46ed08d178..9f4c1b0fbe 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'apache/fory' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Sync files uses: BetaHuhn/repo-file-sync-action@v1 with: diff --git a/ci/build_manylinux_wheel.sh b/ci/build_manylinux_wheel.sh new file mode 100755 index 0000000000..58c8347ddf --- /dev/null +++ b/ci/build_manylinux_wheel.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Usage: +# ./build_manylinux_wheel.sh --os --python \ +# --arch [--workspace ] [--x86-image ] \ +# [--aarch64-image ] [--docker-image ] [--dry-run] +# +# Examples: +# ./build_manylinux_wheel.sh --os ubuntu-latest --python 3.10 --arch X64 +# ./build_manylinux_wheel.sh --os ubuntu-24.04-arm --python 3.11 --arch ARM64 \ +# --aarch64-image quay.io/pypa/manylinux_2014_aarch64:latest +# +# Notes: +# --arch accepts values: X86, X64, ARM, or ARM64 (case-insensitive). +# This script requires --arch to be provided explicitly. +set -euo pipefail + +print_usage() { + cat < --python --arch [options] + +Required: + --os matrix.os value (e.g. ubuntu-latest or ubuntu-24.04-arm) + --python Python version (e.g. 3.10) + --arch Architecture (X86, X64, ARM, or ARM64) + +Optional: + --workspace Path to workspace to mount into container (default: cwd) + --x86-image manylinux x86_64 docker image (overrides default env) + --aarch64-image manylinux aarch64 docker image (overrides default env) + --docker-image Explicit docker image to use (skips auto selection) + --dry-run Print the docker command without executing it + -h, --help Show this help +EOF +} + +# Defaults - can be overridden by options +WORKSPACE="${GITHUB_WORKSPACE:-$(pwd)}" +MANYLINUX_X86_64_IMAGE="${MANYLINUX_X86_64_IMAGE:-quay.io/pypa/manylinux_2_28_x86_64:latest}" +MANYLINUX_AARCH64_IMAGE="${MANYLINUX_AARCH64_IMAGE:-quay.io/pypa/manylinux_2_28_aarch64:latest}" +DOCKER_IMAGE="" +ARCH="" +DRY_RUN=0 + +# Parse args +while [[ $# -gt 0 ]]; do + case "$1" in + --os) MATRIX_OS="$2"; shift 2;; + --python) PY_VERSION_RAW="$2"; shift 2;; + --workspace) WORKSPACE="$2"; shift 2;; + --x86-image) MANYLINUX_X86_64_IMAGE="$2"; shift 2;; + --aarch64-image) MANYLINUX_AARCH64_IMAGE="$2"; shift 2;; + --docker-image) DOCKER_IMAGE="$2"; shift 2;; + --arch) ARCH="$2"; shift 2;; + --dry-run) DRY_RUN=1; shift;; + -h|--help) print_usage; exit 0;; + *) echo "Unknown argument: $1"; print_usage; exit 2;; + esac +done + +if [[ -z "${MATRIX_OS:-}" ]] 
|| [[ -z "${PY_VERSION_RAW:-}" ]] || [[ -z "${ARCH:-}" ]]; then + echo "Error: --os, --python and --arch are required." + print_usage + exit 2 +fi + +# Normalize ARCH to uppercase +ARCH="${ARCH^^}" + +# Normalize Python version: remove dots (e.g. 3.10 -> 310) +PY_VERSION_NO_DOTS="${PY_VERSION_RAW//./}" + +# Determine DOCKER_IMAGE and PLAT strictly from ARCH (unless --docker-image supplied) +PLAT="" +case "$ARCH" in + X86|X64) + PLAT="manylinux_2_28_x86_64" + DOCKER_IMAGE="${DOCKER_IMAGE:-$MANYLINUX_X86_64_IMAGE}" + ;; + ARM|ARM64) + PLAT="manylinux_2_28_aarch64" + DOCKER_IMAGE="${DOCKER_IMAGE:-$MANYLINUX_AARCH64_IMAGE}" + ;; + *) + echo "Error: Unsupported ARCH '$ARCH'. Use one of: X86, X64, ARM, ARM64." + exit 2 + ;; +esac + +echo "Matrix OS: $MATRIX_OS" +echo "Arch (input): $ARCH" +echo "Selected docker image: $DOCKER_IMAGE" +echo "Platform (PLAT): $PLAT" +echo "Python version (raw): $PY_VERSION_RAW" +echo "PY_VERSION without dots: $PY_VERSION_NO_DOTS" +echo "Workspace: $WORKSPACE" + +# Basic checks +if ! command -v docker >/dev/null 2>&1; then + echo "Error: docker is required but not installed or not on PATH." 
+ exit 3 +fi + +SCRIPT='set -e +yum install -y git sudo wget || true +git config --global --add safe.directory /work +ls -alh /opt/python || true +echo "PY_VERSION: $PY_VERSION" +ls /opt/python/cp${PY_VERSION}-cp${PY_VERSION} || true +ls /opt/python/cp${PY_VERSION}-cp${PY_VERSION}/bin || true +export PATH=/opt/python/cp${PY_VERSION}-cp${PY_VERSION}/bin:$PATH +echo "PATH: $PATH" +echo "Using Python from: $(which python || echo not-found)" +echo "Python version: $(python -V 2>&1 || true)" +bash ci/run_ci.sh install_bazel +bash ci/deploy.sh build_pyfory' + +DOCKER_CMD=(docker run --rm + -e "PY_VERSION=$PY_VERSION_NO_DOTS" + -e "PLAT=$PLAT" + -v "$WORKSPACE":/work + -w /work + "$DOCKER_IMAGE" + bash -lc "$SCRIPT" +) + +# Show the final command (joined) for clarity +echo +echo "Docker command to be executed:" +printf ' %q' "${DOCKER_CMD[@]}" +echo +echo + +if [[ $DRY_RUN -eq 1 ]]; then + echo "Dry run enabled; not executing docker command." + exit 0 +fi + +# Execute +"${DOCKER_CMD[@]}" From b84255fae05888a67a0aafbe6ef4ff6375021518 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Mon, 18 Aug 2025 20:08:29 +0800 Subject: [PATCH 11/22] chore: bump release version to 0.12.0 (#2489) ## What does this PR do? ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? 
## Benchmark --- README.md | 14 +++++++------- docs/guide/scala_guide.md | 2 +- integration_tests/graalvm_tests/pom.xml | 2 +- integration_tests/jdk_compatibility_tests/pom.xml | 2 +- integration_tests/jpms_tests/pom.xml | 2 +- integration_tests/latest_jdk_tests/pom.xml | 2 +- java/benchmark/pom.xml | 2 +- java/fory-core/pom.xml | 2 +- java/fory-extensions/pom.xml | 2 +- java/fory-format/pom.xml | 2 +- java/fory-test-core/pom.xml | 2 +- java/fory-testsuite/pom.xml | 2 +- java/pom.xml | 2 +- javascript/packages/fory/package.json | 2 +- javascript/packages/hps/package.json | 2 +- kotlin/pom.xml | 2 +- python/pyfory/__init__.py | 2 +- rust/Cargo.toml | 2 +- scala/build.sbt | 2 +- 19 files changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 22937b0c6d..cd38e3ea6f 100644 --- a/README.md +++ b/README.md @@ -106,13 +106,13 @@ Nightly snapshot: org.apache.fory fory-core - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT ``` @@ -122,13 +122,13 @@ Release version: org.apache.fory fory-core - 0.11.2 + 0.12.0 ``` @@ -137,13 +137,13 @@ Release version: Scala2: ```sbt -libraryDependencies += "org.apache.fory" % "fory-scala_2.13" % "0.11.2" +libraryDependencies += "org.apache.fory" % "fory-scala_2.13" % "0.12.0" ``` Scala3: ```sbt -libraryDependencies += "org.apache.fory" % "fory-scala_3" % "0.11.2" +libraryDependencies += "org.apache.fory" % "fory-scala_3" % "0.12.0" ``` ### Kotlin @@ -152,7 +152,7 @@ libraryDependencies += "org.apache.fory" % "fory-scala_3" % "0.11.2" org.apache.fory fory-kotlin - 0.11.2 + 0.12.0 ``` diff --git a/docs/guide/scala_guide.md b/docs/guide/scala_guide.md index c0ca649637..563e1e65d2 100644 --- a/docs/guide/scala_guide.md +++ b/docs/guide/scala_guide.md @@ -34,7 +34,7 @@ Scala 2 and 3 are both supported. 
To add a dependency on Fory scala for with sbt, use the following: ```sbt -libraryDependencies += "org.apache.fory" %% "fory-scala" % "0.11.2" +libraryDependencies += "org.apache.fory" %% "fory-scala" % "0.12.0" ``` ## Quick Start diff --git a/integration_tests/graalvm_tests/pom.xml b/integration_tests/graalvm_tests/pom.xml index 5286456324..84da032e4d 100644 --- a/integration_tests/graalvm_tests/pom.xml +++ b/integration_tests/graalvm_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT ../../java 4.0.0 diff --git a/integration_tests/jdk_compatibility_tests/pom.xml b/integration_tests/jdk_compatibility_tests/pom.xml index b4f19783d1..a2eb0f6b88 100644 --- a/integration_tests/jdk_compatibility_tests/pom.xml +++ b/integration_tests/jdk_compatibility_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT ../../java 4.0.0 diff --git a/integration_tests/jpms_tests/pom.xml b/integration_tests/jpms_tests/pom.xml index 87fcaf05f1..00e5a6b948 100644 --- a/integration_tests/jpms_tests/pom.xml +++ b/integration_tests/jpms_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT ../../java 4.0.0 diff --git a/integration_tests/latest_jdk_tests/pom.xml b/integration_tests/latest_jdk_tests/pom.xml index 53d341071c..e0bc170228 100644 --- a/integration_tests/latest_jdk_tests/pom.xml +++ b/integration_tests/latest_jdk_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT ../../java 4.0.0 diff --git a/java/benchmark/pom.xml b/java/benchmark/pom.xml index 1a87bfb608..f4a5990af3 100644 --- a/java/benchmark/pom.xml +++ b/java/benchmark/pom.xml @@ -26,7 +26,7 @@ fory-parent org.apache.fory - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT benchmark diff --git a/java/fory-core/pom.xml b/java/fory-core/pom.xml index e68a3dcc71..3b8a084db4 100644 --- a/java/fory-core/pom.xml +++ b/java/fory-core/pom.xml @@ -25,7 +25,7 @@ org.apache.fory 
fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/java/fory-extensions/pom.xml b/java/fory-extensions/pom.xml index a6e1dfa6e5..28677ec42b 100644 --- a/java/fory-extensions/pom.xml +++ b/java/fory-extensions/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/java/fory-format/pom.xml b/java/fory-format/pom.xml index 31fb73a80c..42f27a3103 100644 --- a/java/fory-format/pom.xml +++ b/java/fory-format/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/java/fory-test-core/pom.xml b/java/fory-test-core/pom.xml index 28696dda08..0697219038 100644 --- a/java/fory-test-core/pom.xml +++ b/java/fory-test-core/pom.xml @@ -25,7 +25,7 @@ fory-parent org.apache.fory - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/java/fory-testsuite/pom.xml b/java/fory-testsuite/pom.xml index 47b6da69f3..53c88f8d55 100644 --- a/java/fory-testsuite/pom.xml +++ b/java/fory-testsuite/pom.xml @@ -25,7 +25,7 @@ fory-parent org.apache.fory - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index e1f46f89e0..12ebf75209 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -33,7 +33,7 @@ org.apache.fory fory-parent pom - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT Fory Project Parent POM Apache Fory™ is a blazingly fast multi-language serialization framework powered by jit and zero-copy. 
diff --git a/javascript/packages/fory/package.json b/javascript/packages/fory/package.json index 43657cb69c..e30e9f6319 100644 --- a/javascript/packages/fory/package.json +++ b/javascript/packages/fory/package.json @@ -1,6 +1,6 @@ { "name": "@foryjs/fory", - "version": "0.12.0.dev", + "version": "0.13.0.dev", "description": "Apache Fory™ is a blazingly fast multi-language serialization framework powered by jit and zero-copy", "main": "dist/index.js", "scripts": { diff --git a/javascript/packages/hps/package.json b/javascript/packages/hps/package.json index dacdaecd2a..03fa1d040a 100644 --- a/javascript/packages/hps/package.json +++ b/javascript/packages/hps/package.json @@ -1,6 +1,6 @@ { "name": "@foryjs/hps", - "version": "0.12.0.dev", + "version": "0.13.0.dev", "description": "Apache Fory™ nodejs high-performance suite", "main": "dist/index.js", "files": [ diff --git a/kotlin/pom.xml b/kotlin/pom.xml index 4370a3f64a..4c7f7601be 100644 --- a/kotlin/pom.xml +++ b/kotlin/pom.xml @@ -30,7 +30,7 @@ org.apache.fory fory-kotlin - 0.12.0-SNAPSHOT + 0.13.0-SNAPSHOT 4.0.0 diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index a5aa69f149..4a5e46396d 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -61,4 +61,4 @@ except (AttributeError, ImportError): pass -__version__ = "0.12.0.dev" +__version__ = "0.13.0.dev" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 174302d0b0..8d75b5bdf1 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -30,7 +30,7 @@ exclude = [ resolver = "2" [workspace.package] -version = "0.12.0" +version = "0.13.0" rust-version = "1.70" license = "Apache-2.0" readme = "README.md" diff --git a/scala/build.sbt b/scala/build.sbt index 69a09ee2b8..b76c3ba679 100644 --- a/scala/build.sbt +++ b/scala/build.sbt @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -val foryVersion = "0.12.0-SNAPSHOT" +val foryVersion = "0.13.0-SNAPSHOT" val scala213Version = "2.13.15" ThisBuild / apacheSonatypeProjectProfile := "fory" version := foryVersion From c7cd3524add1f363aa6f5861682eb825c643357f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emre=20=C5=9Eafak?= <3928300+esafak@users.noreply.github.com> Date: Mon, 18 Aug 2025 23:12:02 -0400 Subject: [PATCH 12/22] fix(ci): Build python wheels using interpreters in manylinux2014 (#2486) ## What does this PR do? The existing workflow did not build wheels correctly because the python interpreter installed in the workflow was not used in the container where the actual building was taking place. The manylinux images already have every python version pre-installed. We now use them instead. Key changes include: - **New Workflows:** - `build-containerized-pr.yml`: Handles building wheels for pull requests using containerized environments for Linux. - `build-containerized-release.yml`: Handles building wheels for releases using containerized environments for Linux. - `build-native-pr.yml`: Handles building wheels for pull requests on macOS and Windows. - `build-native-release.yml`: Handles building wheels for releases on macOS and Windows. - **Removed Workflows:** - `.github/workflows/build-wheels.yaml`: This generic workflow has been superseded by the new, more specific workflows. - `.github/workflows/build-wheels-for-pr.yaml`: Merged into `build-containerized-pr.yml` and `build-native-pr.yml`. - `.github/workflows/build-wheels-for-release.yaml`: Merged into `build-containerized-release.yml` and `build-native-release.yml`. - `.github/workflows/build-native-pr.yml`: This was a duplicate and is now handled by the new `build-native-pr.yml`. - **Script Improvements:** - `ci/build_linux_wheels.py`: Introduced a new script to manage Linux wheel building within Docker containers, replacing `build_manylinux_wheel.sh`. This script now uses explicit Docker image definitions and a simplified execution model. 
- `ci/deploy.sh`: Updated to use `PYTHON_PATH` and `PIP_CMD` for better Python environment management, and adjusted `pyarrow` installation versions for different Python versions. - `ci/run_ci.sh`: Updated to correctly handle Bazel installation paths (avoiding sudo) and use `curl` for downloading Bazel, since wget is not in the container. - **Workflow Triggers:** - Workflows are now triggered based on their intended purpose (e.g., `push` to `main` or tags for releases, `pull_request` events for PR builds). - Path filters have been adjusted to ensure workflows trigger only when relevant files are changed. ## Related issues Closes #2480 ## Notes I also attempted to create musl builds but they failed because the bazel binaries are incompatible with it. We would need to rebuild bazel for musllinux. --- ...elease.yaml => build-containerized-pr.yml} | 22 +-- ...r.yaml => build-containerized-release.yml} | 44 +++-- .github/workflows/build-native-pr.yml | 53 ++++++ .github/workflows/build-native-release.yml | 56 ++++++ .github/workflows/build-wheels.yaml | 94 ---------- .github/workflows/release-python.yaml | 2 +- ci/build_linux_wheels.py | 160 ++++++++++++++++++ ci/build_manylinux_wheel.sh | 157 ----------------- ci/deploy.sh | 42 ++--- ci/run_ci.sh | 15 +- ci/tasks/python.py | 2 +- python/setup.py | 7 +- 12 files changed, 334 insertions(+), 320 deletions(-) rename .github/workflows/{build-wheels-for-release.yaml => build-containerized-pr.yml} (67%) rename .github/workflows/{build-wheels-for-pr.yaml => build-containerized-release.yml} (54%) create mode 100644 .github/workflows/build-native-pr.yml create mode 100644 .github/workflows/build-native-release.yml delete mode 100644 .github/workflows/build-wheels.yaml create mode 100755 ci/build_linux_wheels.py delete mode 100755 ci/build_manylinux_wheel.sh diff --git a/.github/workflows/build-wheels-for-release.yaml b/.github/workflows/build-containerized-pr.yml similarity index 67% rename from 
.github/workflows/build-wheels-for-release.yaml rename to .github/workflows/build-containerized-pr.yml index e1799e6de5..5d144bc60b 100644 --- a/.github/workflows/build-wheels-for-release.yaml +++ b/.github/workflows/build-containerized-pr.yml @@ -15,19 +15,21 @@ # specific language governing permissions and limitations # under the License. -name: build wheels for release +name: Build Containerized PR Wheels on: + pull_request: + paths: [ci/**, python/**, .github/workflows/**] push: - tags: ["v*"] + branches: [main] + paths: [ci/**, python/**, .github/workflows/**] jobs: - build-wheels: - uses: ./.github/workflows/build-wheels.yaml + build: + runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] - with: - os: ${{ matrix.os }} - python-version: ${{ matrix.python-version }} - bump-version: true + os: [ubuntu-latest, ubuntu-24.04-arm] + steps: + - uses: actions/checkout@v5 + - name: Build and test wheels + run: ./ci/build_linux_wheels.py --arch ${{ runner.arch }} diff --git a/.github/workflows/build-wheels-for-pr.yaml b/.github/workflows/build-containerized-release.yml similarity index 54% rename from .github/workflows/build-wheels-for-pr.yaml rename to .github/workflows/build-containerized-release.yml index ec85455f88..7551ec047b 100644 --- a/.github/workflows/build-wheels-for-pr.yaml +++ b/.github/workflows/build-containerized-release.yml @@ -15,33 +15,27 @@ # specific language governing permissions and limitations # under the License. 
-name: build wheels for pull request +name: Build Containerized Release Wheels on: push: - branches: - - main - paths: - - 'python/**' - - 'cpp/**' - - 'bazel/**' - - 'BUILD' - - 'WORKSPACE' - - '.github/workflows/build-wheels*.yml' - pull_request: - paths: - - 'python/**' - - 'cpp/**' - - 'bazel/**' - - 'BUILD' - - 'WORKSPACE' - - '.github/workflows/build-wheels*.yml' + tags: ['v*'] # NO PATH FILTER - critical for releases + jobs: - build-wheels: - uses: ./.github/workflows/build-wheels.yaml + build: + runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest] - python-version: ['3.8', '3.13'] - with: - os: ${{ matrix.os }} - python-version: ${{ matrix.python-version }} + os: [ubuntu-latest, ubuntu-24.04-arm] + steps: + - uses: actions/checkout@v5 + - name: Bump version + run: ./ci/deploy.sh bump_py_version + - name: Install bazel + run: ./ci/run_ci.sh install_bazel + - name: Build and test wheels + run: ./ci/build_linux_wheels.py --arch ${{ runner.arch }} --release + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: pyfory-wheels-${{ matrix.os }}-${{ runner.arch }}-${{ github.ref_name }} + path: dist/*.whl diff --git a/.github/workflows/build-native-pr.yml b/.github/workflows/build-native-pr.yml new file mode 100644 index 0000000000..3c0d2c06c0 --- /dev/null +++ b/.github/workflows/build-native-pr.yml @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Build Native PR Wheels +on: + pull_request: + paths: [ci/**, python/**, .github/workflows/**] + push: + branches: [main] + paths: [ci/**, python/**, .github/workflows/**] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [macos-latest, windows-latest] + python-version: ['3.8', '3.13'] + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install bazel + if: runner.os != 'Windows' + run: ./ci/run_ci.sh install_bazel + - name: Install bazel (Windows) + if: runner.os == 'Windows' + run: ./ci/run_ci.sh install_bazel_windows + shell: bash + - name: Build wheel + run: ./ci/deploy.sh build_pyfory + shell: bash + - name: Install and verify wheel + shell: bash + run: | + python -m pip install --upgrade pip + pip install dist/*.whl + python -c "import pyfory; print(pyfory.__version__)" diff --git a/.github/workflows/build-native-release.yml b/.github/workflows/build-native-release.yml new file mode 100644 index 0000000000..f6c43662cf --- /dev/null +++ b/.github/workflows/build-native-release.yml @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Build Native Release Wheels +on: + push: + tags: ['v*'] # NO PATH FILTER - critical for releases + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [macos-latest, windows-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + steps: + - uses: actions/checkout@v5 + - name: Bump version + run: ./ci/deploy.sh bump_py_version + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install bazel + if: runner.os != 'Windows' + run: ./ci/run_ci.sh install_bazel + - name: Install bazel (Windows) + if: runner.os == 'Windows' + run: ./ci/run_ci.sh install_bazel_windows + shell: bash + - name: Build wheel + run: ./ci/deploy.sh build_pyfory + - name: Install and verify wheel + shell: bash + run: | + python -m pip install --upgrade pip + pip install dist/*.whl + python -c "import pyfory; print(pyfory.__version__)" + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: pyfory-wheels-${{ matrix.os }}-${{ matrix.python-version }}-${{ github.ref_name }} + path: dist/*.whl diff --git a/.github/workflows/build-wheels.yaml b/.github/workflows/build-wheels.yaml deleted file mode 100644 index 554f4a0065..0000000000 --- a/.github/workflows/build-wheels.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Build Wheels - -on: - workflow_call: - inputs: - os: - required: true - type: string - python-version: - required: true - type: string - bump-version: - description: 'Whether to bump the version in setup.py' - required: false - type: boolean - default: false - -permissions: - contents: read - actions: write - -jobs: - build_and_test: - name: Build and Test - runs-on: ${{ inputs.os }} - - steps: - - uses: actions/checkout@v5 - - - name: Set up Python ${{ inputs.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python-version }} - - - name: Install bazel - if: "runner.os != 'Windows'" - run: ./ci/run_ci.sh install_bazel - - - name: Install bazel - if: "runner.os == 'Windows'" - run: ./ci/run_ci.sh install_bazel_windows - shell: bash - - - name: Update version in setup.py - if: "inputs.bump-version" - run: ./ci/deploy.sh bump_py_version - - - name: Build a binary wheel (Linux, manylinux) - if: "runner.os == 'Linux'" - env: - manylinux_x86_64_image: ${{ env.manylinux_x86_64_image }} - manylinux_aarch64_image: ${{ env.manylinux_aarch64_image }} - GITHUB_WORKSPACE: ${{ github.workspace }} - run: | - ./ci/build_manylinux_wheel.sh --os "${{ runner.os }}" \ - --arch "${{ runner.arch }}" \ - --python "${{ inputs.python-version }}" \ - --workspace "${GITHUB_WORKSPACE}" - - - name: Build a binary wheel (native) - if: "runner.os != 'Linux'" - run: ./ci/deploy.sh 
build_pyfory - shell: bash - - - name: Install and verify wheel - shell: bash - run: | - python -m pip install --upgrade pip - pip install dist/*.whl - python -c "import pyfory; print(pyfory.__version__)" - - - name: Upload wheel -# if: ${{ inputs.bump-version }} - uses: actions/upload-artifact@v4 - with: - name: pyfory-wheels-${{ inputs.os }}-${{ inputs.python-version }}${{ inputs.bump-version && '-tagged' || github.sha }} - path: dist/*.whl diff --git a/.github/workflows/release-python.yaml b/.github/workflows/release-python.yaml index 938ef2e9a4..c66d65e705 100644 --- a/.github/workflows/release-python.yaml +++ b/.github/workflows/release-python.yaml @@ -19,7 +19,7 @@ name: Publish Python on: workflow_run: - workflows: ["build wheels for release"] + workflows: ["Build Containerized Release Wheels", "Build Native Release Wheels"] types: [completed] permissions: diff --git a/ci/build_linux_wheels.py b/ci/build_linux_wheels.py new file mode 100755 index 0000000000..02c1754e67 --- /dev/null +++ b/ci/build_linux_wheels.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Host-side wrapper: workflow provides only --arch. +Images are defined as regular Python lists (no env vars). 
+ +Environment: + - GITHUB_WORKSPACE (optional; defaults to cwd) +""" +from __future__ import annotations +import argparse +import os +import shlex +import subprocess +import sys +from typing import List + +SCRIPT = r'''set -e +yum install -y git sudo wget || true + +git config --global --add safe.directory /work + +# Determine Python versions to test +if [ "$RELEASE" = "1" ]; then + PYTHON_VERSIONS="cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312 cp313-cp313" +else + PYTHON_VERSIONS="cp38-cp38 cp313-cp313" +fi + +ci/run_ci.sh install_bazel +export PATH="$HOME/.local/bin:$PATH" + +# use the python interpreters preinstalled in manylinux +OLD_PATH=$PATH +for PY in $PYTHON_VERSIONS; do + export PYTHON_PATH="/opt/python/$PY/bin/python" + export PATH="/opt/python/$PY/bin:$OLD_PATH" + echo "Using $PYTHON_PATH" + python -m pip install cython wheel pytest + ci/deploy.sh build_pyfory + + latest_wheel=$(ls -t dist/*.whl | head -n1) + echo "Attempting to install $latest_wheel" + python -m pip install "$latest_wheel" + python -c "import pyfory; print(pyfory.__version__)" + + bazel clean --expunge +done +export PATH=$OLD_PATH +''' + +DEFAULT_X86_IMAGES = [ + "quay.io/pypa/manylinux2014_x86_64:latest", + # "quay.io/pypa/manylinux_2_28_x86_64:latest", + + # bazel binaries do not work with musl + # "quay.io/pypa/musllinux_1_2_x86_64:latest", +] + +DEFAULT_AARCH64_IMAGES = [ + "quay.io/pypa/manylinux2014_aarch64:latest", + # "quay.io/pypa/manylinux_2_28_aarch64:latest", + + # bazel binaries do not work with musl + # "quay.io/pypa/musllinux_1_2_aarch64:latest", +] + +ARCH_ALIASES = { + "X86": "x86", + "X64": "x86", + "X86_64": "x86", + "AMD64": "x86", + "ARM": "arm64", + "ARM64": "arm64", + "AARCH64": "arm64", +} + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument("--arch", required=True, help="Architecture (e.g. 
X86, X64, AARCH64)") + p.add_argument("--release", action="store_true", help="Run full test suite for release") + p.add_argument("--dry-run", action="store_true", help="Print docker commands without running") + return p.parse_args() + +def normalize_arch(raw: str) -> str: + key = raw.strip().upper() + return ARCH_ALIASES.get(key, raw.strip().lower()) + +def collect_images_for_arch(arch_normalized: str) -> List[str]: + if arch_normalized == "x86": + imgs = DEFAULT_X86_IMAGES # dedupe preserving order + elif arch_normalized == "arm64": + imgs = DEFAULT_AARCH64_IMAGES + else: + raise SystemExit(f"Unsupported arch: {arch_normalized!r}") + return imgs + +def build_docker_cmd(workspace: str, image: str) -> List[str]: + workspace = os.path.abspath(workspace) + return [ + "docker", "run", "-i", "--rm", + "-v", f"{workspace}:/work", + "-w", "/work", + image, + "bash", "-s", "--" + ] + +def run_for_images(images: List[str], workspace: str, dry_run: bool) -> int: + rc_overall = 0 + for image in images: + docker_cmd = build_docker_cmd(workspace, image) + printable = " ".join(shlex.quote(c) for c in docker_cmd) + print(f"+ {printable}") + if dry_run: + continue + try: + completed = subprocess.run(docker_cmd, input=SCRIPT.encode("utf-8")) + if completed.returncode != 0: + print(f"Container {image} exited with {completed.returncode}", file=sys.stderr) + rc_overall = completed.returncode if rc_overall == 0 else rc_overall + else: + print(f"Container {image} completed successfully.") + except KeyboardInterrupt: + print("Interrupted by user", file=sys.stderr) + return 130 + except FileNotFoundError as e: + print(f"Error running docker: {e}", file=sys.stderr) + return 2 + return rc_overall + +def main() -> int: + args = parse_args() + arch = normalize_arch(args.arch) + images = collect_images_for_arch(arch) + if not images: + print(f"No images configured for arch {arch}", file=sys.stderr) + return 2 + workspace = os.environ.get("GITHUB_WORKSPACE", os.getcwd()) + print(f"Selected 
images for arch {args.arch}: {images}") + return run_for_images(images, workspace, args.dry_run) + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ci/build_manylinux_wheel.sh b/ci/build_manylinux_wheel.sh deleted file mode 100755 index 58c8347ddf..0000000000 --- a/ci/build_manylinux_wheel.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Usage: -# ./build_manylinux_wheel.sh --os --python \ -# --arch [--workspace ] [--x86-image ] \ -# [--aarch64-image ] [--docker-image ] [--dry-run] -# -# Examples: -# ./build_manylinux_wheel.sh --os ubuntu-latest --python 3.10 --arch X64 -# ./build_manylinux_wheel.sh --os ubuntu-24.04-arm --python 3.11 --arch ARM64 \ -# --aarch64-image quay.io/pypa/manylinux_2014_aarch64:latest -# -# Notes: -# --arch accepts values: X86, X64, ARM, or ARM64 (case-insensitive). -# This script requires --arch to be provided explicitly. -set -euo pipefail - -print_usage() { - cat < --python --arch [options] - -Required: - --os matrix.os value (e.g. ubuntu-latest or ubuntu-24.04-arm) - --python Python version (e.g. 
3.10) - --arch Architecture (X86, X64, ARM, or ARM64) - -Optional: - --workspace Path to workspace to mount into container (default: cwd) - --x86-image manylinux x86_64 docker image (overrides default env) - --aarch64-image manylinux aarch64 docker image (overrides default env) - --docker-image Explicit docker image to use (skips auto selection) - --dry-run Print the docker command without executing it - -h, --help Show this help -EOF -} - -# Defaults - can be overridden by options -WORKSPACE="${GITHUB_WORKSPACE:-$(pwd)}" -MANYLINUX_X86_64_IMAGE="${MANYLINUX_X86_64_IMAGE:-quay.io/pypa/manylinux_2_28_x86_64:latest}" -MANYLINUX_AARCH64_IMAGE="${MANYLINUX_AARCH64_IMAGE:-quay.io/pypa/manylinux_2_28_aarch64:latest}" -DOCKER_IMAGE="" -ARCH="" -DRY_RUN=0 - -# Parse args -while [[ $# -gt 0 ]]; do - case "$1" in - --os) MATRIX_OS="$2"; shift 2;; - --python) PY_VERSION_RAW="$2"; shift 2;; - --workspace) WORKSPACE="$2"; shift 2;; - --x86-image) MANYLINUX_X86_64_IMAGE="$2"; shift 2;; - --aarch64-image) MANYLINUX_AARCH64_IMAGE="$2"; shift 2;; - --docker-image) DOCKER_IMAGE="$2"; shift 2;; - --arch) ARCH="$2"; shift 2;; - --dry-run) DRY_RUN=1; shift;; - -h|--help) print_usage; exit 0;; - *) echo "Unknown argument: $1"; print_usage; exit 2;; - esac -done - -if [[ -z "${MATRIX_OS:-}" ]] || [[ -z "${PY_VERSION_RAW:-}" ]] || [[ -z "${ARCH:-}" ]]; then - echo "Error: --os, --python and --arch are required." - print_usage - exit 2 -fi - -# Normalize ARCH to uppercase -ARCH="${ARCH^^}" - -# Normalize Python version: remove dots (e.g. 3.10 -> 310) -PY_VERSION_NO_DOTS="${PY_VERSION_RAW//./}" - -# Determine DOCKER_IMAGE and PLAT strictly from ARCH (unless --docker-image supplied) -PLAT="" -case "$ARCH" in - X86|X64) - PLAT="manylinux_2_28_x86_64" - DOCKER_IMAGE="${DOCKER_IMAGE:-$MANYLINUX_X86_64_IMAGE}" - ;; - ARM|ARM64) - PLAT="manylinux_2_28_aarch64" - DOCKER_IMAGE="${DOCKER_IMAGE:-$MANYLINUX_AARCH64_IMAGE}" - ;; - *) - echo "Error: Unsupported ARCH '$ARCH'. 
Use one of: X86, X64, ARM, ARM64." - exit 2 - ;; -esac - -echo "Matrix OS: $MATRIX_OS" -echo "Arch (input): $ARCH" -echo "Selected docker image: $DOCKER_IMAGE" -echo "Platform (PLAT): $PLAT" -echo "Python version (raw): $PY_VERSION_RAW" -echo "PY_VERSION without dots: $PY_VERSION_NO_DOTS" -echo "Workspace: $WORKSPACE" - -# Basic checks -if ! command -v docker >/dev/null 2>&1; then - echo "Error: docker is required but not installed or not on PATH." - exit 3 -fi - -SCRIPT='set -e -yum install -y git sudo wget || true -git config --global --add safe.directory /work -ls -alh /opt/python || true -echo "PY_VERSION: $PY_VERSION" -ls /opt/python/cp${PY_VERSION}-cp${PY_VERSION} || true -ls /opt/python/cp${PY_VERSION}-cp${PY_VERSION}/bin || true -export PATH=/opt/python/cp${PY_VERSION}-cp${PY_VERSION}/bin:$PATH -echo "PATH: $PATH" -echo "Using Python from: $(which python || echo not-found)" -echo "Python version: $(python -V 2>&1 || true)" -bash ci/run_ci.sh install_bazel -bash ci/deploy.sh build_pyfory' - -DOCKER_CMD=(docker run --rm - -e "PY_VERSION=$PY_VERSION_NO_DOTS" - -e "PLAT=$PLAT" - -v "$WORKSPACE":/work - -w /work - "$DOCKER_IMAGE" - bash -lc "$SCRIPT" -) - -# Show the final command (joined) for clarity -echo -echo "Docker command to be executed:" -printf ' %q' "${DOCKER_CMD[@]}" -echo -echo - -if [[ $DRY_RUN -eq 1 ]]; then - echo "Dry run enabled; not executing docker command." - exit 0 -fi - -# Execute -"${DOCKER_CMD[@]}" diff --git a/ci/deploy.sh b/ci/deploy.sh index 9a71fceb9d..58648a3fe8 100755 --- a/ci/deploy.sh +++ b/ci/deploy.sh @@ -18,16 +18,20 @@ # under the License. +# Print commands and their arguments as they are executed. set -x # Cause the script to exit if a single command fails. set -e -# configure ~/.pypirc before run this script -#if [ ! 
-f ~/.pypirc ]; then -# echo "Please configure .pypirc before run this script" -# exit 1 -#fi +# Prefer Python from $PYTHON_PATH if it exists, otherwise use default python +if [ -n "$PYTHON_PATH" ] && [ -x "$PYTHON_PATH" ]; then + PYTHON_CMD="$PYTHON_PATH" + PIP_CMD="$PYTHON_PATH -m pip" +else + PYTHON_CMD="python" + PIP_CMD="pip" +fi ROOT="$(git rev-parse --show-toplevel)" cd "$ROOT" @@ -63,34 +67,32 @@ deploy_jars() { } build_pyfory() { - echo "Python version $(python -V), path $(which python)" + echo "$($PYTHON_CMD -V), path $(which "$PYTHON_CMD")" install_pyarrow - pip install Cython wheel pytest auditwheel + $PIP_CMD install cython wheel pytest pushd "$ROOT/python" - pip list + $PIP_CMD list echo "Install pyfory" # Fix strange installed deps not found - pip install setuptools -U + $PIP_CMD install setuptools -U if [[ "$OSTYPE" == "darwin"* ]]; then MACOS_VERSION=$(sw_vers -productVersion | cut -d. -f1-2) echo "MACOS_VERSION: $MACOS_VERSION" if [[ "$MACOS_VERSION" == "13"* ]]; then export MACOSX_DEPLOYMENT_TARGET=10.13 - python setup.py bdist_wheel --plat-name macosx_10_13_x86_64 --dist-dir=../dist + $PYTHON_CMD setup.py bdist_wheel --plat-name macosx_10_13_x86_64 --dist-dir=../dist else - python setup.py bdist_wheel --dist-dir=../dist + $PYTHON_CMD setup.py bdist_wheel --dist-dir=../dist fi else - python setup.py bdist_wheel --dist-dir=../dist + $PYTHON_CMD setup.py bdist_wheel --dist-dir=../dist fi - ls -l ../dist - if [ -n "$PLAT" ]; then # In manylinux container, repair the wheel to embed shared libraries # and rename the wheel with the manylinux tag. 
- PYARROW_LIB_DIR=$(python -c 'import pyarrow; print(":".join(pyarrow.get_library_dirs()))') + PYARROW_LIB_DIR=$($PYTHON_CMD -c 'import pyarrow; print(":".join(pyarrow.get_library_dirs()))') export LD_LIBRARY_PATH="$PYARROW_LIB_DIR:$LD_LIBRARY_PATH" auditwheel repair ../dist/pyfory-*-linux_*.whl --plat "$PLAT" --exclude '*arrow*' --exclude '*parquet*' --exclude '*numpy*' -w ../dist/ rm ../dist/pyfory-*-linux_*.whl @@ -99,17 +101,19 @@ build_pyfory() { elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then echo "Skip windows wheel repair" fi + + echo "Wheels for $PYTHON_CMD:" ls -l ../dist popd } install_pyarrow() { - pyversion=$(python -V | cut -d' ' -f2) + pyversion=$($PYTHON_CMD -V | cut -d' ' -f2) if [[ $pyversion == 3.13* ]]; then - pip install pyarrow==18.0.0 - pip install numpy + $PIP_CMD install pyarrow==18.0.0 + $PIP_CMD install numpy else - pip install pyarrow==15.0.0 + $PIP_CMD install pyarrow==15.0.0 # Automatically install numpy fi } diff --git a/ci/run_ci.sh b/ci/run_ci.sh index 0c4fb52f64..63a1330d14 100755 --- a/ci/run_ci.sh +++ b/ci/run_ci.sh @@ -44,12 +44,11 @@ export FORY_CI=true install_python() { wget -q https://repo.anaconda.com/miniconda/Miniconda3-py38_23.5.2-0-Linux-x86_64.sh -O Miniconda3.sh bash Miniconda3.sh -b -p $HOME/miniconda && rm -f miniconda.* - which python - echo "Python version $(python -V), path $(which python)" + echo "$(python -V), path $(which python)" } install_pyfory() { - echo "Python version $(python -V), path $(which python)" + echo "$(python -V), path $(which python)" "$ROOT"/ci/deploy.sh install_pyarrow pip install Cython wheel pytest pushd "$ROOT/python" @@ -90,15 +89,15 @@ install_bazel() { esac BAZEL_VERSION=$(get_bazel_version) - BAZEL_DIR="/usr/local/bin" + BAZEL_DIR="$HOME/.local/bin" + mkdir -p "$BAZEL_DIR" # Construct platform-specific URL BINARY_URL="https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-${OS}-${ARCH}" echo "Downloading bazel from: $BINARY_URL" - 
sudo wget -q -O "$BAZEL_DIR/bazel" "$BINARY_URL" || { echo "Failed to download bazel"; exit 1; } - - sudo chmod +x "$BAZEL_DIR/bazel" + curl -L -sSf -o "$BAZEL_DIR/bazel" "$BINARY_URL" || { echo "Failed to download bazel"; exit 1; } + chmod +x "$BAZEL_DIR/bazel" # Add to current shell's PATH export PATH="$BAZEL_DIR:$PATH" @@ -108,7 +107,7 @@ install_bazel() { bazel version || { echo "Bazel installation verification failed"; exit 1; } # Configure number of jobs based on memory - if [[ "$MACHINE" == linux ]]; then + if [[ "$OS" == linux ]]; then MEM=$(grep MemTotal < /proc/meminfo | awk '{print $2}') JOBS=$(( MEM / 1024 / 1024 / 3 )) echo "build --jobs=$JOBS" >> ~/.bazelrc diff --git a/ci/tasks/python.py b/ci/tasks/python.py index 252d238d50..ea53e94332 100644 --- a/ci/tasks/python.py +++ b/ci/tasks/python.py @@ -25,7 +25,7 @@ def install_pyfory(): logging.info("Installing pyfory package") python_version = common.exec_cmd("python -V") python_path = common.exec_cmd("which python") - logging.info(f"Python version {python_version}, path {python_path}") + logging.info(f"{python_version}, path {python_path}") # Install PyArrow common.exec_cmd(f"{common.PROJECT_ROOT_DIR}/ci/deploy.sh install_pyarrow") diff --git a/python/setup.py b/python/setup.py index 9c488e10e3..87123efa8d 100644 --- a/python/setup.py +++ b/python/setup.py @@ -30,7 +30,7 @@ os.environ["CFLAGS"] = "-O0" BAZEL_BUILD_EXT = False -print(f"DEBUG = {DEBUG}, BAZEL_BUILD_EXT = {BAZEL_BUILD_EXT}") +print(f"DEBUG = {DEBUG}, BAZEL_BUILD_EXT = {BAZEL_BUILD_EXT}, PATH = {os.environ.get('PATH')}") setup_dir = abspath(os.path.dirname(__file__)) project_dir = abspath(pjoin(setup_dir, os.pardir)) @@ -39,7 +39,6 @@ print(f"setup_dir: {setup_dir}") print(f"fory_cpp_src_dir: {fory_cpp_src_dir}") - class BinaryDistribution(Distribution): def __init__(self, attrs=None): super().__init__(attrs=attrs) @@ -58,6 +57,4 @@ def has_ext_modules(self): if __name__ == "__main__": - setup( - distclass=BinaryDistribution, - ) + 
setup(distclass=BinaryDistribution) From 6b6486d4eece32353a0426de8ad225bd8136f069 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Tue, 19 Aug 2025 18:56:00 +0800 Subject: [PATCH 13/22] feat(python): set default language to python for pyfory (#2490) ## What does this PR do? ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- python/pyfory/_fory.py | 2 +- python/pyfory/_serialization.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index c78e03746e..35ab78d36b 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -117,7 +117,7 @@ class Fory: def __init__( self, - language=Language.XLANG, + language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, ): diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 156693bdc0..c614524005 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -602,7 +602,7 @@ cdef class Fory: def __init__( self, - language=Language.XLANG, + language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, ): From 66a2e78a68fbd9f29605c670383769f9fe2ec597 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Tue, 19 Aug 2025 18:56:27 +0800 Subject: [PATCH 14/22] feat(python): add register api to python (#2491) ## What does this PR do? ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark --- python/pyfory/_serialization.pyx | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index c614524005..2a1e2e5173 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -650,14 +650,26 @@ cdef class Fory: def register_serializer(self, cls: Union[type, TypeVar], Serializer serializer): self.type_resolver.register_serializer(cls, serializer) + def register( + self, + cls: Union[type, TypeVar], + *, + type_id: int = None, + namespace: str = None, + typename: str = None, + serializer=None, + ): + self.type_resolver.register_type( + cls, type_id=type_id, namespace=namespace, typename=typename, serializer=serializer) + def register_type( - self, - cls: Union[type, TypeVar], - *, - type_id: int = None, - namespace: str = None, - typename: str = None, - serializer=None, + self, + cls: Union[type, TypeVar], + *, + type_id: int = None, + namespace: str = None, + typename: str = None, + serializer=None, ): self.type_resolver.register_type( cls, type_id=type_id, namespace=namespace, typename=typename, serializer=serializer) From 2c912cd187595d88e19a329a4548597e9351a5ad Mon Sep 17 00:00:00 2001 From: Steven Schlansker Date: Thu, 21 Aug 2025 00:03:06 -0700 Subject: [PATCH 15/22] fix(java): Encoders.mapEncoder(TypeRef, TypeRef, TypeRef, Fory) should load bean classes (#2494) We dynamically select the key and value type, and tried using this overload of Encoders.mapEncoder With the single-arg mapEncoder invocation, bean codec classes are loaded with `token4BeanLoad` But this overload never loads the bean classes leading to unexpected exceptions during Map codec compile since bean codecs are not loaded --- .../apache/fory/format/encoder/Encoders.java | 21 ++++++++++++++++++- .../fory/format/encoder/MapEncoderTest.java | 18 ++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git 
a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java index 18845507b8..522f54b27b 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java @@ -474,7 +474,7 @@ public static MapEncoder mapEncoder(TypeRef token, Fory fo TypeRef keyToken = token4BeanLoad(set1, tuple2.f0); TypeRef valToken = token4BeanLoad(set2, tuple2.f1); - MapEncoder encoder = mapEncoder(token, keyToken, valToken, fory); + MapEncoder encoder = mapEncoder0(token, keyToken, valToken, fory); return createMapEncoder(encoder); } @@ -495,6 +495,22 @@ public static MapEncoder mapEncoder( Preconditions.checkNotNull(keyToken); Preconditions.checkNotNull(valToken); + Set> set1 = beanSet(keyToken); + Set> set2 = beanSet(valToken); + LOG.info("Find beans to load: {}, {}", set1, set2); + + token4BeanLoad(set1, keyToken); + token4BeanLoad(set2, valToken); + + return mapEncoder0(mapToken, keyToken, valToken, fory); + } + + private static MapEncoder mapEncoder0( + TypeRef mapToken, TypeRef keyToken, TypeRef valToken, Fory fory) { + Preconditions.checkNotNull(mapToken); + Preconditions.checkNotNull(keyToken); + Preconditions.checkNotNull(valToken); + Schema schema = TypeInference.inferSchema(mapToken, false); Field field = DataTypes.fieldOfSchema(schema, 0); Field keyField = DataTypes.keyArrayFieldForMap(field); @@ -685,6 +701,9 @@ public static Class loadOrGenRowCodecClass(Class beanClass) { TypeUtils.listBeansRecursiveInclusive( beanClass, new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true)); + if (classes.isEmpty()) { + return null; + } LOG.info("Create RowCodec for classes {}", classes); CompileUnit[] compileUnits = classes.stream() diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/MapEncoderTest.java 
b/java/fory-format/src/test/java/org/apache/fory/format/encoder/MapEncoderTest.java index cf40f70ac4..3b0fdea73e 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/MapEncoderTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/MapEncoderTest.java @@ -134,9 +134,9 @@ public void testSimpleNestStructWithMapEncoder() { @Test public void testKVStructMap() { Map map = ImmutableMap.of(SimpleFoo.create(), SimpleFoo.create()); - MapEncoder encoder = Encoders.mapEncoder(new TypeRef>() {}); + var encoder = Encoders.mapEncoder(new TypeRef>() {}); testStreamingEncode(encoder, map); - MapEncoder encoder1 = Encoders.mapEncoder(new TypeRef>() {}); + var encoder1 = Encoders.mapEncoder(new TypeRef>() {}); testStreamingEncode(encoder1, ImmutableMap.of(Foo.create(), Foo.create())); } @@ -192,4 +192,18 @@ public void testNestArrayWithMapEncoder() { testStreamingEncode(encoder, lmap); } + + @Test + public void testDynamicTypeDeclaration() { + Encoders.mapEncoder( + new TypeRef>() {}, + TypeRef.of(Integer.class), + TypeRef.of(Bean.class), + null) + .encode(new HashMap<>()); + } + + public static class Bean { + int f1; + } } From 6f54e1992daf5ef21e5ed3f478600ac7a248f297 Mon Sep 17 00:00:00 2001 From: urlyy Date: Fri, 22 Aug 2025 18:37:12 +0800 Subject: [PATCH 16/22] docs: fix meta_size_mask (#2495) ## What does this PR do? correct meta_size_mask in doc. --- docs/specification/java_serialization_spec.md | 4 ++-- docs/specification/xlang_serialization_spec.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/specification/java_serialization_spec.md b/docs/specification/java_serialization_spec.md index 51fa304801..4efae21923 100644 --- a/docs/specification/java_serialization_spec.md +++ b/docs/specification/java_serialization_spec.md @@ -136,8 +136,8 @@ Class meta are encoded from parent class to leaf class, only class with serializ Meta header is a 64 bits number value encoded in little endian order. 
-- lower 12 bits are used to encode meta size. If meta size `>= 0b111_1111_1111`, then write - `meta_ size - 0b111_1111_1111` next. +- lower 12 bits are used to encode meta size. If meta size `>= 0b1111_1111_1111`, then write + `meta_ size - 0b1111_1111_1111` next. - 13rd bit is used to indicate whether to write fields meta. When this class is schema-consistent or use registered serializer, fields meta will be skipped. Class Meta will be used for share namespace + type name only. - 14rd bit is used to indicate whether meta is compressed. diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index 2fcbab1db7..ba0cc0067c 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -319,8 +319,8 @@ subclass. `50 bits hash + 1bit compress flag + write fields meta + 12 bits meta size`. Right is the lower bits. -- lower 12 bits are used to encode meta size. If meta size `>= 0b111_1111_1111`, then write - `meta_ size - 0b111_1111_1111` next. +- lower 12 bits are used to encode meta size. If meta size `>= 0b1111_1111_1111`, then write + `meta_ size - 0b1111_1111_1111` next. - 13rd bit is used to indicate whether to write fields meta. When this class is schema-consistent or use registered serializer, fields meta will be skipped. Class Meta will be used for share namespace + type name only. - 14rd bit is used to indicate whether meta is compressed. From 1735e4d3ffbb21886ef848e63992348d60794eb4 Mon Sep 17 00:00:00 2001 From: Asnowww <23301095@bjtu.edu.cn> Date: Fri, 22 Aug 2025 18:37:43 +0800 Subject: [PATCH 17/22] chore: fix typos (#2496) ## What does this PR do? fix typos ## Related issues ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? 
## Benchmark --- .../src/main/java/org/apache/fory/codegen/Expression.java | 2 +- .../org/apache/fory/format/encoder/ArrayDataForEach.java | 2 +- python/pyfory/_serialization.pyx | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/codegen/Expression.java b/java/fory-core/src/main/java/org/apache/fory/codegen/Expression.java index 582c236129..0d9e236e22 100644 --- a/java/fory-core/src/main/java/org/apache/fory/codegen/Expression.java +++ b/java/fory-core/src/main/java/org/apache/fory/codegen/Expression.java @@ -2432,7 +2432,7 @@ public ExprCode doGenCode(CodegenContext ctx) { action.apply( new Reference(i), new Reference(leftElemValue, leftElemType, true), - // elemValue nullability check use isNullAt inside action, so elemValueRef'nullable is + // elemValue nullability check uses isNullAt inside action, so elemValueRef's nullable is // false. new Reference(rightElemValue, rightElemType, false)); ExprCode elementExprCode = elemExpr.genCode(ctx); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayDataForEach.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayDataForEach.java index 49910d9584..13e1e3d52c 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayDataForEach.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayDataForEach.java @@ -110,7 +110,7 @@ public Code.ExprCode doGenCode(CodegenContext ctx) { String i = freshNames[0]; String elemValue = freshNames[1]; String len = freshNames[2]; - // elemValue is only used in notNullAction, so set elemValueRef'nullable to false. + // elemValue is only used in notNullAction, so set elemValueRef's nullable to false. 
Reference elemValueRef = new Reference(elemValue, elemType); Code.ExprCode notNullElemExprCode = notNullAction.apply(new Reference(i), elemValueRef).genCode(ctx); diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 2a1e2e5173..d98a248d53 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -388,9 +388,9 @@ cdef class TypeInfo: for python `int`: `Int8/1632/64/128Serializer` for `int8/16/32/64/128` each, and another `IntSerializer` for `int` which will dispatch to different `int8/16/32/64/128` type according the actual value. - We do not get the acutal type here, because it will introduce extra computing. + We do not get the actual type here, because it will introduce extra computing. For example, we have want to get actual `Int8/16/32/64Serializer`, we must check and - extract the actutal here which will introduce cost, and we will do same thing again + extract the actual here which will introduce cost, and we will do same thing again when serializing the actual data. """ cdef public object cls @@ -1555,7 +1555,7 @@ cdef inline get_next_element( typeinfo = type_resolver.read_typeinfo(buffer) cdef int32_t type_id = typeinfo.type_id # Note that all read operations in fast paths of list/tuple/set/dict/sub_dict - # ust match corresponding writing operations. Otherwise, ref tracking will + # must match corresponding writing operations. Otherwise, ref tracking will # error. 
if type_id == TypeId.STRING: return buffer.read_string() From 0922ec720640333b90b4064ce95bbff5b36d6b28 Mon Sep 17 00:00:00 2001 From: Steven Schlansker Date: Fri, 22 Aug 2025 12:45:06 -0700 Subject: [PATCH 18/22] fix(java): row format generated bean types handling Optional (#2497) Incorrect check leads to losing OptionalInt, OptionalLong, and OptionalDouble values with generated bean implementation --- .../java/org/apache/fory/type/TypeUtils.java | 11 +++ .../format/encoder/RowEncoderBuilder.java | 7 +- .../encoder/ImplementInterfaceTest.java | 75 +++++++++++++++++-- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index 5d35b5b5c4..afee63b3db 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -656,6 +656,17 @@ public static boolean isBean(TypeRef typeRef, TypeResolutionContext ctx) { } } + /** + * Check if a class is one of {@link Optional}, {@link OptionalInt}, + * {@link OptionalLong}, or {@link OptionalDouble}. 
+ */ + public static boolean isOptionalType(Class type) { + return type == Optional.class + || type == OptionalInt.class + || type == OptionalLong.class + || type == OptionalDouble.class; + } + private static boolean isSynthesizableInterface(Class cls) { return cls.isInterface() && !Collection.class.isAssignableFrom(cls) diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java index 45f65fff93..49ec2e134f 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java @@ -285,10 +285,7 @@ public Expression buildDecodeExpression() { private static Expression nullValue(TypeRef fieldType) { Class rawType = fieldType.getRawType(); - if (rawType == Optional.class - || rawType == OptionalInt.class - || rawType == OptionalLong.class - || rawType == OptionalDouble.class) { + if (TypeUtils.isOptionalType(rawType)) { return new Expression.StaticInvoke(rawType, "empty", "", fieldType, false, true); } return new Expression.Reference(TypeUtils.defaultValue(rawType), fieldType); @@ -361,7 +358,7 @@ private CodegenContext buildImplClass() { Expression storeValue = new Expression.SetField(new Expression.Reference("this"), fieldName, decodeValue); Expression shouldLoad; - if (rawFieldType == Optional.class) { + if (TypeUtils.isOptionalType(rawFieldType)) { shouldLoad = new Expression.Not( Expression.Invoke.inlineInvoke(fieldRef, "isPresent", TypeUtils.BOOLEAN_TYPE)); diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java index cdd38be6cf..b942d5dc01 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java +++ 
b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java @@ -22,7 +22,11 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.OptionalDouble; +import java.util.OptionalInt; +import java.util.OptionalLong; import java.util.TreeSet; + import lombok.Data; import org.apache.arrow.vector.types.pojo.Field; import org.apache.fory.annotation.ForyField; @@ -141,35 +145,56 @@ public PoisonPill decode(final byte[] value) { public interface OptionalType { Optional f1(); + OptionalInt f2(); + OptionalLong f3(); + OptionalDouble f4(); } static class OptionalTypeImpl implements OptionalType { - private final Optional f1; - - OptionalTypeImpl(final Optional f1) { - this.f1 = f1; - } + Optional f1; + OptionalInt f2; + OptionalLong f3; + OptionalDouble f4; @Override public Optional f1() { return f1; } + + @Override + public OptionalInt f2() { + return f2; + } + + @Override + public OptionalLong f3() { + return f3; + } + + @Override + public OptionalDouble f4() { + return f4; + } } @Test public void testNullOptional() { - final OptionalType bean1 = new OptionalTypeImpl(null); + final OptionalType bean1 = new OptionalTypeImpl(); final RowEncoder encoder = Encoders.bean(OptionalType.class); final BinaryRow row = encoder.toRow(bean1); final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); row.pointTo(buffer, 0, buffer.size()); final OptionalType deserializedBean = encoder.fromRow(row); Assert.assertEquals(deserializedBean.f1(), Optional.empty()); + Assert.assertEquals(deserializedBean.f2(), OptionalInt.empty()); + Assert.assertEquals(deserializedBean.f3(), OptionalLong.empty()); + Assert.assertEquals(deserializedBean.f4(), OptionalDouble.empty()); } @Test public void testPresentOptional() { - final OptionalType bean1 = new OptionalTypeImpl(Optional.of("42")); + final OptionalTypeImpl bean1 = new OptionalTypeImpl(); + bean1.f1 = Optional.of("42"); final RowEncoder encoder = 
Encoders.bean(OptionalType.class); final BinaryRow row = encoder.toRow(bean1); final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); @@ -178,6 +203,42 @@ public void testPresentOptional() { Assert.assertEquals(deserializedBean.f1(), Optional.of("42")); } + @Test + public void testPresentOptionalInteger() { + final OptionalTypeImpl bean1 = new OptionalTypeImpl(); + bean1.f2 = OptionalInt.of(42); + final RowEncoder encoder = Encoders.bean(OptionalType.class); + final BinaryRow row = encoder.toRow(bean1); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final OptionalType deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f2(), OptionalInt.of(42)); + } + + @Test + public void testPresentOptionalLong() { + final OptionalTypeImpl bean1 = new OptionalTypeImpl(); + bean1.f3 = OptionalLong.of(42); + final RowEncoder encoder = Encoders.bean(OptionalType.class); + final BinaryRow row = encoder.toRow(bean1); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final OptionalType deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f3(), OptionalLong.of(42)); + } + + @Test + public void testPresentOptionalDouble() { + final OptionalTypeImpl bean1 = new OptionalTypeImpl(); + bean1.f4 = OptionalDouble.of(42.42); + final RowEncoder encoder = Encoders.bean(OptionalType.class); + final BinaryRow row = encoder.toRow(bean1); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final OptionalType deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f4(), OptionalDouble.of(42.42)); + } + public static class Id { byte id; From 08dad965b1c9d68e3825b1f1c1d856a440e6d312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emre=20=C5=9Eafak?= <3928300+esafak@users.noreply.github.com> Date: Fri, 22 Aug 2025 22:19:04 -0400 Subject: [PATCH 19/22] docs: 
Improve pyfory PyPI documentation (#2498) ## What does this PR do? The existing PyPI documentation for `pyfory` was a single line and uninformative. This change improves the documentation by: 1. Creating a new `python/README.md` with a clear and concise description of the project, installation instructions, and usage examples. This file is used to generate the PyPI page description. 2. Moving the developer-focused build and test instructions to a new `python/CONTRIBUTING.md` file. This will provide a much better experience for Python developers who discover `pyfory` on PyPI, similar to the documentation for other popular libraries like `opendal` and `msgpack`. ## Related issues Closes #2488 --- python/CONTRIBUTING.md | 73 +++++++++++++++++++++++ python/README.md | 131 +++++++++++++++++++++++++---------------- 2 files changed, 154 insertions(+), 50 deletions(-) create mode 100644 python/CONTRIBUTING.md diff --git a/python/CONTRIBUTING.md b/python/CONTRIBUTING.md new file mode 100644 index 0000000000..fbe24c77dd --- /dev/null +++ b/python/CONTRIBUTING.md @@ -0,0 +1,73 @@ +# Contributing to Apache Fory Python + +This document provides instructions for building and testing the `pyfory` package. + +## Building + +```bash +cd python +# Uninstall numpy first so that when we install pyarrow, it will install the correct numpy version automatically. +# For Python versions less than 3.13, numpy 2 is not currently supported. +pip uninstall -y numpy +# Install necessary environment for Python < 3.13. +pip install pyarrow==15.0.0 Cython wheel pytest +# For Python 3.13, pyarrow 18.0.0 is available and requires numpy version greater than 2. +# pip install pyarrow==18.0.0 Cython wheel pytest +pip install -v -e . +``` + +If the last step fails with an error like `libarrow_python.dylib: No such file or directory`, +you are probably suffering from bazel's aggressive caching; the sought library is no longer at the +temporary directory where it was the last time bazel ran. 
To remedy this run + +> bazel clean --expunge + +In this situation, you might also find it fruitful to run bazel yourself before pip: + +> bazel build -s //:cp_fory_so + +### Environment Requirements + +- python 3.8+ + +## Testing + +```bash +cd python +pytest -v -s . +``` + +## Formatting + +```bash +cd python +pip install ruff +ruff format python +``` + +## Debugging + +```bash +cd python +python setup.py develop +``` + +- Use `cython --cplus -a pyfory/_serialization.pyx` to produce an annotated HTML file of the source code. Then you can + analyze interaction between Python objects and Python's C API. +- Read more: + +```bash +FORY_DEBUG=true python setup.py build_ext --inplace +# For linux +cygdb build +``` + +### Debugging with lldb + +```bash +lldb +(lldb) target create -- python +(lldb) settings set -- target.run-args "-c" "from pyfory.tests.test_serializer import test_enum; test_enum()" +(lldb) run +(lldb) bt +``` diff --git a/python/README.md b/python/README.md index fba1679573..fb29c7dc58 100644 --- a/python/README.md +++ b/python/README.md @@ -1,73 +1,104 @@ # Apache Fory™ Python -Fory is a blazingly-fast multi-language serialization framework powered by just-in-time compilation and zero-copy. 
+[![Build Status](https://img.shields.io/github/actions/workflow/status/apache/fory/ci.yml?branch=main&style=for-the-badge&label=GITHUB%20ACTIONS&logo=github)](https://github.com/apache/fory/actions/workflows/ci.yml) +[![PyPI](https://img.shields.io/pypi/v/pyfory.svg?logo=PyPI)](https://pypi.org/project/pyfory/) +[![Slack Channel](https://img.shields.io/badge/slack-join-3f0e40?logo=slack&style=for-the-badge)](https://join.slack.com/t/fory-project/shared_invite/zt-36g0qouzm-kcQSvV_dtfbtBKHRwT5gsw) +[![X](https://img.shields.io/badge/@ApacheFory-follow-blue?logo=x&style=for-the-badge)](https://x.com/ApacheFory) -## Build Fory Python +**Apache Fory** (formerly _Fury_) is a blazing fast multi-language serialization framework powered by **JIT** (just-in-time compilation) and **zero-copy**, providing up to 170x performance and ease of use. + +This package provides the Python bindings for Apache Fory. + +## Installation + +You can install `pyfory` using pip: ```bash -cd python -# Uninstall numpy first so that when we install pyarrow, it will install the correct numpy version automatically. -# For Python versions less than 3.13, numpy 2 is not currently supported. -pip uninstall -y numpy -# Install necessary environment for Python < 3.13. -pip install pyarrow==15.0.0 Cython wheel pytest -# For Python 3.13, pyarrow 18.0.0 is available and requires numpy version greater than 2. -# pip install pyarrow==18.0.0 Cython wheel pytest -pip install -v -e . +pip install pyfory ``` -If the last steps fails with an error like `libarrow_python.dylib: No such file or directory`, -you are probably suffering from bazel's aggressive caching; the sought library is longer at the -temporary directory it was the last time bazel ran. To remedy this run - -> bazel clean --expunge +## Quickstart -In this situation, you might also find it fruitful to run bazel yourself before pip: +Here are a few examples of how to use `pyfory` for serialization. 
-> bazel build -s //:cp_fory_so +### Basic Serialization -### Environment Requirements +This example shows how to serialize and deserialize a simple Python object. -- python 3.8+ +```python +from typing import Dict +import pyfory -## Testing +class SomeClass: + f1: "SomeClass" + f2: Dict[str, str] + f3: Dict[str, str] -```bash -cd python -pytest -v -s . +fory = pyfory.Fory(ref_tracking=True) +fory.register_type(SomeClass, typename="example.SomeClass") +obj = SomeClass() +obj.f2 = {"k1": "v1", "k2": "v2"} +obj.f1, obj.f3 = obj, obj.f2 +data = fory.serialize(obj) +# bytes can be data serialized by other languages. +print(fory.deserialize(data)) ``` -## Code Style +### Cross-language Serialization -```bash -cd python -pip install ruff -ruff format python -``` +Fory excels at cross-language serialization. You can serialize data in Python and deserialize it in another language like Java or Go, and vice-versa. -## Debug +Here's an example of how to serialize an object in Python and deserialize it in Java: -```bash -cd python -python setup.py develop -``` +**Python** -- Use `cython --cplus -a pyfory/_serialization.pyx` to produce an annotated HTML file of the source code. Then you can - analyze interaction between Python objects and Python's C API. 
-- Read more: +```python +from typing import Dict +import pyfory -```bash -FORY_DEBUG=true python setup.py build_ext --inplace -# For linux -cygdb build -``` +class SomeClass: + f1: "SomeClass" + f2: Dict[str, str] + f3: Dict[str, str] -## Debug with lldb +fory = pyfory.Fory(ref_tracking=True) +fory.register_type(SomeClass, typename="example.SomeClass") +obj = SomeClass() +obj.f2 = {"k1": "v1", "k2": "v2"} +obj.f1, obj.f3 = obj, obj.f2 +data = fory.serialize(obj) +# `data` can now be sent to a Java application +``` -```bash -lldb -(lldb) target create -- python -(lldb) settings set -- target.run-args "-c" "from pyfory.tests.test_serializer import test_enum; test_enum()" -(lldb) run -(lldb) bt +**Java** + +```java +import org.apache.fory.*; +import org.apache.fory.config.*; +import java.util.*; + +public class ReferenceExample { + public static class SomeClass { + SomeClass f1; + Map f2; + Map f3; + } + + public static void main(String[] args) { + Fory fory = Fory.builder().withLanguage(Language.XLANG) + .withRefTracking(true).build(); + fory.register(SomeClass.class, "example.SomeClass"); + // `bytes` would be the data received from the Python application + byte[] bytes = ... + System.out.println(fory.deserialize(bytes)); + } +} ``` + +## Useful Links + +- **[Project Website](https://fory.apache.org)** +- **[Documentation](https://fory.apache.org/docs/latest/python_guide/)** +- **[GitHub Repository](https://github.com/apache/fory)** +- **[Issue Tracker](https://github.com/apache/fory/issues)** +- **[Slack Channel](https://join.slack.com/t/fory-project/shared_invite/zt-36g0qouzm-kcQSvV_dtfbtBKHRwT5gsw)** From 49746f34646dd81e0c2e3ea751be57b9c6788c34 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Sat, 23 Aug 2025 10:42:41 +0800 Subject: [PATCH 20/22] docs(python): add row format doc (#2499) ## What does this PR do? ## Related issues #2498 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? 
- [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark --- python/README.md | 76 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/python/README.md b/python/README.md index fb29c7dc58..963c8409a0 100644 --- a/python/README.md +++ b/python/README.md @@ -46,7 +46,7 @@ print(fory.deserialize(data)) ### Cross-language Serialization -Fory excels at cross-language serialization. You can serialize data in Python and deserialize it in another language like Java or Go, and vice-versa. +Apache Fory excels at cross-language serialization. You can serialize data in Python and deserialize it in another language like Java or Go, and vice-versa. Here's an example of how to serialize an object in Python and deserialize it in Java: @@ -95,6 +95,80 @@ public class ReferenceExample { } ``` +### Row Format Zero-Copy Partial Serialization + +Apache Fory provides a random-access row format, which supports mapping a typed nested struct into a binary and reading its nested elements without deserializing the whole binary. This can be used to minimize the deserialization overhead for huge objects in the case where you only need to access part of the data. You can even encode huge objects into binary and write them to a file, then mmap that file into memory to reduce memory overhead too. 
+ +**Python** + +```python +@dataclass +class Bar: + f1: str + f2: List[pa.int64] +@dataclass +class Foo: + f1: pa.int32 + f2: List[pa.int32] + f3: Dict[str, pa.int32] + f4: List[Bar] + +encoder = pyfory.encoder(Foo) +foo = Foo(f1=10, f2=list(range(1000_000)), + f3={f"k{i}": i for i in range(1000_000)}, + f4=[Bar(f1=f"s{i}", f2=list(range(10))) for i in range(1000_000)]) +binary: bytes = encoder.to_row(foo).to_bytes() +foo_row = pyfory.RowData(encoder.schema, binary) +print(foo_row.f2[100000], foo_row.f4[100000].f1, foo_row.f4[200000].f2[5]) +``` + +**Java** + +```java +public class Bar { + String f1; + List f2; +} + +public class Foo { + int f1; + List f2; + Map f3; + List f4; +} + +RowEncoder encoder = Encoders.bean(Foo.class); +Foo foo = new Foo(); +foo.f1 = 10; +foo.f2 = IntStream.range(0, 1000000).boxed().collect(Collectors.toList()); +foo.f3 = IntStream.range(0, 1000000).boxed().collect(Collectors.toMap(i -> "k"+i, i->i)); +List bars = new ArrayList<>(1000000); +for (int i = 0; i < 1000000; i++) { + Bar bar = new Bar(); + bar.f1 = "s"+i; + bar.f2 = LongStream.range(0, 10).boxed().collect(Collectors.toList()); + bars.add(bar); +} +foo.f4 = bars; +// Can be zero-copy read by python +BinaryRow binaryRow = encoder.toRow(foo); +// can be data from python +Foo newFoo = encoder.fromRow(binaryRow); +// zero-copy read List f2 +BinaryArray binaryArray2 = binaryRow.getArray(1); +// zero-copy read List f4 +BinaryArray binaryArray4 = binaryRow.getArray(3); +// zero-copy read 11th element of `readList f4` +BinaryRow barStruct = binaryArray4.getStruct(10); + +// zero-copy read 6th of f2 of 11th element of `readList f4` +barStruct.getArray(1).getInt64(5); +RowEncoder barEncoder = Encoders.bean(Bar.class); +// deserialize part of data. 
+Bar newBar = barEncoder.fromRow(barStruct); +Bar newBar2 = barEncoder.fromRow(binaryArray4.getStruct(20)); +``` + ## Useful Links - **[Project Website](https://fory.apache.org)** From 50f5d4d4ceca0942af5f02328bf0db1b40362cb1 Mon Sep 17 00:00:00 2001 From: opensnail Date: Sat, 23 Aug 2025 12:11:55 +0800 Subject: [PATCH 21/22] fix: Fix the issue of addListener not releasing the lock (#2500) Co-authored-by: opensnail <598092184@qq.com> --- .../main/java/org/apache/fory/resolver/AllowListChecker.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/AllowListChecker.java b/java/fory-core/src/main/java/org/apache/fory/resolver/AllowListChecker.java index 507c9522d7..b49ded2e4f 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/AllowListChecker.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/AllowListChecker.java @@ -237,8 +237,9 @@ private void disallow(String classNameOrPrefix) { public void addListener(ClassResolver classResolver) { try { lock.writeLock().lock(); - } finally { listeners.put(classResolver, true); + } finally { + lock.writeLock().unlock(); } } From a0b1c2122ee428ad01954e2924a5ab5e109c413d Mon Sep 17 00:00:00 2001 From: PAN <1162953505@qq.com> Date: Sun, 24 Aug 2025 11:05:52 +0800 Subject: [PATCH 22/22] bump version --- integration_tests/graalvm_tests/pom.xml | 2 +- integration_tests/jdk_compatibility_tests/pom.xml | 2 +- integration_tests/jpms_tests/pom.xml | 2 +- integration_tests/latest_jdk_tests/pom.xml | 2 +- java/benchmark/pom.xml | 2 +- java/fory-core/pom.xml | 2 +- java/fory-extensions/pom.xml | 2 +- java/fory-format/pom.xml | 2 +- java/fory-test-core/pom.xml | 2 +- java/fory-testsuite/pom.xml | 2 +- java/pom.xml | 2 +- javascript/packages/fory/package.json | 2 +- javascript/packages/hps/package.json | 2 +- kotlin/pom.xml | 2 +- python/pyfory/__init__.py | 2 +- rust/Cargo.toml | 2 +- scala/build.sbt | 2 +- 17 files changed, 17 insertions(+), 
17 deletions(-) diff --git a/integration_tests/graalvm_tests/pom.xml b/integration_tests/graalvm_tests/pom.xml index 84da032e4d..236bcf6254 100644 --- a/integration_tests/graalvm_tests/pom.xml +++ b/integration_tests/graalvm_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 ../../java 4.0.0 diff --git a/integration_tests/jdk_compatibility_tests/pom.xml b/integration_tests/jdk_compatibility_tests/pom.xml index a2eb0f6b88..891782a14c 100644 --- a/integration_tests/jdk_compatibility_tests/pom.xml +++ b/integration_tests/jdk_compatibility_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 ../../java 4.0.0 diff --git a/integration_tests/jpms_tests/pom.xml b/integration_tests/jpms_tests/pom.xml index 00e5a6b948..63d9b522d4 100644 --- a/integration_tests/jpms_tests/pom.xml +++ b/integration_tests/jpms_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 ../../java 4.0.0 diff --git a/integration_tests/latest_jdk_tests/pom.xml b/integration_tests/latest_jdk_tests/pom.xml index e0bc170228..94f4d1b687 100644 --- a/integration_tests/latest_jdk_tests/pom.xml +++ b/integration_tests/latest_jdk_tests/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 ../../java 4.0.0 diff --git a/java/benchmark/pom.xml b/java/benchmark/pom.xml index f4a5990af3..bc8eccfa93 100644 --- a/java/benchmark/pom.xml +++ b/java/benchmark/pom.xml @@ -26,7 +26,7 @@ fory-parent org.apache.fory - 0.13.0-SNAPSHOT + 0.12.1 benchmark diff --git a/java/fory-core/pom.xml b/java/fory-core/pom.xml index 3b8a084db4..79f0ac1dba 100644 --- a/java/fory-core/pom.xml +++ b/java/fory-core/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/java/fory-extensions/pom.xml b/java/fory-extensions/pom.xml index 28677ec42b..79d4056e66 100644 --- a/java/fory-extensions/pom.xml +++ b/java/fory-extensions/pom.xml @@ -25,7 +25,7 @@ org.apache.fory 
fory-parent - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/java/fory-format/pom.xml b/java/fory-format/pom.xml index 42f27a3103..2d9a18f441 100644 --- a/java/fory-format/pom.xml +++ b/java/fory-format/pom.xml @@ -25,7 +25,7 @@ org.apache.fory fory-parent - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/java/fory-test-core/pom.xml b/java/fory-test-core/pom.xml index 0697219038..67a6f90d0d 100644 --- a/java/fory-test-core/pom.xml +++ b/java/fory-test-core/pom.xml @@ -25,7 +25,7 @@ fory-parent org.apache.fory - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/java/fory-testsuite/pom.xml b/java/fory-testsuite/pom.xml index 53c88f8d55..9a6f3c63fa 100644 --- a/java/fory-testsuite/pom.xml +++ b/java/fory-testsuite/pom.xml @@ -25,7 +25,7 @@ fory-parent org.apache.fory - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 12ebf75209..b4f7a42c56 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -33,7 +33,7 @@ org.apache.fory fory-parent pom - 0.13.0-SNAPSHOT + 0.12.1 Fory Project Parent POM Apache Fory™ is a blazingly fast multi-language serialization framework powered by jit and zero-copy. 
diff --git a/javascript/packages/fory/package.json b/javascript/packages/fory/package.json index e30e9f6319..82bd60d843 100644 --- a/javascript/packages/fory/package.json +++ b/javascript/packages/fory/package.json @@ -1,6 +1,6 @@ { "name": "@foryjs/fory", - "version": "0.13.0.dev", + "version": "0.12.1", "description": "Apache Fory™ is a blazingly fast multi-language serialization framework powered by jit and zero-copy", "main": "dist/index.js", "scripts": { diff --git a/javascript/packages/hps/package.json b/javascript/packages/hps/package.json index 03fa1d040a..394afd738c 100644 --- a/javascript/packages/hps/package.json +++ b/javascript/packages/hps/package.json @@ -1,6 +1,6 @@ { "name": "@foryjs/hps", - "version": "0.13.0.dev", + "version": "0.12.1", "description": "Apache Fory™ nodejs high-performance suite", "main": "dist/index.js", "files": [ diff --git a/kotlin/pom.xml b/kotlin/pom.xml index 4c7f7601be..a74178c13c 100644 --- a/kotlin/pom.xml +++ b/kotlin/pom.xml @@ -30,7 +30,7 @@ org.apache.fory fory-kotlin - 0.13.0-SNAPSHOT + 0.12.1 4.0.0 diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index 4a5e46396d..6383048bdb 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -61,4 +61,4 @@ except (AttributeError, ImportError): pass -__version__ = "0.13.0.dev" +__version__ = "0.12.1" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8d75b5bdf1..d2032b22f1 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -30,7 +30,7 @@ exclude = [ resolver = "2" [workspace.package] -version = "0.13.0" +version = "0.12.1" rust-version = "1.70" license = "Apache-2.0" readme = "README.md" diff --git a/scala/build.sbt b/scala/build.sbt index b76c3ba679..7646cac8a5 100644 --- a/scala/build.sbt +++ b/scala/build.sbt @@ -16,7 +16,7 @@ * limitations under the License. */ -val foryVersion = "0.13.0-SNAPSHOT" +val foryVersion = "0.12.1" val scala213Version = "2.13.15" ThisBuild / apacheSonatypeProjectProfile := "fory" version := foryVersion