Fix librt compilation on platforms with OpenMP (#20583)

ilevkivskyi · web-flow · commit c3c7eb1f065f · 2026-01-14T09:41:49.000Z
This file was missed when base64 support was originally vendored.
diff --git a/mypyc/build.py b/mypyc/build.py
@@ -97,6 +97,7 @@ class ModDesc(NamedTuple):
             "base64/arch/neon64/enc_loop_asm.c",
             "base64/codecs.h",
             "base64/env.h",
+            "base64/lib_openmp.c",
             "base64/tables/tables.h",
             "base64/tables/table_dec_32bit.h",
             "base64/tables/table_enc_12bit.h",
diff --git a/mypyc/lib-rt/base64/lib_openmp.c b/mypyc/lib-rt/base64/lib_openmp.c
@@ -0,0 +1,149 @@
+// This code makes some assumptions on the implementation of
+// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
+// Basically these assumptions boil down to that when breaking the src into
+// parts, out parts can be written without side effects.
+// This is met when:
+// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
+// 2) the shared variables src and out are not read or written outside of the
+//    bounds of their parts, i.e.  when base64_stream_encode() reads a multiple
+//    of 3 bytes, it must write no more then a multiple of 4 bytes, not even
+//    temporarily;
+// 3) the state flag can be discarded after base64_stream_encode() and
+//    base64_stream_decode() on the parts.
+
+static inline void
+base64_encode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	size_t s;
+	size_t t;
+	size_t sum = 0, len, last_len;
+	struct base64_state state, initial_state;
+	int num_threads, i;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 3 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 3);
+			len *= 3;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_encode_init(&state, flags);
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier for all threads to wait here
+		// for the above to complete:
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			// Feed each part of the string to the stream reader:
+			base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
+			sum += s;
+		}
+	}
+
+	// As encoding should never fail and we encode an exact multiple
+	// of 3 bytes, we can discard state:
+	state = initial_state;
+
+	// Encode the remaining bytes:
+	base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
+
+	// Finalize the stream by writing trailer if any:
+	base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
+
+	// Final output length is stream length plus tail:
+	sum += s + t;
+	*outlen = sum;
+}
+
+static inline int
+base64_decode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	int num_threads, result = 0, i;
+	size_t sum = 0, len, last_len, s;
+	struct base64_state state, initial_state;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 4 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 4);
+			len *= 4;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_decode_init(&state, flags);
+
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier to wait here for the above to
+		// complete:
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			int this_result;
+
+			// Feed each part of the string to the stream reader:
+			this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
+			sum += s;
+			result += this_result;
+		}
+	}
+
+	// If `result' equals `-num_threads', then all threads returned -1,
+	// indicating that the requested codec is not available:
+	if (result == -num_threads) {
+		return -1;
+	}
+
+	// If `result' does not equal `num_threads', then at least one of the
+	// threads hit a decode error:
+	if (result != num_threads) {
+		return 0;
+	}
+
+	// So far so good, now decode whatever remains in the buffer. Reuse the
+	// initial state, since we are at a 4-byte boundary:
+	state = initial_state;
+	result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
+	sum += s;
+	*outlen = sum;
+
+	// If when decoding a whole block, we're still waiting for input then fail:
+	if (result && (state.bytes == 0)) {
+		return result;
+	}
+	return 0;
+}