Skip to content

Commit 1fd14b5

Browse files
committed
re: fix tests
1 parent 088ecf6 commit 1fd14b5

2 files changed

Lines changed: 80 additions & 16 deletions

File tree

native/src/regex_wrapper.cpp

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
#include <regex>
33
#include <string>
44
#include <unordered_map>
5+
#include <vector>
56
#include <memory>
7+
#include <cstring>
68

79
// A map to store compiled regex objects
810
std::unordered_map<int, std::shared_ptr<std::regex>> regex_cache;
@@ -54,32 +56,69 @@ extern "C" {
5456
std::smatch match;
5557
std::string str(text);
5658
if (std::regex_search(str, match, *it->second)) {
57-
return match.str().c_str(); // Return the matched substring
59+
return strdup(match.str().c_str()); // Return the matched substring
5860
}
5961
return nullptr; // Return nullptr if no match is found
6062
}
6163

64+
6265
// Find all matches of a regex pattern in a string
63-
const char* findall_pattern(int id, const char* text) {
66+
extern "C" char** findall_pattern(int id, const char* text) {
6467
auto it = regex_cache.find(id);
6568
if (it == regex_cache.end()) {
6669
return nullptr; // Return nullptr if the ID is not found
6770
}
6871

6972
std::string str(text);
7073
std::smatch match;
71-
std::string result;
74+
std::vector<std::string> matches;
7275
std::string::const_iterator searchStart(str.cbegin());
7376

77+
// Find all matches
7478
while (std::regex_search(searchStart, str.cend(), match, *it->second)) {
75-
result += match.str() + "\n"; // Append each match to the result string
79+
matches.push_back(match.str()); // Store each match in the vector
7680
searchStart = match.suffix().first;
7781
}
7882

79-
if (!result.empty()) {
80-
return result.c_str(); // Return all matches as a single string separated by newlines
83+
if (matches.empty()) {
84+
return nullptr; // Return nullptr if no matches are found
85+
}
86+
87+
// Allocate an array of char* to hold the matches
88+
char** result = (char**)malloc((matches.size() + 1) * sizeof(char*));
89+
if (!result) {
90+
return nullptr; // Return nullptr if memory allocation fails
91+
}
92+
93+
// Copy each match into the array
94+
for (size_t i = 0; i < matches.size(); ++i) {
95+
result[i] = strdup(matches[i].c_str()); // Duplicate the string
96+
if (!result[i]) {
97+
// Free previously allocated memory if strdup fails
98+
for (size_t j = 0; j < i; ++j) {
99+
free(result[j]);
100+
}
101+
free(result);
102+
return nullptr;
103+
}
104+
}
105+
106+
// Null-terminate the array
107+
result[matches.size()] = nullptr;
108+
109+
return result;
110+
}
111+
112+
// Release a nullptr-terminated array of heap-allocated C strings.
//
// Frees every string up to (not including) the first nullptr entry, then frees
// the array itself. Passing nullptr is a no-op.
extern "C" void free_matches(char** matches) {
    if (matches == nullptr) {
        return;
    }

    for (char** cursor = matches; *cursor != nullptr; ++cursor) {
        free(*cursor);  // Free each string
    }
    free(matches);  // Free the array itself
}
84123

85124
// Substitute all occurrences of a regex pattern in a string
@@ -91,6 +130,6 @@ extern "C" {
91130

92131
std::string str(text);
93132
std::string result = std::regex_replace(str, *it->second, replacement);
94-
return result.c_str(); // Return the modified string
133+
return strdup(result.c_str()); // Return the modified string
95134
}
96135
}

src/stdlib/re.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import cffi
2+
13
from stdlib._cffi_util import load_library
24

35
# Define the C interface
@@ -6,9 +8,15 @@
68
bool match_compiled(int id, const char* text);
79
void release_compiled(int id);
810
bool match(const char* pattern, const char* text);
11+
const char* search_pattern(int id, const char* text);
12+
char** findall_pattern(int id, const char* text);
13+
void free_matches(char** matches);
14+
const char* substitute_pattern(int id, const char* text, const char* replacement);
15+
void free(void *ptr);
916
"""
1017

1118
# Load the shared library
19+
ffi = cffi.FFI()
1220
lib = load_library("regex_wrapper", interface)
1321

1422

@@ -35,25 +43,42 @@ def search(self, text: str) -> str | None:
3543
# Search for the compiled regex in the text
3644
result = lib.search_pattern(self.id, text.encode("utf-8")) # type: ignore
3745
if result:
38-
return result.decode("utf-8")
46+
ptr = result
47+
try:
48+
result_bytes: bytes = ffi.string(result)
49+
return result_bytes.decode("utf-8")
50+
finally:
51+
lib.free(ptr)
3952
return None
4053

4154
def findall(self, text: str) -> list[str]:
    """Return every match of the compiled regex in ``text``.

    Passes the UTF-8 encoding of ``text`` to ``lib.findall_pattern``. A null
    result yields an empty list. Otherwise the returned array is read up to
    its first null entry, each entry is decoded as UTF-8, and the array is
    handed to ``lib.free_matches`` even if decoding raises.
    """
    array = lib.findall_pattern(self.id, text.encode("utf-8"))  # type: ignore
    if not array:
        return []

    found: list[str] = []
    try:
        index = 0
        while True:
            entry = array[index]
            if not entry:  # a null entry marks the end of the array
                break
            found.append(ffi.string(entry).decode("utf-8"))
            index += 1
    finally:
        # Release the array and its strings once they have been copied out.
        lib.free_matches(array)
    return found
4969

5070
def sub(self, replacement: str, text: str) -> str:
    """Substitute all occurrences of the compiled regex in ``text``.

    Returns the substituted string decoded from UTF-8, or ``text`` unchanged
    when ``lib.substitute_pattern`` returns a null pointer. The returned
    buffer is released with ``lib.free`` once its contents have been copied.
    """
    raw = lib.substitute_pattern(  # type: ignore
        self.id, text.encode("utf-8"), replacement.encode("utf-8")
    )
    if not raw:
        return text  # Return the original text if substitution fails

    try:
        # ffi.string copies the bytes, so the buffer can be freed afterwards.
        return ffi.string(raw).decode("utf-8")
    finally:
        lib.free(raw)
5883

5984

0 commit comments

Comments
 (0)