Skip to content

Commit 38ea60e

Browse files
authored
[AINode] Eliminate transformers registered and fix build bug (#17615)
1 parent d4be5c8 commit 38ea60e

4 files changed

Lines changed: 6 additions & 459 deletions

File tree

integration-test/src/test/java/org/apache/iotdb/ainode/it/AINodeModelManageIT.java

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -77,11 +77,6 @@ public void userDefinedModelManagementTestInTree() throws SQLException, Interrup
7777
registerUserDefinedModel(statement, modelInfo, "file:///data/chronos2");
7878
callInferenceTest(statement, modelInfo);
7979
dropUserDefinedModel(statement, modelInfo.getModelId());
80-
errorTest(
81-
statement,
82-
"create model origin_chronos using uri \"file:///data/chronos2_origin\"",
83-
"1505: 't5' is already used by a Transformers config, pick another name.");
84-
statement.execute("drop model origin_chronos");
8580

8681
// Test PytorchModelHubMixin model (mantis) in tree.
8782
modelInfo = new FakeModelInfo("user_mantis", "custom_mantis", "user_defined", "active");
@@ -100,11 +95,6 @@ public void userDefinedModelManagementTestInTable() throws SQLException, Interru
10095
registerUserDefinedModel(statement, modelInfo, "file:///data/chronos2");
10196
forecastTableFunctionTest(statement, modelInfo);
10297
dropUserDefinedModel(statement, modelInfo.getModelId());
103-
errorTest(
104-
statement,
105-
"create model origin_chronos using uri \"file:///data/chronos2_origin\"",
106-
"1505: 't5' is already used by a Transformers config, pick another name.");
107-
statement.execute("drop model origin_chronos");
10898

10999
// Test PytorchModelHubMixin model (mantis) in table.
110100
modelInfo = new FakeModelInfo("user_mantis", "custom_mantis", "user_defined", "active");

iotdb-core/ainode/build_binary.py

Lines changed: 5 additions & 232 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
"""
2323

2424
import os
25-
import shutil
2625
import subprocess
2726
import sys
2827
from pathlib import Path
@@ -122,7 +121,6 @@ def get_venv_env(venv_dir):
122121
123122
Sets VIRTUAL_ENV and prepends the venv's bin/Scripts directory to PATH
124123
so that tools installed in the venv take precedence.
125-
Also sets POETRY_VIRTUALENVS_PATH to force poetry to use our venv.
126124
127125
Returns:
128126
dict: Environment variables dictionary
@@ -133,10 +131,6 @@ def get_venv_env(venv_dir):
133131
venv_bin = str(venv_dir / ("Scripts" if sys.platform == "win32" else "bin"))
134132
env["PATH"] = f"{venv_bin}{os.pathsep}{env.get('PATH', '')}"
135133

136-
# Force poetry to use our virtual environment by setting POETRY_VIRTUALENVS_PATH
137-
# This tells poetry where to look for/create virtual environments
138-
env["POETRY_VIRTUALENVS_PATH"] = str(venv_dir.parent.absolute())
139-
140134
return env
141135

142136

@@ -159,10 +153,11 @@ def install_dependencies(venv_python, venv_dir, script_dir):
159153
venv_env = get_venv_env(venv_dir)
160154
poetry_exe = get_poetry_executable(venv_dir)
161155

162-
# Configure poetry settings
156+
# Configure poetry to NOT create its own virtual environments.
157+
# Poetry will use the already-activated venv via the VIRTUAL_ENV
158+
# environment variable set in get_venv_env().
163159
print("Configuring poetry settings...")
164160
try:
165-
# Set poetry to not create venvs in project directory
166161
subprocess.run(
167162
[str(poetry_exe), "config", "virtualenvs.in-project", "false"],
168163
cwd=str(script_dir),
@@ -171,24 +166,8 @@ def install_dependencies(venv_python, venv_dir, script_dir):
171166
capture_output=True,
172167
text=True,
173168
)
174-
# Set poetry virtualenvs path to our venv directory's parent
175-
# This forces poetry to look for/create venvs in the same location as our venv
176-
subprocess.run(
177-
[
178-
str(poetry_exe),
179-
"config",
180-
"virtualenvs.path",
181-
str(venv_dir.parent.absolute()),
182-
],
183-
cwd=str(script_dir),
184-
env=venv_env,
185-
check=True,
186-
capture_output=True,
187-
text=True,
188-
)
189-
# Ensure poetry can use virtual environments
190169
subprocess.run(
191-
[str(poetry_exe), "config", "virtualenvs.create", "true"],
170+
[str(poetry_exe), "config", "virtualenvs.create", "false"],
192171
cwd=str(script_dir),
193172
env=venv_env,
194173
check=True,
@@ -197,28 +176,8 @@ def install_dependencies(venv_python, venv_dir, script_dir):
197176
)
198177
except Exception as e:
199178
print(f"Warning: Failed to configure poetry settings: {e}")
200-
# Continue anyway, as these may not be critical
201179

202-
# Remove any existing poetry virtual environments for this project
203-
# This ensures poetry will use our specified virtual environment
204-
print("Removing any existing poetry virtual environments...")
205-
remove_result = subprocess.run(
206-
[str(poetry_exe), "env", "remove", "--all"],
207-
cwd=str(script_dir),
208-
env=venv_env,
209-
check=False, # Don't fail if no venv exists
210-
capture_output=True,
211-
text=True,
212-
)
213-
if remove_result.stdout:
214-
print(remove_result.stdout.strip())
215-
if remove_result.stderr:
216-
stderr = remove_result.stderr.strip()
217-
# Ignore "No virtualenv has been activated" error
218-
if "no virtualenv" not in stderr.lower():
219-
print(remove_result.stderr.strip())
220-
221-
# Verify the virtual environment Python is valid before configuring poetry
180+
# Verify the virtual environment Python is valid
222181
print(f"Verifying virtual environment Python at: {venv_python}")
223182
if not venv_python.exists():
224183
print(f"ERROR: Virtual environment Python not found at: {venv_python}")
@@ -235,190 +194,8 @@ def install_dependencies(venv_python, venv_dir, script_dir):
235194
sys.exit(1)
236195
print(f" Python version: {python_version_result.stdout.strip()}")
237196

238-
# Instead of using poetry env use (which creates new venvs), we'll use a different approach:
239-
# 1. Create a symlink from poetry's expected venv location to our venv
240-
# 2. Or, directly use poetry install with VIRTUAL_ENV set (poetry should detect it)
241-
#
242-
# The issue is that poetry env use creates venvs with hash-based names in its cache.
243-
# We need to work around this by either:
244-
# - Creating a symlink from poetry's expected location to our venv
245-
# - Or bypassing poetry env use entirely and using poetry install directly
246-
247-
# Strategy: Create a symlink from poetry's expected venv location to our venv
248-
# Poetry creates venvs with names like: <project-name>-<hash>-py<python-version>
249-
# We need to find out what poetry would name our venv, then create a symlink
250-
251-
print(f"Configuring poetry to use virtual environment at: {venv_dir}")
252-
253-
# Get poetry's expected venv name by checking what it would create
254-
# First, let's try poetry env use, but catch if it tries to create a new venv
255-
result = subprocess.run(
256-
[str(poetry_exe), "env", "use", str(venv_python)],
257-
cwd=str(script_dir),
258-
env=venv_env,
259-
check=False,
260-
capture_output=True,
261-
text=True,
262-
)
263-
264-
output_text = (result.stdout or "") + (result.stderr or "")
265-
266-
# If poetry is creating a new venv, we need to stop it and use a different approach
267-
if (
268-
"Creating virtualenv" in output_text
269-
or "Creating virtual environment" in output_text
270-
or "Using virtualenv:" in output_text
271-
):
272-
print("Poetry is attempting to create/use a new virtual environment.")
273-
print(
274-
"Stopping this and using alternative approach: creating symlink to our venv..."
275-
)
276-
277-
# Extract the venv path poetry is trying to create/use
278-
# Look for patterns like "Using virtualenv: /path/to/venv" or "Creating virtualenv name in /path"
279-
import re
280-
281-
poetry_venv_path = None
282-
283-
# Try to extract from "Using virtualenv: /path/to/venv"
284-
using_match = re.search(r"Using virtualenv:\s*([^\s\n]+)", output_text)
285-
if using_match:
286-
poetry_venv_path = Path(using_match.group(1))
287-
288-
# If not found, try to extract from "Creating virtualenv name in /path"
289-
if not poetry_venv_path:
290-
creating_match = re.search(
291-
r"Creating virtualenv[^\n]*in\s+([^\s\n]+)", output_text
292-
)
293-
if creating_match:
294-
venv_dir_path = Path(creating_match.group(1))
295-
# Extract venv name from the output
296-
name_match = re.search(r"Creating virtualenv\s+([^\s]+)", output_text)
297-
if name_match:
298-
venv_name = name_match.group(1)
299-
poetry_venv_path = venv_dir_path / venv_name
300-
301-
# If still not found, try to find any path in pypoetry/virtualenvs
302-
if not poetry_venv_path:
303-
pypoetry_match = re.search(
304-
r"([^\s]+pypoetry[^\s]*virtualenvs[^\s]+)", output_text
305-
)
306-
if pypoetry_match:
307-
poetry_venv_path = Path(pypoetry_match.group(1))
308-
309-
if poetry_venv_path:
310-
print(f"Poetry wants to create/use venv at: {poetry_venv_path}")
311-
312-
# Remove the venv poetry just created (if it exists)
313-
if poetry_venv_path.exists() and poetry_venv_path.is_dir():
314-
print(f"Removing poetry's newly created venv: {poetry_venv_path}")
315-
shutil.rmtree(poetry_venv_path, ignore_errors=True)
316-
317-
# Create a symlink from poetry's expected location to our venv
318-
print(f"Creating symlink from {poetry_venv_path} to {venv_dir}")
319-
try:
320-
if poetry_venv_path.exists() or poetry_venv_path.is_symlink():
321-
if poetry_venv_path.is_symlink():
322-
poetry_venv_path.unlink()
323-
elif poetry_venv_path.is_dir():
324-
shutil.rmtree(poetry_venv_path, ignore_errors=True)
325-
poetry_venv_path.parent.mkdir(parents=True, exist_ok=True)
326-
poetry_venv_path.symlink_to(venv_dir)
327-
print(f"Symlink created successfully")
328-
except Exception as e:
329-
print(f"WARNING: Failed to create symlink: {e}")
330-
print("Will try to use poetry install directly with VIRTUAL_ENV set")
331-
else:
332-
print("Could not determine poetry's venv path from output")
333-
print(f"Output was: {output_text}")
334-
else:
335-
if result.stdout:
336-
print(result.stdout.strip())
337-
if result.stderr:
338-
stderr = result.stderr.strip()
339-
if stderr:
340-
print(f"Poetry output: {stderr}")
341-
342-
# Verify poetry is using the correct virtual environment BEFORE running lock/install
343-
# This is critical - if poetry uses the wrong venv, dependencies won't be installed correctly
344-
print("Verifying poetry virtual environment...")
345-
346-
# Wait a moment for symlink to be recognized (if we created one)
347-
import time
348-
349-
time.sleep(0.5)
350-
351-
verify_result = subprocess.run(
352-
[str(poetry_exe), "env", "info", "--path"],
353-
cwd=str(script_dir),
354-
env=venv_env,
355-
check=False, # Don't fail if poetry hasn't activated a venv yet
356-
capture_output=True,
357-
text=True,
358-
)
359-
360-
expected_venv_path_resolved = str(Path(venv_dir.absolute()).resolve())
361-
362-
# If poetry env info fails, it might mean poetry hasn't activated the venv yet
363-
if verify_result.returncode != 0:
364-
print(
365-
"Warning: poetry env info failed, poetry may not have activated the virtual environment yet"
366-
)
367-
print(
368-
"This may be okay if we created a symlink - poetry should use it when running commands"
369-
)
370-
poetry_venv_path_resolved = None
371-
else:
372-
poetry_venv_path = verify_result.stdout.strip()
373-
374-
# Normalize paths for comparison (resolve symlinks, etc.)
375-
poetry_venv_path_resolved = str(Path(poetry_venv_path).resolve())
376-
377-
# Only verify path if we successfully got poetry's venv path
378-
if poetry_venv_path_resolved is not None:
379-
if poetry_venv_path_resolved != expected_venv_path_resolved:
380-
print(
381-
f"ERROR: Poetry is using {poetry_venv_path}, but expected {expected_venv_path_resolved}"
382-
)
383-
print(
384-
"Poetry must use the virtual environment we created for the build to work correctly."
385-
)
386-
print("The symlink approach may not have worked. Please check the symlink.")
387-
sys.exit(1)
388-
else:
389-
print(f"Poetry is correctly using virtual environment: {poetry_venv_path}")
390-
else:
391-
print("Warning: Could not verify poetry virtual environment path")
392-
print(
393-
"Continuing anyway - poetry should use the venv via symlink or VIRTUAL_ENV"
394-
)
395-
396197
# Update lock file and install dependencies
397-
# Re-verify environment before each command to ensure poetry doesn't switch venvs
398-
def verify_poetry_env():
399-
verify_result = subprocess.run(
400-
[str(poetry_exe), "env", "info", "--path"],
401-
cwd=str(script_dir),
402-
env=venv_env,
403-
check=False, # Don't fail if poetry env info is not available
404-
capture_output=True,
405-
text=True,
406-
)
407-
if verify_result.returncode == 0:
408-
current_path = str(Path(verify_result.stdout.strip()).resolve())
409-
expected_path = str(Path(venv_dir.absolute()).resolve())
410-
if current_path != expected_path:
411-
print(
412-
f"ERROR: Poetry switched to different virtual environment: {current_path}"
413-
)
414-
print(f"Expected: {expected_path}")
415-
sys.exit(1)
416-
# If poetry env info fails, we can't verify, but continue anyway
417-
# Poetry should still use the Python we specified via env use
418-
return True
419-
420198
print("Running poetry lock...")
421-
verify_poetry_env() # Verify before lock
422199
result = subprocess.run(
423200
[str(poetry_exe), "lock"],
424201
cwd=str(script_dir),
@@ -434,7 +211,6 @@ def verify_poetry_env():
434211
if result.returncode != 0:
435212
print(f"ERROR: poetry lock failed with exit code {result.returncode}")
436213
sys.exit(1)
437-
verify_poetry_env() # Verify after lock
438214

439215
accelerator = detect_accelerator()
440216
print(f"Selected accelerator: {accelerator}")
@@ -447,12 +223,9 @@ def verify_poetry_env():
447223
check=True,
448224
text=True,
449225
)
450-
verify_poetry_env() # Verify before install
451226
poetry_install_with_accel(poetry_exe, script_dir, venv_env, accelerator)
452-
verify_poetry_env() # Verify after install
453227

454228
# Verify installation by checking if key packages are installed
455-
# This is critical - if packages aren't installed, PyInstaller won't find them
456229
print("Verifying package installation...")
457230
test_packages = ["torch", "transformers", "tokenizers"]
458231
missing_packages = []

iotdb-core/ainode/iotdb/ainode/core/model/model_info.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ def __init__(
3232
repo_id: str = "",
3333
auto_map: Optional[Dict] = None,
3434
hub_mixin_cls: Optional[str] = None,
35-
transformers_registered: bool = False,
3635
):
3736
self.model_id = model_id
3837
self.model_type = model_type
@@ -42,7 +41,6 @@ def __init__(
4241
self.repo_id = repo_id
4342
self.auto_map = auto_map
4443
self.hub_mixin_cls = hub_mixin_cls
45-
self.transformers_registered = transformers_registered
4644

4745
def __repr__(self):
4846
return (
@@ -119,7 +117,6 @@ def __repr__(self):
119117
"AutoConfig": "configuration_timer.TimerConfig",
120118
"AutoModelForCausalLM": "modeling_timer.TimerForPrediction",
121119
},
122-
transformers_registered=True,
123120
),
124121
"sundial": ModelInfo(
125122
model_id="sundial",
@@ -132,7 +129,6 @@ def __repr__(self):
132129
"AutoConfig": "configuration_sundial.SundialConfig",
133130
"AutoModelForCausalLM": "modeling_sundial.SundialForPrediction",
134131
},
135-
transformers_registered=True,
136132
),
137133
"chronos2": ModelInfo(
138134
model_id="chronos2",
@@ -145,7 +141,6 @@ def __repr__(self):
145141
"AutoConfig": "config.Chronos2CoreConfig",
146142
"AutoModelForCausalLM": "model.Chronos2Model",
147143
},
148-
transformers_registered=True,
149144
),
150145
"moirai2": ModelInfo(
151146
model_id="moirai2",
@@ -158,7 +153,6 @@ def __repr__(self):
158153
"AutoConfig": "configuration_moirai2.Moirai2Config",
159154
"AutoModelForCausalLM": "modeling_moirai2.Moirai2ForPrediction",
160155
},
161-
transformers_registered=True,
162156
),
163157
"toto": ModelInfo(
164158
model_id="toto",
@@ -171,7 +165,6 @@ def __repr__(self):
171165
"AutoConfig": "configuration_toto.TotoConfig",
172166
"AutoModelForCausalLM": "modeling_toto.TotoForPrediction",
173167
},
174-
transformers_registered=True,
175168
),
176169
"moment": ModelInfo(
177170
model_id="moment",
@@ -184,6 +177,5 @@ def __repr__(self):
184177
"AutoConfig": "configuration_moment.MomentConfig",
185178
"AutoModelForCausalLM": "modeling_moment.MomentForPrediction",
186179
},
187-
transformers_registered=True,
188180
),
189181
}

0 commit comments

Comments
 (0)