-
Notifications
You must be signed in to change notification settings - Fork 2.8k
fix: update file handling functions #4455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,47 @@ def init_sandbox_dir(): | |
| except Exception as e: | ||
| maxkb_logger.error(f'Exception: {e}', exc_info=True) | ||
|
|
||
| def exist_function(self, code_str, name): | ||
| _id = str(uuid.uuid7()) | ||
| python_paths = CONFIG.get_sandbox_python_package_paths().split(',') | ||
| set_run_user = f'os.setgid({pwd.getpwnam(_run_user).pw_gid});os.setuid({pwd.getpwnam(_run_user).pw_uid});' if _enable_sandbox else '' | ||
| _exec_code = f""" | ||
| try: | ||
| import os, sys, json | ||
| path_to_exclude = ['/opt/py3/lib/python3.11/site-packages', '/opt/maxkb-app/apps'] | ||
| sys.path = [p for p in sys.path if p not in path_to_exclude] | ||
| sys.path += {python_paths} | ||
| locals_v={{}} | ||
| globals_v={{}} | ||
| {set_run_user} | ||
| os.environ.clear() | ||
| exec({dedent(code_str)!a}, globals_v, locals_v) | ||
| exec_result=locals_v.__contains__('{name}') | ||
| sys.stdout.write("\\n{_id}:") | ||
| json.dump({{'code':200,'msg':'success','data':exec_result}}, sys.stdout, default=str) | ||
| except Exception as e: | ||
| if isinstance(e, MemoryError): e = Exception("Cannot allocate more memory: exceeded the limit of {_process_limit_mem_mb} MB.") | ||
| sys.stdout.write("\\n{_id}:") | ||
| json.dump({{'code':500,'msg':str(e),'data':False}}, sys.stdout, default=str) | ||
| sys.stdout.flush() | ||
| """ | ||
| maxkb_logger.debug(f"Sandbox execute code: {_exec_code}") | ||
| with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=True) as f: | ||
| f.write(_exec_code) | ||
| f.flush() | ||
| subprocess_result = self._exec(f.name) | ||
| if subprocess_result.returncode != 0: | ||
| raise Exception(subprocess_result.stderr or subprocess_result.stdout or "Unknown exception occurred") | ||
| lines = subprocess_result.stdout.splitlines() | ||
| result_line = [line for line in lines if line.startswith(_id)] | ||
| if not result_line: | ||
| maxkb_logger.error("\n".join(lines)) | ||
| raise Exception("No result found.") | ||
| result = json.loads(result_line[-1].split(":", 1)[1]) | ||
| if result.get('code') == 200: | ||
| return result.get('data') | ||
| raise Exception(result.get('msg')) | ||
|
|
||
| def exec_code(self, code_str, keywords, function_name=None): | ||
| _id = str(uuid.uuid7()) | ||
| action_function = f'({function_name !a}, locals_v.get({function_name !a}))' if function_name else 'locals_v.popitem()' | ||
|
|
@@ -213,7 +254,7 @@ def get_tool_mcp_config(self, code, params): | |
| ], | ||
| 'cwd': _sandbox_path, | ||
| 'env': { | ||
| 'LD_PRELOAD': f'{_sandbox_path}/lib/sandbox.so', | ||
| 'LD_PRELOAD': f'{_sandbox_path}/lib/sandbox.so', | ||
| }, | ||
| 'transport': 'stdio', | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code looks generally correct, but here are some suggestions for improvement:
Here's an improved version with these points considered: import uuid
import pwd
from subprocess import run, CalledProcessError
import json
from logging import getLogger
MAXKB_LOGGER = getLogger(__name__)
def get_sandbox_python_package_paths():
# Replace this with actual configuration management logic
return "/path/to/globally/defined/python_packages"
class SandboxManager:
def __init__(self, _process_limit_mem_mb):
self._process_limit_mem_mb = _process_limit_mem_mb
def init_sandbox_dir(self):
try:
# Initialization logic
pass
except Exception as e:
MAXKB_LOGGER.error(f'Exception: {e}', exc_info=True)
def exist_function(self, code_str, name):
_id = str(uuid.uuid7())
python_paths = get_sandbox_python_package_paths().split(',')
set_run_user = (
f"if _enable_sandbox: "
f"os.setgid({getpwnam(_run_user).pw_gid})\n"
f"os.setuid({getpwnam(_run_user).pw_uid})"
)
_exec_code = f"""
try:
import os, sys, json
path_to_exclude = ['/opt/py3/lib/python3.11/site-packages', '/opt/maxkb-app/apps']
sys.path = [p for p in sys.path if p not in path_to_exclude]
sys.path += {repr(python_paths)}
locals_v={}
globals_v={}
{set_run_user}
os.environ.clear()
exec({dedent(code_str)!a}, globals_v, locals_v)
exec_result=locals_v.get('{name}')
sys.stdout.write("\\n{_id}:")
json.dump({'code':200,'msg':'success','data':exec_result}), sys.stdout.flush()
except Exception as e:
if isinstance(e, MemoryError): e = Exception("Cannot allocate more memory: exceeded the limit of {_process_limit_mem_mb} MB.")
sys.stdout.write("\\n{_id}:")
json.dump({'code':500,'msg':str(e),'data':False}), sys.stdout.flush()
sys.stdout.flush()
"""
MAXKB_LOGGER.debug(f"Sandbox execute code: {_exec_code}")
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=True) as f:
f.write(_exec_code)
f.flush()
subprocess_result = self._exec(f.name)
if subprocess_result.returncode != 0:
raise Exception(subprocess_result.stderr or subprocess_result.stdout or "Unknown exception occurred")
lines = subprocess_result.stdout.splitlines()
result_line = [line for line in lines if line.startswith(_id)]
if not result_line:
MAXKB_LOGGER.error("\n".join(lines))
raise Exception("No result found.")
result = json.loads(result_line[-1].split(":", 1)[1])
if result['code'] == 200:
return result['data']
raise Exception(result['msg'])
@staticmethod
def _exec(script_path):
command = ["python", "-c", f"exec(open('{script_path}').read())"]
try:
return run(command, capture_output=True, text=True, encoding="utf-8", timeout=60)
except CalledProcessError as e:
raise Exception(e.stderr or e.stdout or f"The command '{command[0]}' terminated unexpectedly.")
# Example usage
# sm = SandboxManager(_process_limit_mem_mb)
# exists = sm.exist_function('print("hello")', 'exists')
# print(exists)Key Changes:
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here’s a concise review of the provided Python code:
Irregularities found:
Import Statement: There seems to be an extra blank line at the end of the import statement block.
String Formatting: The string formatting uses
@datewhich is not valid in modern Python syntax. Use f-string or triple quotes for more readability and flexibility.Code Duplication: There is significant duplication between the sections where files from a list are downloaded and handled versus when no such list exists. This could potentially be refactored into a single method for handling both cases.
Base64 Usage vs ast.literal_eval: Using
ast.literal_evalinstead of directbase64operations can simplify error handling and improve clarity, especially with regards to parsing potential JSON data safely.Functionality Comments: The comments explain what each part does, but they could be slightly refined for better understanding.
Potential Issues:
Security Vulnerabilities: Direct use of base64 decoding without proper sanitation can lead to security vulnerabilities. Ensure that
file_bytesare properly validated and sanitized before using them.Performance Improvements: Consider optimizing memory management and processing efficiency, particularly in larger datasets.
Optimization Suggestions:
Efficient Download Handling: Instead of converting data chunks one by one to bytes and then back to a file object, convert everything directly once you have the final byte array from
bytes_to_uploaded_file.Error Handling: Enhance exception handling across multiple points to ensure robustness and prevent errors from crashing the system abruptly.
Reusability Enhancements: Factor out reusable code snippets into separate functions or classes, making the codebase cleaner and easier to maintain.
Here's a revised version incorporating some of these suggestions:
By addressing these points, the code should become more maintainable and efficient.