Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion backend/.tes_instances
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ TESK/Kubernetes @ ELIXIR-CZ (NA),https://tesk-na.cloud.e-infra.cz/
TESK/Kubernetes @ ELIXIR-DE,https://tesk.elixir-cloud.bi.denbi.de/
TESK/Kubernetes @ ELIXIR-GR,https://tesk-eu.hypatia-comp.athenarc.gr/
TESK/OpenShift @ ELIXIR-FI,https://csc-tesk-noauth.rahtiapp.fi/
TESK North America,https://tesk-na.cloud.e-infra.cz/

# Local Development
Local TES,http://localhost:8080
21 changes: 21 additions & 0 deletions backend/routes/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,27 @@ def get_healthy_instances_route():
print(f"❌ Error in get_healthy_instances: {str(e)}")
return jsonify({'error': str(e)}), 500

@instances_bp.route('/api/instances-with-status', methods=['GET'])
def get_instances_with_status():
    """Return every configured TES instance along with its health status.

    The instance list comes from ``load_tes_location_data()`` and each entry
    is passed to ``fetch_tes_status`` on a thread pool of up to 8 workers.

    Returns:
        On success, a JSON response with the keys ``instances`` (one result
        per instance, in input order), ``count`` (number of results) and
        ``last_updated`` (current UTC time as an ISO-8601 string).
        On any exception, a ``({'error': <message>}, 500)`` JSON response.
    """
    try:
        # Imported inside the try block so an import failure is also
        # reported through the JSON 500 path below.
        from utils.tes_utils import load_tes_location_data

        tes_instances = load_tes_location_data()

        # Probe the instances concurrently; list() drains the lazy map so
        # all checks have finished before the pool is shut down.
        with ThreadPoolExecutor(max_workers=8) as executor:
            statuses = list(executor.map(fetch_tes_status, tes_instances))

        payload = {
            'instances': statuses,
            'count': len(statuses),
            'last_updated': datetime.now(timezone.utc).isoformat(),
        }
        return jsonify(payload)
    except Exception as exc:
        message = str(exc)
        print(f"Error in get_instances_with_status: {message}")
        return jsonify({'error': message}), 500
Comment on lines +31 to +50

Copilot AI Jan 26, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new '/api/instances-with-status' endpoint is not included in the cache patterns in middleware_config.py. This endpoint performs real-time health checks on all TES instances which can be expensive (lines 38-40 show parallel HTTP requests to multiple endpoints). The endpoint should be added to the cache_patterns list to avoid repeated expensive operations, similar to '/api/instances' and '/api/tes_locations' which are already cached.

Copilot uses AI. Check for mistakes.

@instances_bp.route('/api/tes_locations', methods=['GET'])
def tes_locations():
from utils.tes_utils import load_tes_location_data
Expand Down
62 changes: 58 additions & 4 deletions backend/routes/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import json
import time
import requests
import shlex
from services.task_service import get_submitted_tasks, add_task, update_single_task_status
from utils.tes_utils import load_tes_instances
from utils.auth_utils import get_instance_credentials
import logging

logger = logging.getLogger(__name__)

tasks_bp = Blueprint('tasks', __name__)

Expand Down Expand Up @@ -36,9 +40,38 @@ def submit_task():
tes_name = inst['name']
break


command_raw = data.get('command', '')
if isinstance(command_raw, list):
command = command_raw
elif command_raw:

shell_operators = ['&&', '||', '|', '>', '<', '>>', '2>', '&', ';', '$(', '`']
needs_shell = any(op in command_raw for op in shell_operators)

if needs_shell:

command = ["/bin/sh", "-c", command_raw]
print(f"🐚 Command contains shell operators, wrapping in shell: {command}")
else:

try:
command = shlex.split(command_raw)
print(f"📝 Simple command parsed: {command}")
except ValueError as e:
logger.error(f"Command parsing error: {e}")
return jsonify({
'success': False,
'error': f'Invalid command syntax: {str(e)}',
'error_type': 'bad_request',
'error_code': 'INVALID_COMMAND'
}), 400
else:
command = ['echo', 'Hello World']

executor = {
"image": docker_image,
"command": data.get('command') if isinstance(data.get('command'), list) else (data.get('command', '').split() if data.get('command') else ['echo', 'Hello World']),
"command": command,
"workdir": data.get('workdir', '/tmp')
}

Expand Down Expand Up @@ -100,13 +133,24 @@ def submit_task():

try:
print(f" Trying service-info: {service_info_url}")
test_response = requests.get(service_info_url, timeout=10)
if test_response.status_code in [200, 403]:
test_response = requests.get(service_info_url, timeout=10)

if test_response.status_code == 200:
service_is_reachable = True
working_endpoint = tasks_url
print(f" ✅ Service reachable at {service_info_url} (status {test_response.status_code})")
print(f" Will use tasks endpoint: {tasks_url}")
break
elif test_response.status_code in [401, 403]:
# Authentication required - treat as unreachable since we can't use it
print(f" 🔐 Authentication required at {service_info_url} (status {test_response.status_code})")
connectivity_error_info = {
'error_type': 'unauthorized',
'error_code': 'UNAUTHORIZED',
'message': 'Authentication required - TES instance requires credentials',
'reason': 'This TES instance requires authentication. Please configure TESK_PROD_TOKEN or credentials in environment variables.'
}
continue
else:
print(f" ⚠️ Service returned status {test_response.status_code}")

Expand Down Expand Up @@ -199,6 +243,7 @@ def submit_task():

tes_endpoint = working_endpoint
print(f"🚀 Submitting task to {tes_endpoint}")
print(f"📦 Task payload: {json.dumps(tes_task, indent=2)}")

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

print(f"📦 Task payload: ...") logs the full TES task payload. Since this includes user-provided fields (command, URLs, descriptions), it can leak data and become very noisy in production. Consider using logger.debug (or guarding behind a debug flag) and/or redacting fields before logging.

Suggested change
print(f"📦 Task payload: {json.dumps(tes_task, indent=2)}")
logger.debug("📦 Task payload keys: %s", list(tes_task.keys()))

Copilot uses AI. Check for mistakes.

credentials = get_instance_credentials(tes_name, tes_url)
headers = {
Expand Down Expand Up @@ -301,7 +346,7 @@ def submit_task():
else:
error_type_map = {
400: {'error_type': 'bad_request', 'error_code': 'BAD_REQUEST', 'reason': 'The task specification is invalid or malformed'},
401: {'error_type': 'unauthorized', 'error_code': 'UNAUTHORIZED', 'reason': 'Authentication required or credentials are invalid'},
401: {'error_type': 'unauthorized', 'error_code': 'UNAUTHORIZED', 'reason': 'Authentication required. Please configure TESK_PROD_TOKEN or TESK_PROD_USER/TESK_PROD_PASSWORD environment variables for this instance.'},
403: {'error_type': 'forbidden', 'error_code': 'FORBIDDEN', 'reason': 'You do not have permission to submit tasks to this instance'},
404: {'error_type': 'not_found', 'error_code': 'NOT_FOUND', 'reason': 'The TES endpoint was not found. Check if the URL is correct.'},
408: {'error_type': 'timeout', 'error_code': 'TIMEOUT', 'reason': 'The request timed out. The TES instance may be overloaded.'},
Expand All @@ -319,12 +364,21 @@ def submit_task():
})

error_msg = f'TES submission failed with status {response.status_code}'
print(f"❌ TES returned status {response.status_code}")
print(f"Response headers: {dict(response.headers)}")
print(f"Response body: {response.text[:500]}")

try:
error_data = response.json()
print(f"Error data JSON: {error_data}")
Comment on lines +367 to +373

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On submission errors, the code prints full TES response headers and body (Response headers, Response body). Headers/bodies can include sensitive information and can be large; this is better handled via logger.debug with redaction/truncation (and ideally only when debugging).

Suggested change
print(f"❌ TES returned status {response.status_code}")
print(f"Response headers: {dict(response.headers)}")
print(f"Response body: {response.text[:500]}")
try:
error_data = response.json()
print(f"Error data JSON: {error_data}")
logger.debug("TES submission failed with status %s", response.status_code)
logger.debug(
"TES response header names: %s",
list(response.headers.keys())
)
logger.debug(
"TES response body (truncated): %s",
(response.text[:200] + "..." if response.text and len(response.text) > 200 else response.text)
)
try:
error_data = response.json()
logger.debug(
"TES error data JSON (truncated): %s",
str(error_data)[:500] + ("..." if len(str(error_data)) > 500 else "")
)

Copilot uses AI. Check for mistakes.
if error_data.get('message'):
error_msg = error_data.get('message')
elif error_data.get('error'):
error_msg = error_data.get('error')
elif error_data.get('detail'):
error_msg = error_data.get('detail')
elif error_data.get('title'):
error_msg = error_data.get('title')
else:
error_msg = f'{error_msg}: {str(error_data)}'
except:
Expand Down
41 changes: 40 additions & 1 deletion backend/services/tes_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,46 @@ def fetch_tes_status(instance):
r = requests.get(f"{tes_base_url}/ga4gh/tes/v1/service-info", timeout=5)
latency_ms = int((time.time() - start_time) * 1000)

status = "healthy" if r.status_code == 200 else "unhealthy"
# First check service-info endpoint
if r.status_code in [401, 403]:
status = "unhealthy" # Authentication required but not available
elif r.status_code != 200:
status = "unhealthy"
else:
# Service-info is accessible, but we need to check if tasks endpoint is usable
# Try a HEAD/OPTIONS request to tasks endpoint to see if it requires auth

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says "Try a HEAD/OPTIONS request" to the tasks endpoint, but the code uses requests.get(...). Either update the comment to match the implementation or switch to HEAD/OPTIONS if that’s the intended (lower-impact) check.

Suggested change
# Try a HEAD/OPTIONS request to tasks endpoint to see if it requires auth
# Try a request to the tasks endpoint to see if it requires auth

Copilot uses AI. Check for mistakes.
try:
instance_name = instance.get("name", "")
credentials = get_instance_credentials(instance_name, tes_base_url)

# Test tasks endpoint with credentials (if available)
headers = {'Accept': 'application/json'}
auth = None
if credentials.get('token'):
headers['Authorization'] = f"Bearer {credentials['token']}"
elif credentials.get('user') and credentials.get('password'):
auth = (credentials['user'], credentials['password'])

# Try to list tasks (with view=MINIMAL to reduce payload)
tasks_response = requests.get(
f"{tes_base_url}/ga4gh/tes/v1/tasks?view=MINIMAL",
headers=headers,
auth=auth,
timeout=5
)

# If tasks endpoint returns 401/403, mark as unhealthy (auth required but not configured)
if tasks_response.status_code in [401, 403]:
print(f"⚠️ {instance.get('name')} tasks endpoint requires authentication (status {tasks_response.status_code})")
status = "unhealthy"
else:
# Tasks endpoint is accessible (200) or returns other non-auth error
status = "healthy"
except Exception as tasks_error:
Comment on lines +59 to +66

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fetch_tes_status, the instance is marked healthy for any tasks endpoint response that isn’t 401/403. This can incorrectly classify instances as healthy even when /tasks returns 404/500/405, which would still break demo submissions. Consider only marking healthy for 2xx (ideally 200) responses, and treating other status codes as unhealthy (while still keeping the special-case for 401/403).

Copilot uses AI. Check for mistakes.
print(f"⚠️ Could not check tasks endpoint for {instance.get('name')}: {tasks_error}")
# If we can't check tasks endpoint, assume healthy based on service-info
status = "healthy"

version = ""
try:
version = r.json().get("version", "")
Expand Down
8 changes: 7 additions & 1 deletion frontend/src/hooks/useInstances.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import instanceService from '../services/instanceService';
const useInstances = () => {
const [state, setState] = useState({
instances: [],
allInstances: [],
loading: true,
error: null,
lastUpdate: null
Expand All @@ -15,7 +16,11 @@ const useInstances = () => {
};
instanceService.addListener(handleUpdate);
const initialState = instanceService.getHealthyInstances();
setState(initialState);
const allInstancesState = instanceService.getAllInstancesWithStatus();
setState({
...initialState,
allInstances: allInstancesState.instances
});
Comment on lines 17 to +23

Copilot AI Jan 26, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a potential state synchronization issue in the useEffect hook. The code gets initialState from getHealthyInstances() and allInstancesState from getAllInstancesWithStatus() separately, then merges them manually. However, the listener added at line 17 receives updates that already include both 'instances' and 'allInstances' (as seen in instanceService.notifyListeners() which passes both fields). This manual merging could be out of sync with the listener updates.

A cleaner approach would be to initialize the state directly from the service's cached data without the manual merge, since the notifyListeners already provides the complete state structure with both fields.

Copilot uses AI. Check for mistakes.
return () => {
instanceService.removeListener(handleUpdate);
};
Expand All @@ -26,6 +31,7 @@ const useInstances = () => {

return {
instances: state.instances,
allInstances: state.allInstances,
loading: state.loading,
error: state.error,
lastUpdate: state.lastUpdate,
Expand Down
Loading
Loading