Fetch

The fetch operation retrieves data from query results into Python. It's the second step after constructing a query with operators.

Basic Fetch

Fetch All Entities

# As NumPy recarray (default)
data = Subject.fetch()

# As list of dictionaries
data = Subject.fetch(as_dict=True)

# As pandas DataFrame
data = Subject.fetch(format='frame')

Fetch Single Entity

Use fetch1 when the query returns exactly one entity:

# Fetch entire entity
subject = (Subject & 'subject_id=1').fetch1()
# Returns: {'subject_id': 1, 'species': 'mouse', ...}

# Raises error if zero or multiple entities match

Fetch Specific Attributes

# Single attribute returns 1D array
names = Subject.fetch('species')
# Returns: array(['mouse', 'mouse', 'rat', ...])

# Multiple attributes return tuple of arrays
ids, species = Subject.fetch('subject_id', 'species')

# With fetch1, returns scalar values
subject_id, species = (Subject & 'subject_id=1').fetch1('subject_id', 'species')
# Returns: (1, 'mouse')

Fetch Primary Keys

# List of key dictionaries
keys = Subject.fetch('KEY')
# Returns: [{'subject_id': 1}, {'subject_id': 2}, ...]

# Single key
key = (Subject & 'subject_id=1').fetch1('KEY')
# Returns: {'subject_id': 1}

Output Formats

NumPy Recarray (Default)

data = Subject.fetch()
# Access attributes by name
data['subject_id']
data['species']

# Iterate over entities
for entity in data:
    print(entity['subject_id'], entity['species'])

List of Dictionaries

data = Subject.fetch(as_dict=True)
# [{'subject_id': 1, 'species': 'mouse', ...}, ...]

for entity in data:
    print(entity['subject_id'])

Pandas DataFrame

df = Subject.fetch(format='frame')
# DataFrame indexed by primary key

# Query on the DataFrame
df[df['species'] == 'mouse']
df.groupby('sex').count()

Sorting and Limiting

Order By

# Ascending (default)
data = Subject.fetch(order_by='date_of_birth')

# Descending
data = Subject.fetch(order_by='date_of_birth desc')

# Multiple attributes
data = Subject.fetch(order_by=('species', 'date_of_birth desc'))

# By primary key
data = Subject.fetch(order_by='KEY')

# SQL reserved words require backticks
data = Table.fetch(order_by='`select` desc')

Limit and Offset

# First 10 entities
data = Subject.fetch(limit=10)

# Entities 11-20 (skip first 10)
data = Subject.fetch(limit=10, offset=10)

# Most recent 5 subjects
data = Subject.fetch(order_by='date_of_birth desc', limit=5)

Note: offset requires limit to be specified.

Practical Examples

Query and Filter

# Fetch subjects of a specific species
mice = (Subject & 'species="mouse"').fetch()

# Fetch with complex restriction
recent_mice = (Subject & 'species="mouse"'
                       & 'date_of_birth > "2023-01-01"').fetch(as_dict=True)

Fetch with Projection

# Fetch only specific attributes
data = Subject.proj('species', 'sex').fetch()

# Rename attributes
data = Subject.proj(animal_species='species').fetch()

Fetch from Joins

# Fetch combined data from multiple tables
data = (Session * Subject).fetch()

# Select attributes from join
ids, dates, species = (Session * Subject).fetch(
    'session_id', 'session_date', 'species'
)

Aggregation Results

# Count sessions per subject
session_counts = (Subject.aggr(Session, count='count(*)')).fetch()

# Average duration per subject
avg_durations = (Subject.aggr(Trial, avg_dur='avg(duration)')).fetch()

Working with Blobs

Blob attributes contain serialized Python objects:

@schema
class Image(dj.Manual):
    definition = """
    image_id : int
    ---
    pixels : longblob  # numpy array
    metadata : longblob  # dict
    """

# Fetch returns deserialized objects
image = (Image & 'image_id=1').fetch1()
pixels = image['pixels']  # numpy array
metadata = image['metadata']  # dict

# Fetch specific blob attribute
pixels = (Image & 'image_id=1').fetch1('pixels')

Object Attributes

Object attributes return ObjectRef handles for efficient access to large files:

record = Recording.fetch1()
obj = record['raw_data']

# Metadata (no I/O)
print(obj.path)      # Storage path
print(obj.size)      # Size in bytes
print(obj.checksum)  # Content hash
print(obj.is_dir)    # True if folder

# Read content
content = obj.read()  # Returns bytes

# Open as file
with obj.open() as f:
    data = f.read()

# Download locally
local_path = obj.download('/local/destination/')

Zarr and Xarray Integration

import zarr
import xarray as xr

obj = Recording.fetch1()['neural_data']

# Open as Zarr
z = zarr.open(obj.store, mode='r')
data = z[:]

# Open with xarray
ds = xr.open_zarr(obj.store)

Performance Considerations

Check Size Before Fetching

# Check table size before fetch
print(f"Table size: {Subject.size_on_disk / 1e6:.2f} MB")
print(f"Entity count: {len(Subject)}")

Stream Large Results

# Process entities one at a time (memory efficient)
for entity in Subject.fetch(as_dict=True):
    process(entity)

# Or with a cursor
for key in Subject.fetch('KEY'):
    entity = (Subject & key).fetch1()
    process(entity)

Fetch Only What You Need

# Bad: fetch everything, use only ID
all_data = Subject.fetch()
ids = all_data['subject_id']

# Good: fetch only needed attribute
ids = Subject.fetch('subject_id')

Common Patterns

Conditional Fetch

def get_subject(subject_id):
    """Fetch subject if exists, else None."""
    query = Subject & {'subject_id': subject_id}
    if query:
        return query.fetch1()
    return None

Fetch with Defaults

def fetch_with_default(query, attribute, default=None):
    """Fetch attribute with default value."""
    try:
        return query.fetch1(attribute)
    except DataJointError:
        return default

Batch Processing

def process_in_batches(table, batch_size=100):
    """Process table in batches."""
    keys = table.fetch('KEY')
    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i + batch_size]
        batch_data = (table & batch_keys).fetch(as_dict=True)
        yield batch_data

Entity Ordering Note

Fetch results are not guaranteed to be in any particular order unless order_by is specified. The order may vary between queries. If you need matching pairs of attributes, fetch them in a single call:

# Correct: attributes are matched
ids, names = Subject.fetch('subject_id', 'species')

# Risky: separate fetches may return different orders
ids = Subject.fetch('subject_id')
names = Subject.fetch('species')  # May not match ids!

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fetch

Basic Fetch

Fetch All Entities

Fetch Single Entity

Fetch Specific Attributes

Fetch Primary Keys

Output Formats

NumPy Recarray (Default)

List of Dictionaries

Pandas DataFrame

Sorting and Limiting

Order By

Limit and Offset

Practical Examples

Query and Filter

Fetch with Projection

Fetch from Joins

Aggregation Results

Working with Blobs

Object Attributes

Zarr and Xarray Integration

Performance Considerations

Check Size Before Fetching

Stream Large Results

Fetch Only What You Need

Common Patterns

Conditional Fetch

Fetch with Defaults

Batch Processing

Entity Ordering Note

FilesExpand file tree

fetch.md

Latest commit

History

fetch.md

File metadata and controls

Fetch

Basic Fetch

Fetch All Entities

Fetch Single Entity

Fetch Specific Attributes

Fetch Primary Keys

Output Formats

NumPy Recarray (Default)

List of Dictionaries

Pandas DataFrame

Sorting and Limiting

Order By

Limit and Offset

Practical Examples

Query and Filter

Fetch with Projection

Fetch from Joins

Aggregation Results

Working with Blobs

Object Attributes

Zarr and Xarray Integration

Performance Considerations

Check Size Before Fetching

Stream Large Results

Fetch Only What You Need

Common Patterns

Conditional Fetch

Fetch with Defaults

Batch Processing

Entity Ordering Note