Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions bbconf/config_parser/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from pathlib import Path
from typing import Literal

from pydantic import BaseModel, ConfigDict, computed_field, field_validator
from yacman import load_yaml
Expand Down Expand Up @@ -126,6 +127,17 @@ class ConfigPepHubClient(BaseModel):
tag: str | None = DEFAULT_PEPHUB_TAG


class ConfigAnalysis(BaseModel):
    """Settings for the BED file analysis step.

    ``backend`` names the statistics engine to use. Only ``"r"`` and
    ``"gtars"`` are accepted, and ``"r"`` is used when nothing is given.
    Any key other than ``backend`` is rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    backend: Literal["r", "gtars"] = "r"


class ConfigFile(BaseModel):
database: ConfigDB
qdrant: ConfigQdrant = None
Expand All @@ -134,6 +146,7 @@ class ConfigFile(BaseModel):
access_methods: AccessMethods = None
s3: ConfigS3 = None
phc: ConfigPepHubClient = None
analysis: ConfigAnalysis = ConfigAnalysis()

model_config = ConfigDict(extra="allow")

Expand Down
11 changes: 11 additions & 0 deletions bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,12 @@ class BedStats(Base):
promotercore_percentage: Mapped[Optional[float]]
tssdist: Mapped[Optional[float]]

distributions: Mapped[Optional[dict]] = mapped_column(
JSON,
nullable=True,
comment="Full distribution arrays from gtars genomicdist (JSONB)",
)

bed: Mapped["Bed"] = relationship("Bed", back_populates="stats")


Expand Down Expand Up @@ -337,6 +343,11 @@ class BedSets(Base):
bedset_standard_deviation: Mapped[Optional[dict]] = mapped_column(
JSON, comment="Median values of the bedset"
)
bedset_stats: Mapped[Optional[dict]] = mapped_column(
JSON,
nullable=True,
comment="Pre-aggregated distribution statistics from gtars (JSONB)",
)

bedfiles: Mapped[list["BedFileBedSetRelation"]] = relationship(
"BedFileBedSetRelation", back_populates="bedset", cascade="all, delete-orphan"
Expand Down
2 changes: 2 additions & 0 deletions bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class BedStatsModel(BaseModel):
promoterprox_frequency: float | None = None
promoterprox_percentage: float | None = None

distributions: dict | None = None

model_config = ConfigDict(extra="ignore", populate_by_name=True)


Expand Down
26 changes: 26 additions & 0 deletions bbconf/models/bedset_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from typing import Optional

from pydantic import BaseModel, ConfigDict, model_validator

Expand All @@ -7,10 +8,34 @@


class BedSetStats(BaseModel):
    """Bedset statistics: mean/sd of scalar columns.

    Populated from bedset_means and bedset_standard_deviation database columns.

    Both fields default to ``None``. They are annotated as nullable so the
    declared type matches that default and an explicitly passed ``None``
    passes validation instead of being rejected.
    """

    mean: BedStatsModel | None = None
    sd: BedStatsModel | None = None


class BedSetDistributions(BaseModel):
    """Aggregated, collection-level distribution statistics of a bedset.

    Backed by the bedset_stats database column and filled in once the
    member bed files have been processed with the gtars analysis backend.
    """

    # Count field; 0 when not supplied.
    n_files: int = 0

    # Each section below is an optional free-form mapping that defaults to
    # None. Declaration order is kept as-is because it is the model's
    # field order.
    composition: dict | None = None
    scalar_summaries: dict | None = None
    tss_histogram: dict | None = None
    widths_histogram: dict | None = None
    neighbor_distances: dict | None = None
    gc_content: dict | None = None
    region_distribution: dict | None = None
    partitions: dict | None = None
    chromosome_summaries: dict | None = None


class BedSetPlots(BaseModel):
region_commonality: FileModel = None

Expand All @@ -24,6 +49,7 @@ class BedSetMetadata(BaseModel):
submission_date: datetime.datetime = None
last_update_date: datetime.datetime = None
statistics: BedSetStats | None = None
distributions: BedSetDistributions | None = None
plots: BedSetPlots | None = None
description: str = None
summary: str = None
Expand Down
6 changes: 5 additions & 1 deletion bbconf/modules/bedsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,11 @@ def _calculate_statistics(self, bed_ids: list[str]) -> BedSetStats:
"""

_LOGGER.info("Calculating bedset statistics")
numeric_columns = BedStatsModel.model_fields
numeric_columns = [
name
for name, field in BedStatsModel.model_fields.items()
if field.annotation in (float, float | None)
]

bedset_sd = {}
bedset_mean = {}
Expand Down