Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions protos/ensembl/production/metadata/grpc/ensembl_metadata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ service EnsemblMetadata {
// Get release version for a given genome_uuid
rpc GetReleaseVersionByUUID(ReleaseVersionRequest) returns (ReleaseVersion) {}

// Get release label for a given genome_uuid
rpc GetReleaseLabelByUUID(ReleaseInfoRequest) returns (ReleaseLabel) {}

// Get attributes by genome UUID
rpc GetAttributesValuesByUUID(DatasetAttributesValuesRequest) returns (DatasetAttributesValues) {}

Expand Down Expand Up @@ -382,6 +385,10 @@ message ReleaseVersion {
double release_version = 1;
}

// Response message returned by the GetReleaseLabelByUUID RPC.
message ReleaseLabel {
// Label of the release selected for the requested genome; left at its
// default value when the server finds no matching release.
string release_label = 1;
}

message DatasetAttributeValue {
string attribute_name = 1;
string attribute_value = 2;
Expand Down Expand Up @@ -586,13 +593,26 @@ message FTPLinksRequest {

/*
Release version request
TODO: before removing this request message
we need to update the code in:
1. Thoas: https://github.com/Ensembl/ensembl-thoas/blob/876dc846f7e7e61771c13aea152e1e3623881e39/grpc_service/grpc_model.py#L106 and
2. Compara: https://github.com/Ensembl/ensembl-naxos/blob/b6430373ba59ce8aed7a50d9749d5ecc2d3ebaea/src/ensembl/io/naxos/metadata/grpc/client.py#L403
*/
// Request message accepted by the GetReleaseVersionByUUID RPC.
message ReleaseVersionRequest {
string genome_uuid = 1; // Mandatory
string dataset_type = 2; // Optional
double release_version = 3; // Optional
}

/*
Release info request
*/
// Request message accepted by the GetReleaseLabelByUUID RPC.
message ReleaseInfoRequest {
string genome_uuid = 1; // Mandatory; an empty value yields a default (empty) ReleaseLabel
string dataset_type = 2; // Optional; forwarded to the dataset lookup as the dataset type name
double release_version = 3; // Optional; forwarded to the dataset lookup as-is
}

/*
Dataset attribute request
*/
Expand Down
35 changes: 35 additions & 0 deletions src/ensembl/production/metadata/grpc/client_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
GroupTypeRequest,
GenomesInGroupRequest,
GenomeCountsRequest,
ReleaseInfoRequest,
)


Expand Down Expand Up @@ -545,6 +546,38 @@ def get_genome_counts(stub):
print("**** Genome Counts (Mock) ****")
print(genome_counts)

def get_release_label_by_genome_uuid(stub):
    """Call GetReleaseLabelByUUID with four sample requests and print each reply.

    The four scenarios are: genome_uuid only, genome_uuid plus the
    ``genebuild`` dataset type, dataset type without a genome_uuid, and
    genome_uuid plus an unknown dataset type. All RPCs are issued before
    anything is printed.

    Args:
        stub: Client stub exposing ``GetReleaseLabelByUUID``.
    """
    sample_uuid = "a73351f7-93e7-11ec-a39d-005056b38ce3"

    # (title printed before the reply, keyword arguments for the request)
    scenarios = (
        (
            "**** Get release label: By genome_uuid (Ecoli)****",
            {"genome_uuid": sample_uuid},
        ),
        (
            "**** Get release label: By genome_uuid and dataset_type = genebuild (Ecoli)****",
            {"genome_uuid": sample_uuid, "dataset_type": "genebuild"},
        ),
        (
            "**** Get release label: No genome_uuid provided and dataset_type = genebuild (Ecoli) No results****",
            {"dataset_type": "genebuild"},
        ),
        (
            "**** Get release label: By genome_uuid and dataset_type = blabla (Ecoli) No results****",
            {"genome_uuid": sample_uuid, "dataset_type": "blabla"},
        ),
    )

    # Build each request immediately before its call, and finish every call
    # before producing any output.
    replies = [
        stub.GetReleaseLabelByUUID(ReleaseInfoRequest(**fields))
        for _, fields in scenarios
    ]

    for (title, _), reply in zip(scenarios, replies):
        print(title)
        print(reply)


def run():
with grpc.insecure_channel("localhost:50051") as channel:
Expand Down Expand Up @@ -605,6 +638,8 @@ def run():
get_genomes_in_groups(stub)
print("-------------- Get Genome Counts --------------")
get_genome_counts(stub)
print("-------------- Get Release Label By Genome UUID --------------")
get_release_label_by_genome_uuid(stub)


if __name__ == "__main__":
Expand Down
130 changes: 67 additions & 63 deletions src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions src/ensembl/production/metadata/grpc/ensembl_metadata_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ def __init__(self, channel):
request_serializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseVersionRequest.SerializeToString,
response_deserializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseVersion.FromString,
_registered_method=True)
self.GetReleaseLabelByUUID = channel.unary_unary(
'/ensembl_metadata.EnsemblMetadata/GetReleaseLabelByUUID',
request_serializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseInfoRequest.SerializeToString,
response_deserializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseLabel.FromString,
_registered_method=True)
self.GetAttributesValuesByUUID = channel.unary_unary(
'/ensembl_metadata.EnsemblMetadata/GetAttributesValuesByUUID',
request_serializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.DatasetAttributesValuesRequest.SerializeToString,
Expand Down Expand Up @@ -361,6 +366,13 @@ def GetReleaseVersionByUUID(self, request, context):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

def GetReleaseLabelByUUID(self, request, context):
    """Get release label for a given genome_uuid

    Placeholder implementation: flags the call as UNIMPLEMENTED on the
    context and raises NotImplementedError. Presumably a concrete servicer
    overrides this method.
    """
    # Report the status to the caller before raising.
    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    context.set_details('Method not implemented!')
    raise NotImplementedError('Method not implemented!')

def GetAttributesValuesByUUID(self, request, context):
"""Get attributes by genome UUID
"""
Expand Down Expand Up @@ -519,6 +531,11 @@ def add_EnsemblMetadataServicer_to_server(servicer, server):
request_deserializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseVersionRequest.FromString,
response_serializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseVersion.SerializeToString,
),
'GetReleaseLabelByUUID': grpc.unary_unary_rpc_method_handler(
servicer.GetReleaseLabelByUUID,
request_deserializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseInfoRequest.FromString,
response_serializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseLabel.SerializeToString,
),
'GetAttributesValuesByUUID': grpc.unary_unary_rpc_method_handler(
servicer.GetAttributesValuesByUUID,
request_deserializer=ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.DatasetAttributesValuesRequest.FromString,
Expand Down Expand Up @@ -1207,6 +1224,33 @@ def GetReleaseVersionByUUID(request,
metadata,
_registered_method=True)

@staticmethod
def GetReleaseLabelByUUID(request,
        target,
        options=(),
        channel_credentials=None,
        call_credentials=None,
        insecure=False,
        compression=None,
        wait_for_ready=None,
        timeout=None,
        metadata=None):
    """Invoke the GetReleaseLabelByUUID RPC through ``grpc.experimental.unary_unary``.

    The request is serialized with ``ReleaseInfoRequest.SerializeToString``
    and the reply is parsed with ``ReleaseLabel.FromString``. All other
    arguments are forwarded unchanged to ``grpc.experimental.unary_unary``.
    """
    # NOTE: the positional order below (insecure before call_credentials)
    # differs from this method's own signature order. It presumably mirrors
    # the parameter order of grpc.experimental.unary_unary; verify against
    # the installed grpcio before touching it.
    return grpc.experimental.unary_unary(
        request,
        target,
        '/ensembl_metadata.EnsemblMetadata/GetReleaseLabelByUUID',
        ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseInfoRequest.SerializeToString,
        ensembl_dot_production_dot_metadata_dot_grpc_dot_ensembl__metadata__pb2.ReleaseLabel.FromString,
        options,
        channel_credentials,
        insecure,
        call_credentials,
        compression,
        wait_for_ready,
        timeout,
        metadata,
        _registered_method=True)

@staticmethod
def GetAttributesValuesByUUID(request,
target,
Expand Down
26 changes: 26 additions & 0 deletions src/ensembl/production/metadata/grpc/protobuf_msg_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,32 @@ def create_release_version(data=None):
)
return release

def create_label_version(data=None):
    """
    Build a ReleaseLabel message from a row that carries release information.

    This function will be used by Thoas to determine the MongoDB instance containing
    the data for a specified genome_uuid. It either constructs a ReleaseLabel
    instance with the release label obtained from the provided data or returns
    a default ReleaseLabel instance when no label can be read from the data.

    Args:
        data (Optional[sqlalchemy.engine.row.Row]): The input data from which the release
            label is extracted. It's expected to have an attribute 'release'
            with a nested attribute 'label'. If data is None, or 'release' is
            absent or None, or it carries no 'label', a default ReleaseLabel
            instance is returned.

    Returns:
        ensembl_metadata_pb2.ReleaseLabel: An instance of the ReleaseLabel message.
            It contains the release label extracted from the input data if the relevant
            attributes are present; otherwise, it's a default instance of ReleaseLabel.
    """
    if data is None:
        return ensembl_metadata_pb2.ReleaseLabel()
    logger.debug("Release data %s", data)

    # hasattr() alone does not protect against `data.release` being None, so
    # resolve both levels defensively instead of dereferencing directly.
    release = getattr(data, "release", None)
    label = getattr(release, "label", None)
    if label is None:
        # Nothing usable to report: fall back to the default message rather
        # than passing None into the message constructor.
        return ensembl_metadata_pb2.ReleaseLabel()

    return ensembl_metadata_pb2.ReleaseLabel(release_label=label)


def create_datasets(data=None):
if data is None:
Expand Down
6 changes: 6 additions & 0 deletions src/ensembl/production/metadata/grpc/servicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ def GetReleaseVersionByUUID(self, request, context):
self.genome_adaptor, request.genome_uuid, request.dataset_type, request.release_version
)

def GetReleaseLabelByUUID(self, request, context):
    """Serve the GetReleaseLabelByUUID RPC.

    Logs the incoming request, then delegates to
    ``utils.get_release_label_by_uuid`` with this servicer's genome adaptor
    and the request's ``genome_uuid``, ``dataset_type`` and
    ``release_version`` fields. The helper's result is returned unchanged.
    """
    logger.debug(f"Received RPC for GetReleaseLabelByUUID with request: {request}")
    lookup_args = (
        request.genome_uuid,
        request.dataset_type,
        request.release_version,
    )
    return utils.get_release_label_by_uuid(self.genome_adaptor, *lookup_args)

def GetAttributesValuesByUUID(self, request, context):
logger.debug(f"Received RPC for GetAttributesByUUID with request: {request}")
attribute_names = list(request.attribute_name) if request.attribute_name else None
Expand Down
54 changes: 53 additions & 1 deletion src/ensembl/production/metadata/grpc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,59 @@ def get_release_version_by_uuid(db_conn, genome_uuid, dataset_type, release_vers
return response_data
return msg_factory.create_release_version()

def get_release_label_by_uuid(db_conn, genome_uuid, dataset_type, release_version):
    """
    Look up the release label for a genome, considering partial releases only.

    Genome datasets are fetched for the given genome UUID, dataset type and
    release version. Among the rows returned, only those whose release has
    ``release_type == "partial"`` are kept, and the one with the greatest
    ``release_date`` is used to build the response.

    Why partial releases only:
        - Genomes are always released first as partial releases before being
          included in integrated releases.
        - Integrated releases only reference genes that were previously loaded
          via partial releases.
        - Thoas uses the release label to locate the correct MongoDB database;
          all MongoDB databases follow the naming pattern: ``release-YYYY-MM-DD``.

    A default (empty) ReleaseLabel is returned when ``genome_uuid`` is empty,
    when the lookup returns nothing, or when no row belongs to a partial
    release.
    """
    # Guard: nothing can be looked up without a genome UUID.
    if not genome_uuid:
        logger.warning("Missing or Empty Genome UUID field.")
        return msg_factory.create_label_version()

    genome_datasets = db_conn.fetch_genome_datasets(
        genome_uuid=genome_uuid,
        dataset_type_name=dataset_type,
        release_version=release_version,
    )
    if not genome_datasets:
        logger.error(f"No result found for {genome_uuid}/{dataset_type}/{release_version}")
        return msg_factory.create_label_version()

    # Keep only rows attached to a partial release; rows without a release
    # (or without a release_type) are dropped.
    partial_rows = []
    for row in genome_datasets:
        row_release = getattr(row, "release", None)
        if getattr(row_release, "release_type", None) == "partial":
            partial_rows.append(row)

    if not partial_rows:
        logger.error(
            f"No partial release found for {genome_uuid}/{dataset_type}/{release_version}"
        )
        return msg_factory.create_label_version()

    if len(partial_rows) > 1:
        logger.warning(f"Multiple partial results returned. {partial_rows}")

    def _release_date(row):
        return row.release.release_date

    # max() also covers the single-row case.
    newest = max(partial_rows, key=_release_date)
    return msg_factory.create_label_version(newest)


def get_attributes_values_by_uuid(db_conn, genome_uuid, dataset_type, release_version, attribute_names, latest_only):
"""
Expand Down Expand Up @@ -1285,4 +1338,3 @@ def get_genome_counts(db_conn: Any, release_label: str | None):
"(release_label=%r)",release_label
)
return msg_factory.create_genome_counts([])