From a05214ae574438bd1de5ff2c521fc38d35e4a324 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:31:06 +0900 Subject: [PATCH 01/27] added file --- model.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 model.py diff --git a/model.py b/model.py new file mode 100644 index 0000000..6f7305f --- /dev/null +++ b/model.py @@ -0,0 +1 @@ +import tensorflow as tf From f4776b4a2624c9a877eaa6dca9ca73cdd437412a Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:34:14 +0900 Subject: [PATCH 02/27] Update model.py --- model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model.py b/model.py index 6f7305f..7bc78ba 100644 --- a/model.py +++ b/model.py @@ -1 +1,2 @@ import tensorflow as tf +from tensorflow import keras From 4b5a3ffbb8a4bc8d10386181e4af21186fc1aeaf Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:37:24 +0900 Subject: [PATCH 03/27] Update model.py --- model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model.py b/model.py index 7bc78ba..25c9371 100644 --- a/model.py +++ b/model.py @@ -1,2 +1,3 @@ +import os import tensorflow as tf from tensorflow import keras From e1e0bb9fdf55d908df3095f64834e39c09319299 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:50:04 +0900 Subject: [PATCH 04/27] Update model.py --- model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model.py b/model.py index 25c9371..668e3f8 100644 --- a/model.py +++ b/model.py @@ -1,3 +1,4 @@ import os import tensorflow as tf from tensorflow import keras +from tensorflow.keras.models import Model From 4b71d317e92967beef0419a579d3d8a34100c2f4 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:50:30 +0900 Subject: [PATCH 05/27] Update model.py --- model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model.py b/model.py index 668e3f8..a01e995 100644 --- a/model.py +++ b/model.py @@ -2,3 +2,4 @@ import tensorflow as tf from tensorflow import keras from tensorflow.keras.models import Model +from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input From c5c326b2b1ce0d297444a3e4e0bdc75bc130d042 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:51:00 +0900 Subject: [PATCH 06/27] Update model.py --- model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model.py b/model.py index a01e995..6874ca3 100644 --- a/model.py +++ b/model.py @@ -3,3 +3,4 @@ from tensorflow import keras from tensorflow.keras.models import Model from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input +from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, UpSampling2D, Reshape, Dense, LayerNormalization, Dropout, Attention From 1e7af9b2e9abbe5692fff2b922e2d376806e177b Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:52:54 +0900 Subject: [PATCH 07/27] Update model.py --- model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/model.py b/model.py index 6874ca3..cfc9978 100644 --- a/model.py +++ b/model.py @@ -4,3 +4,16 @@ from tensorflow.keras.models import Model from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, UpSampling2D, Reshape, Dense, LayerNormalization, Dropout, Attention + + +def SqueezeAndExcite(inputs, ratio=8): + init = inputs + filters = init.shape[-1] + se_shape = (1, 1, filters) + + se = GlobalAveragePooling2D()(init) + se = Reshape(se_shape)(se) + se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se) + se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se) + x = init * se + return x From b4dc866a474763f45f015c96eff08990be5e9e13 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 00:57:42 +0900 Subject: [PATCH 08/27] Update model.py --- model.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/model.py b/model.py index cfc9978..69cf812 100644 --- a/model.py +++ b/model.py @@ -17,3 +17,13 @@ def SqueezeAndExcite(inputs, ratio=8): se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se) x = init * se return x + +def ASPP(inputs): + """ Image Pooling """ + shape = inputs.shape + y1 = AveragePooling2D(pool_size=(shape[1], shape[2]))(inputs) + y1 = Conv2D(256, 1, padding="same", use_bias=False)(y1) + y1 = BatchNormalization()(y1) + y1 = Activation("relu")(y1) + y1 = UpSampling2D((shape[1], shape[2]), interpolation="bilinear")(y1) + y1 = Attention()([y1, y1]) From cae3f9276b475ffeec94089db233ee85f550ce83 Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 01:47:02 +0900 Subject: [PATCH 09/27] Create eval.py --- eval.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 eval.py diff --git a/eval.py b/eval.py new file mode 100644 index 0000000..54c03e2 --- /dev/null +++ b/eval.py @@ -0,0 +1,4 @@ +import os +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" From f281357498e62fcc5d136fc17ee029839236744e Mon Sep 17 00:00:00 2001 From: sppham Date: Mon, 16 Jan 2023 02:10:47 +0900 Subject: [PATCH 10/27] Update eval.py --- eval.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eval.py b/eval.py index 54c03e2..c961e89 100644 --- a/eval.py +++ b/eval.py @@ -2,3 +2,5 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import tensorflow as tf From a54f4a05beac8b0570a1c3abbb76e5e9b4cf8ef8 Mon Sep 17 00:00:00 2001 From: sppham Date: Wed, 18 Jan 2023 16:50:21 +0900 Subject: [PATCH 11/27] Update model.py --- model.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/model.py b/model.py index 69cf812..b0ab0a0 100644 --- a/model.py +++ b/model.py @@ -27,3 +27,12 @@ def ASPP(inputs): y1 = Activation("relu")(y1) y1 = UpSampling2D((shape[1], shape[2]), interpolation="bilinear")(y1) y1 = Attention()([y1, y1]) + + """ 1x1 conv """ + y2 = Conv2D(256, 1, padding="same", use_bias=False)(inputs) + y2 = BatchNormalization()(y2) + y2 = Activation("relu")(y2) + #y2 = reshape(y2, y2.shape[1:]) + + y2 = Attention()([y2, y2]) + From 8dcfe624e28d977d499e80bdd2f6a12565ee5b48 Mon Sep 17 00:00:00 2001 From: sppham Date: Wed, 18 Jan 2023 16:50:58 +0900 Subject: [PATCH 12/27] Update model.py --- model.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/model.py b/model.py index b0ab0a0..e8da9fe 100644 --- a/model.py +++ b/model.py @@ -33,6 +33,12 @@ def ASPP(inputs): y2 = BatchNormalization()(y2) y2 = Activation("relu")(y2) #y2 = reshape(y2, y2.shape[1:]) - y2 = Attention()([y2, y2]) + """ 3x3 conv rate=6 """ + y3 = Conv2D(256, 3, padding="same", use_bias=False, dilation_rate=6)(inputs) + y3 = BatchNormalization()(y3) + y3 = Activation("relu")(y3) + #y3 = reshape(y3, y3.shape[1:]) + y3 = Attention()([y3, y3]) + From eaf573feb9fe991cac3fa326f04418daf2aec0d5 Mon Sep 17 00:00:00 2001 From: sppham Date: Wed, 18 Jan 2023 16:51:15 +0900 Subject: [PATCH 13/27] Update model.py --- model.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/model.py b/model.py index e8da9fe..b3b7b58 100644 --- a/model.py +++ b/model.py @@ -42,3 +42,10 @@ def ASPP(inputs): #y3 = reshape(y3, y3.shape[1:]) y3 = Attention()([y3, y3]) + """ 3x3 conv rate=12 """ + y4 = Conv2D(256, 3, padding="same", use_bias=False, dilation_rate=12)(inputs) + y4 = BatchNormalization()(y4) + y4 = Activation("relu")(y4) + #y4 = reshape(y4, y4.shape[1:]) + y4 = Attention()([y4, y4]) + From dd40bff9eb4ae16fd4884110bcea503b359d4a84 Mon Sep 17 00:00:00 2001 From: sppham Date: Wed, 18 Jan 2023 16:51:32 +0900 Subject: [PATCH 14/27] Update model.py --- model.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/model.py b/model.py index b3b7b58..d65a721 100644 --- a/model.py +++ b/model.py @@ -49,3 +49,12 @@ def ASPP(inputs): #y4 = reshape(y4, y4.shape[1:]) y4 = Attention()([y4, y4]) + + """ 3x3 conv rate=18 """ + y5 = Conv2D(256, 3, padding="same", use_bias=False, dilation_rate=18)(inputs) + y5 = BatchNormalization()(y5) + y5 = Activation("relu")(y5) + #y5 = reshape(y5, y5.shape[1:]) + y5 = Attention()([y5, y5]) + + From c5f39532576f6c2af753e37296588ea293f9ba23 Mon Sep 17 00:00:00 2001 From: sppham Date: Wed, 18 Jan 2023 16:51:47 +0900 Subject: [PATCH 15/27] Update model.py --- model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/model.py b/model.py index d65a721..d3c70dd 100644 --- a/model.py +++ b/model.py @@ -58,3 +58,9 @@ def ASPP(inputs): y5 = Attention()([y5, y5]) + y = Concatenate()([y1, y2, y3, y4, y5]) + y = Conv2D(256, 1, padding="same", use_bias=False)(y) + y = BatchNormalization()(y) + y = Activation("relu")(y) + + return y From 5a8ed60a0fc2e7d89b9d87555cb73c68d4bf7190 Mon Sep 17 00:00:00 2001 From: sppham Date: Thu, 19 Jan 2023 14:02:40 +0900 Subject: [PATCH 16/27] Delete README.md --- README.md | 169 ------------------------------------------------------ 1 file changed, 169 deletions(-) delete mode 100644 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 1db2488..0000000 --- a/README.md +++ /dev/null @@ -1,169 +0,0 @@ -# Medical Multimedia Task: Transparent Tracking of Spermatozoa - -**See the [MediaEval 2022 webpage](https://multimediaeval.github.io/editions/2022/) for information on how to register and participate.* - -**Development dataset is avaiable now [\[Download link 1 - Kaggle\]](https://www.kaggle.com/datasets/vlbthambawita/visemtracking) [\[Download link 2 - Simula-dataset\]](https://datasets.simula.no/visem-tracking/)* - -**Testing dataset is avaiable now [\[Download link\]](https://www.dropbox.com/sh/2ohitza5ouzh2d3/AAD_8VnvdhPqOVlCcAn21Uc8a?dl=0)* - -### Task Description -The 2022 Medico task tackles the challenge of tracking sperm cells of video recordings of spermatozoa. The development dataset contains 20 videos, each one is 30 seconds long, a set of sperm characteristics (hormones, fatty acids data, etc.), frame-by-frame bounding box annotations, some anonymized study participants-related data, and motility and morphology data following the WHO guidelines. The goal is to encourage task participants to track individual sperms in real-time and combine different data sources to predict common measurements used for sperm quality assessment, specifically the motility (movement) spermatozoa (living sperm). - -We hope that this task will encourage the multimedia community to aid in the development of computer-assisted reproductive health and discover new and clever ways of analyzing multimodal datasets. In addition to good analysis performance, an important aspect is also the efficiency of the algorithms due to the fact that the assessment of the sperm is performed in real-time and therefore requires real-time feedback. - -For the task, we will provide a dataset of videos and other data from 20 different patients. Based on this data, the participants will be asked to address the following four subtasks: - -* *Subtask 1: Sperm cell tracking* is real-time tracking of sperm cells in a given sperm videos. Tracking should be performed by predicting bounding box coordinates with the similar format to the bounding box coordinates provided with the development datasets. In this task, models should track sperm in each frame of a provided video in real-time. Therefore, frames per second is a important factor to measure. - -* *Subtask 2: Prediction of motility* in terms of the percentage of progressive and non-progressive spermatozoa is the second task. The prediction needs to be performed sample wise resulting in one value per sample per predicted attribute. Sperm tracking or bounding boxes predicted in the task 1 are required to use to solve the task. Motility is the ability of an organism to move independently, and where a progressive spermatozoon is able to "move forward", a non-progressive would move in circles without any forward progression. - -* *Subtask 3: Catch and highlight* task focus on identifying fastest sperm cells with corresponding average speed and highest top speed. One specific challenge with this subtask is that the video also changes the view on the sample. This happens because the sample is moved below the microscope to observe the complete sample area. Therefore, the tracking has to be performed per viewpoint on the sample. (Optional Subtask.) - -* *Subtask 4: Explainability of predicitons* is perfomed in Subtasks 1 and/or 2 and/or 3 should be explained using machine learning explainable methods to convince domain experts about the final outputs. There is no any specific pre-requirements for this task. However, a report should be provided with explainable methods and corresponding results. (Optional Subtask.) - -For both Subtasks 2 and 3, task-participants are asked to perform video analysis over single frame analysis. This is important due to the fact that single frame-based analysis will not be able to catch the movement of the spermatozoa (motility) which contains important information to perform the predictions on Subtasks 2 and 3. - -### Motivation and background -Manual evaluation of a sperm sample using a microscope is time-consuming and requires costly experts who have extensive training. In addition, the validity of manual sperm analysis becomes unreliable due to limited reproducibility and high inter-personnel variations due to the complexity of tracking, identifying, and counting sperms in fresh samples. The existing computer-aided sperm analyzer systems are not working well enough for application in a real clinical setting due to unreliability caused by the consistency of the semen sample. Therefore, we need to research new methods for automated sperm analysis. - -### Target group -The task is of interest to researchers in the areas of machine learning (classification), visual content analysis and multimodal fusion. Overall, this task is intended to encourage the multimedia community to help improve the health care system through application of their knowledge and methods to reach the next level of computer and multimedia assisted diagnosis, detection and interpretation. - -### Data -The task uses the data set VISEM [2], which contains data from 85 male participants aged 18 years or older. For this task, we have selected only 30 seconds video clips from selected 20 videos. For each participant, we include a set of measurements from a standard semen analysis, a video of live spermatozoa, a sperm fatty acid profile, the fatty acid composition of serum phospholipids, study participants-related data, and WHO analysis data. The dataset contains 20 videos, with each video has 30 seconds duration with corresponding bounding box coordinates. Each video has a resolution of 640x480 and runs at 50 frames-per-second. The dataset contains in total six CSV files (five for data and one which maps video IDs to study participants' IDs), a description file, and folders containing the videos and bounding box data. The name of each video file contains the video's ID, the date it was recorded, and a small optional description. Then, the end of the filename contains the code of the person who assessed the video. Furthermore, VISEM contains five CSV files for each of the other data provided, a CSV file with the IDs linked to each video, and a text file containing * descriptions of some of the columns of the CSV files. One row in each CSV file represents a participant. The provided CSV files are: -* semen_analysis_data: The results of standard semen analysis. -* fatty_acids_spermatozoa: The levels of several fatty acids in the spermatozoa of the participants. -* fatty_acids_serum: The serum levels of the fatty acids of the phospholipids (measured from the blood of the participant). -* sex_hormones: The serum levels of sex hormones measured in the blood of the participants. -* study_participant_related_data: General information about the participants such as age, abstinence time, and Body Mass Index (BMI). -* videos: Overview of which video file belongs to what participant. - -All Study participants agreed to donate their data for the purpose of science and provided the necessary consent for us to be able to distribute the data (checked and approved by the Norwegian data authority and ethical committee). - -### Evaluation -Different evaluation criteria are used based on the task that is to be evaluated. For task 1, we allow for detection and tracking predictions, where we evaluate using standard tracking metrics like HOTA and detection metrics like recall and precision. The scripts used to evaluate the submissions are based on TrackEval (https://github.com/JonathonLuiten/TrackEval), which supports the evaluation of several benchmarks with multiple metrics. The scripts used can be found under the `evaluation` directory in this repository. More information about the specific metrics can be found in the TrackEval repository: https://github.com/JonathonLuiten/TrackEval. - -### Test data downlod link -The prediction of this test dataset should be uploaded using the following submission form. -[Test data download link](https://www.dropbox.com/sh/2ohitza5ouzh2d3/AAD_8VnvdhPqOVlCcAn21Uc8a?dl=0) - -### Submission instructions - -#### Sub-task 1: - -If you are interested in submitting only for detecting sperm in individual frames, then your submission file should be matched to the provided ground truth format (YOLO format). You have to follow the similar file structure of the dataset. Check the folder structure in [https://www.kaggle.com/datasets/vlbthambawita/visemtracking](https://www.kaggle.com/datasets/vlbthambawita/visemtracking). A sample .txt file is below. - -``` -source_code - |- code_and_checkpoints - |- README.txt (must explain how to run your model to detect sperms on a new video) - |- run.sh (shell script file to run your models for new video inputs (.mp4)) -predictions - |- - |- labels - |-