Skip to content

Commit 17bcbfb

Browse files
Update fafb data version to 783 (#93)
* Update FAFB data to v783; retire SVD embeddings and morphology clusters
* Ingest consolidated cell types
* Fix tests
1 parent 785ebf7 commit 17bcbfb

27 files changed

Lines changed: 367 additions & 1395 deletions

README.md

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,24 +17,23 @@ We recommend using an environment manager such as [Poetry](https://python-poetry
1717

1818
```sh
1919
poetry install
20-
poetry shell
2120
```
2221

2322
### Download and initialize the FlyWire connectome data (initially or upon version updates)
2423
```bash
25-
./scripts/make_data.sh
24+
poetry run ./scripts/make_data.sh
2625
```
2726

2827
## Run service locally
2928

3029
```bash
31-
./scripts/run_local.sh
30+
poetry run ./scripts/run_local.sh
3231
```
3332

3433
To run in [Flask debug mode](https://flask.palletsprojects.com/en/2.2.x/debugging/#the-built-in-debugger)
3534

3635
```sh
37-
./scripts/run_local_dev.sh
36+
poetry run ./scripts/run_local_dev.sh
3837
```
3938

4039
Navigate to [localhost:5000](http://localhost:5000)
@@ -48,11 +47,11 @@ Run service locally and click around in all pages
4847
### Unit tests & code coverage (Required)
4948

5049
```sh
51-
./scripts/run_unit_tests.sh
50+
poetry run ./scripts/run_unit_tests.sh
5251
```
5352

5453
## Linting / code formatting
5554

5655
```sh
57-
./scripts/lint.sh
56+
poetry run ./scripts/lint.sh
5857
```

codex/blueprints/app.py

Lines changed: 13 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -104,9 +104,11 @@ def stats():
104104
searched_for_root_id=can_be_flywire_root_id(filter_string),
105105
# If num results is small enough to pass to browser, pass it to allow copying root IDs to clipboard.
106106
# Otherwise it will be available as downloadable file.
107-
root_ids_str=",".join([str(ddi) for ddi in filtered_root_id_list])
108-
if len(filtered_root_id_list) <= MAX_NEURONS_FOR_DOWNLOAD
109-
else [],
107+
root_ids_str=(
108+
",".join([str(ddi) for ddi in filtered_root_id_list])
109+
if len(filtered_root_id_list) <= MAX_NEURONS_FOR_DOWNLOAD
110+
else []
111+
),
110112
filter_string=filter_string,
111113
hint=hint,
112114
data_versions=DATA_SNAPSHOT_VERSION_DESCRIPTIONS,
@@ -205,7 +207,6 @@ def render_neuron_list(
205207
"class",
206208
"sub_class",
207209
"cell_type",
208-
"hemibrain_type",
209210
"hemilineage",
210211
"nt_type",
211212
"nerve",
@@ -227,9 +228,11 @@ def render_neuron_list(
227228
skeleton_thumbnail_urls=skeleton_thumbnail_urls,
228229
# If num results is small enough to pass to browser, pass it to allow copying root IDs to clipboard.
229230
# Otherwise it will be available as downloadable file.
230-
root_ids_str=",".join([str(ddi) for ddi in sorted_search_result_root_ids])
231-
if len(sorted_search_result_root_ids) <= MAX_NEURONS_FOR_DOWNLOAD
232-
else [],
231+
root_ids_str=(
232+
",".join([str(ddi) for ddi in sorted_search_result_root_ids])
233+
if len(sorted_search_result_root_ids) <= MAX_NEURONS_FOR_DOWNLOAD
234+
else []
235+
),
233236
num_items=len(sorted_search_result_root_ids),
234237
searched_for_root_id=can_be_flywire_root_id(filter_string),
235238
pagination_info=pagination_info,
@@ -256,11 +259,6 @@ def render_neuron_list(
256259
page_ids=page_ids,
257260
all_ids=sorted_search_result_root_ids,
258261
),
259-
non_uniform_hemibrain_types=neuron_db.non_uniform_values(
260-
list_attr_key="hemibrain_type",
261-
page_ids=page_ids,
262-
all_ids=sorted_search_result_root_ids,
263-
),
264262
)
265263

266264

@@ -280,9 +278,6 @@ def _search_and_sort():
280278
output_sets=neuron_db.output_sets(),
281279
label_count_getter=lambda x: len(neuron_db.get_neuron_data(x)["label"]),
282280
nt_type_getter=lambda x: neuron_db.get_neuron_data(x)["nt_type"],
283-
morphology_cluster_getter=lambda x: neuron_db.get_neuron_data(x)[
284-
"morphology_cluster"
285-
],
286281
synapse_neuropil_count_getter=lambda x: len(
287282
neuron_db.get_neuron_data(x)["input_neuropils"]
288283
)
@@ -292,7 +287,6 @@ def _search_and_sort():
292287
+ len(neuron_db.input_sets()[x]),
293288
similar_shape_cells_getter=neuron_db.get_similar_shape_cells,
294289
similar_connectivity_cells_getter=neuron_db.get_similar_connectivity_cells,
295-
similar_embedding_cells_getter=neuron_db.get_similar_embedding_cells,
296290
connections_getter=lambda x: neuron_db.cell_connections(x),
297291
sort_by=sort_by,
298292
)
@@ -377,15 +371,12 @@ def download_search_results():
377371
"class",
378372
"sub_class",
379373
"cell_type",
380-
"hemibrain_type",
381374
"hemilineage",
382375
"nerve",
383376
"connectivity_tag",
384377
"side",
385378
"input_synapses",
386379
"output_synapses",
387-
"morphology_cluster",
388-
"connectivity_cluster",
389380
]
390381
data = [cols]
391382
for i in sorted_search_result_root_ids:
@@ -702,9 +693,9 @@ def path_length():
702693
from_root_id = int(r[0])
703694
for j, val in enumerate(r):
704695
if j == 0:
705-
r[
706-
j
707-
] = f'<a href="{url_for("app.search", filter_string="id == " + str(from_root_id))}">{neuron_db.get_neuron_data(from_root_id)["name"]}</a><br><small>{from_root_id}</small>'
696+
r[j] = (
697+
f'<a href="{url_for("app.search", filter_string="id == " + str(from_root_id))}">{neuron_db.get_neuron_data(from_root_id)["name"]}</a><br><small>{from_root_id}</small>'
698+
)
708699
elif val > 0:
709700
to_root_id = int(matrix[0][j])
710701
if not min_syn_count:

codex/data/catalog.py

Lines changed: 5 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,24 +12,18 @@
1212
"ach_avg",
1313
"oct_avg",
1414
],
15-
# LR matching
16-
"lr_matching": [
17-
"left_root_id",
18-
"right_root_id",
19-
],
2015
# one row per id
2116
"classification": [
2217
"root_id",
2318
"flow",
2419
"super_class",
2520
"class",
2621
"sub_class",
27-
"cell_type",
28-
"hemibrain_type",
2922
"hemilineage",
3023
"side",
3124
"nerve",
3225
],
26+
"cell_types": ["root_id", "primary_type", "additional_type(s)"],
3327
# one row per id
3428
"cell_stats": [
3529
"root_id",
@@ -73,25 +67,21 @@
7367
"root_id",
7468
"scores",
7569
],
76-
# one row (or none) per id
77-
"morphology_clusters": ["root_id", "morphology_cluster"],
78-
# one row (or none) per id
79-
"connectivity_clusters": ["root_id", "connectivity_cluster"],
8070
}
8171

8272

8373
def get_neurons_file_columns():
8474
return list(_CODEX_DATA_SCHEMA["neurons"])
8575

8676

87-
def get_lr_matching_file_columns():
88-
return list(_CODEX_DATA_SCHEMA["lr_matching"])
89-
90-
9177
def get_classification_file_columns():
9278
return list(_CODEX_DATA_SCHEMA["classification"])
9379

9480

81+
def get_cell_types_file_columns():
82+
return list(_CODEX_DATA_SCHEMA["cell_types"])
83+
84+
9585
def get_cell_stats_file_columns():
9686
return list(_CODEX_DATA_SCHEMA["cell_stats"])
9787

@@ -114,11 +104,3 @@ def get_nblast_file_columns():
114104

115105
def get_connectivity_tags_file_columns():
116106
return list(_CODEX_DATA_SCHEMA["connectivity_tags"])
117-
118-
119-
def get_morphology_clusters_columns():
120-
return list(_CODEX_DATA_SCHEMA["morphology_clusters"])
121-
122-
123-
def get_connectivity_clusters_columns():
124-
return list(_CODEX_DATA_SCHEMA["connectivity_clusters"])

codex/data/local_data_loader.py

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import gzip
44
import os
55
import pickle
6-
from datetime import datetime
6+
from datetime import datetime, UTC
77

88
from codex.data.neuron_data_initializer import (
99
initialize_neuron_data,
@@ -17,23 +17,20 @@
1717
DATA_ROOT_PATH = "static/data"
1818
NEURON_FILE_NAME = "neurons.csv.gz"
1919
CLASSIFICATION_FILE_NAME = "classification.csv.gz"
20+
CONSOLIDATED_CELL_TYPES_FILE_NAME = "consolidated_cell_types.csv.gz"
2021
CELL_STATS_ROWS = "cell_stats.csv.gz"
2122
CONNECTIONS_FILE_NAME = "connections.csv.gz"
2223
LABELS_FILE_NAME = "labels.csv.gz"
2324
COORDINATES_FILE_NAME = "coordinates.csv.gz"
2425
NBLAST_FILE_NAME = "nblast.csv.gz"
2526
CONNECTIVITY_TAGS_FILE_NAME = "connectivity_tags.csv.gz"
26-
MORPHOLOGY_CLUSTERS_FILE_NAME = "morphology_clusters.csv.gz"
27-
CONNECTIVITY_CLUSTERS_FILE_NAME = "connectivity_clusters.csv.gz"
28-
SVD_FILE_NAME = "svd.csv.gz"
29-
LR_MATCHING_FILE_NAME = "lr_matching.csv.gz"
3027

3128

3229
NEURON_DB_PICKLE_FILE_NAME = "neuron_db.pickle.gz"
3330

34-
GCS_PICKLE_URL_TEMPLATE = "https://storage.googleapis.com/flywire-data/codex/data/{version}/neuron_db.pickle.gz"
31+
GCS_PICKLE_URL_TEMPLATE = "https://storage.googleapis.com/flywire-data/codex/data/fafb/{version}/neuron_db.pickle.gz"
3532
GCS_RAW_DATA_URL_TEMPLATE = (
36-
"https://storage.googleapis.com/flywire-data/codex/data/{version}/{filename}"
33+
"https://storage.googleapis.com/flywire-data/codex/data/fafb/{version}/{filename}"
3734
)
3835

3936

@@ -67,8 +64,8 @@ def _read_data(filename, with_timestamp=False):
6764
if os.path.exists(fname):
6865
rows = read_csv(fname)
6966
if with_timestamp:
70-
return rows, datetime.utcfromtimestamp(
71-
os.path.getmtime(fname)
67+
return rows, datetime.fromtimestamp(
68+
os.path.getmtime(fname), UTC
7269
).strftime("%Y-%m-%d")
7370
else:
7471
return rows
@@ -80,6 +77,7 @@ def _read_data(filename, with_timestamp=False):
8077

8178
neuron_rows = _read_data(NEURON_FILE_NAME)
8279
classification_rows = _read_data(CLASSIFICATION_FILE_NAME)
80+
cell_type_rows = _read_data(CONSOLIDATED_CELL_TYPES_FILE_NAME)
8381
cell_stats_rows = _read_data(CELL_STATS_ROWS)
8482
connection_rows = _read_data(CONNECTIONS_FILE_NAME)
8583
label_rows, labels_file_timestamp = _read_data(
@@ -88,10 +86,6 @@ def _read_data(filename, with_timestamp=False):
8886
coordinate_rows = _read_data(COORDINATES_FILE_NAME)
8987
nblast_rows = _read_data(NBLAST_FILE_NAME)
9088
connectivity_tag_rows = _read_data(CONNECTIVITY_TAGS_FILE_NAME)
91-
morphology_cluster_rows = _read_data(MORPHOLOGY_CLUSTERS_FILE_NAME)
92-
connectivity_cluster_rows = _read_data(CONNECTIVITY_CLUSTERS_FILE_NAME)
93-
svd_rows = _read_data(SVD_FILE_NAME)
94-
lr_matching_rows = _read_data(LR_MATCHING_FILE_NAME)
9589

9690
print(
9791
f" loading data from {data_file_path}:\n"
@@ -104,23 +98,21 @@ def _read_data(filename, with_timestamp=False):
10498
neuron_db = initialize_neuron_data(
10599
neuron_file_rows=neuron_rows,
106100
classification_rows=classification_rows,
101+
cell_type_rows=cell_type_rows,
107102
cell_stats_rows=cell_stats_rows,
108103
connection_rows=connection_rows,
109104
label_rows=label_rows,
110105
labels_file_timestamp=labels_file_timestamp,
111106
coordinate_rows=coordinate_rows,
112107
nblast_rows=nblast_rows,
113108
connectivity_tag_rows=connectivity_tag_rows,
114-
morphology_cluster_rows=morphology_cluster_rows,
115-
connectivity_cluster_rows=connectivity_cluster_rows,
116-
svd_rows=svd_rows,
117-
lr_matching_rows=lr_matching_rows,
118109
)
119110
# free mem
120111
del neuron_rows
121112
del connection_rows
122113
del label_rows
123114
del coordinate_rows
115+
del cell_type_rows
124116
return neuron_db
125117

126118

0 commit comments

Comments
 (0)