Skip to content

Commit c2e95de

Browse files
committed
Some bug fixes
1 parent 98f67a0 commit c2e95de

14 files changed

Lines changed: 12 additions & 9 deletions

crispr-ambiguous-mapping/crispr_ambiguous_mapping/processing/crispr_guide_counting.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from .crispr_count_processing import get_counterseries_all_results
2727
from ..quality_control.crispr_mapping_quality_control import perform_counts_quality_control
2828
from ..models.mapping_models import GeneralGuideCountType, GeneralMappingInferenceDict
29-
from ..models.mapping_models import AllMatchSetWhitelistReporterCounterSeriesResults, WhitelistReporterCountsResult, SampleWhitelistReporterCountsResult, InferenceResult, CountInput
29+
from ..models.mapping_models import AllMatchSetWhitelistReporterCounterSeriesResults, WhitelistReporterCountsResult, SampleWhitelistReporterCountsResult, InferenceResult, CountInput, QualityControlResult
3030

3131

3232
# TODO: There will probably be some type errors with the DefaultDict when testing on non-UMI inputs (since it requires CounterType), so make sure to test with different variations of inputs
@@ -164,7 +164,7 @@ def pad_series(series):
164164
# Some organization: Map the inferred result of each observed sequence to a dict with the inferred result and corresponding count
165165

166166
if contains_sample_barcode:
167-
observed_guide_reporter_umi_counts_inferred_all_samples: DefaultDict[str, GeneralMappingInferenceDict] = defaultdict(GeneralMappingInferenceDict)
167+
observed_guide_reporter_umi_counts_inferred_all_samples: DefaultDict[str, GeneralMappingInferenceDict] = defaultdict(lambda: defaultdict(dict))
168168
# Add all cell_barcodes
169169
for observed_guide_reporter_key_index, observed_guide_reporter_key in enumerate(observed_guide_reporter_list): # Iterate through each observed guide key
170170
observed_guide_reporter_cell_counts = observed_guide_reporter_umi_counts[observed_guide_reporter_key]
@@ -184,17 +184,20 @@ def pad_series(series):
184184

185185
# GET THE MAPPED COUNT SERIES BASED ON THE INFERENCE RESULTS
186186
print("Prepare the processed count series ")
187-
all_cell_barcodes: List[str] = observed_guide_reporter_umi_counts_inferred_all_samples.keys()
188-
all_match_set_whitelist_reporter_counter_series_results_all_samples: DefaultDict[str, AllMatchSetWhitelistReporterCounterSeriesResults]
189-
quality_control_result_all_samples: DefaultDict[str, GeneralMappingInferenceDict]
190-
for cell_barcode in all_cell_barcodes:
187+
all_cell_barcodes: List[str] = list(observed_guide_reporter_umi_counts_inferred_all_samples.keys())
188+
all_match_set_whitelist_reporter_counter_series_results_all_samples: DefaultDict[str, AllMatchSetWhitelistReporterCounterSeriesResults] = defaultdict(AllMatchSetWhitelistReporterCounterSeriesResults)
189+
quality_control_result_all_samples: DefaultDict[str, QualityControlResult] = defaultdict(QualityControlResult)
190+
for cell_barcode_i, cell_barcode in enumerate(all_cell_barcodes):
191191
observed_guide_reporter_umi_counts_inferred_per_sample = observed_guide_reporter_umi_counts_inferred_all_samples[cell_barcode]
192192
all_match_set_whitelist_reporter_counter_series_results_per_sample = get_counterseries_all_results(observed_guide_reporter_umi_counts_inferred_per_sample, whitelist_guide_reporter_df, contains_guide_barcode, contains_guide_surrogate, contains_guide_umi)
193-
quality_control_result_per_sample = perform_counts_quality_control(observed_guide_reporter_umi_counts_inferred_per_sample, contains_guide_umi, contains_guide_surrogate, contains_guide_barcode)
193+
quality_control_result_per_sample: QualityControlResult = perform_counts_quality_control(observed_guide_reporter_umi_counts_inferred_per_sample, contains_guide_umi, contains_guide_surrogate, contains_guide_barcode)
194194

195195
all_match_set_whitelist_reporter_counter_series_results_all_samples[cell_barcode] = all_match_set_whitelist_reporter_counter_series_results_per_sample
196196
quality_control_result_all_samples[cell_barcode] = quality_control_result_per_sample
197197

198+
if cell_barcode_i % 2500 == 0:
199+
print(f"- Processed cell_barcode {cell_barcode_i} out of {len(all_cell_barcodes)}")
200+
198201

199202
count_input= CountInput(whitelist_guide_reporter_df=whitelist_guide_reporter_df,
200203
contains_surrogate=contains_guide_surrogate,
@@ -232,7 +235,7 @@ def pad_series(series):
232235
print(f"{(after_counterseries_time-after_inference_processing_time).seconds} seconds for counter series generation")
233236

234237
print("Preparing quality control")
235-
quality_control_result = perform_counts_quality_control(observed_guide_reporter_umi_counts_inferred, contains_guide_umi, contains_guide_surrogate, contains_guide_barcode)
238+
quality_control_result: QualityControlResult = perform_counts_quality_control(observed_guide_reporter_umi_counts_inferred, contains_guide_umi, contains_guide_surrogate, contains_guide_barcode)
236239

237240
after_qualitycontrol_time = datetime.now()
238241
print(f"{(after_qualitycontrol_time-after_counterseries_time).seconds} seconds for quality control")
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)