ncsa
diff --git a/‎ChangeLog.md‎
Lines changed: 6 additions & 0 deletions b/‎ChangeLog.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 8 additions & 8 deletions b/‎README.md‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎environment.yml‎
Lines changed: 1 addition & 1 deletion b/‎environment.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎neat/common/io.py‎
Lines changed: 1 addition & 1 deletion b/‎neat/common/io.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎neat/models/error_models.py‎
Lines changed: 2 additions & 2 deletions b/‎neat/models/error_models.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎neat/models/variant_models.py‎
Lines changed: 9 additions & 4 deletions b/‎neat/models/variant_models.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎neat/read_simulator/__init__.py‎
Lines changed: 1 addition & 2 deletions b/‎neat/read_simulator/__init__.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎neat/read_simulator/parallel_runner.py‎
Lines changed: 0 additions & 116 deletions b/‎neat/read_simulator/parallel_runner.py‎
Lines changed: 0 additions & 116 deletions
@@ -1,6 +1,12 @@
 # NEAT has a new home
 NEAT is now a part of the NCSA github and active development will continue here. Please direct issues, comments, and requests to the NCSA issue tracker. Submit pull requests here instead of the old repo.
 
+# NEAT v4.3.2
+- Bug fixes for parallel processing, which was causing some of the headers to be printed incorrectly. To fix that, we had to rewrite a bunch of the code and integrate parallelism more directly into NEAT.
+
+# NEAT v4.3.1
+- Bug fixes (see issue #160) having to do with output files.
+
 # NEAT v4.3.1
 - Updated parallel module to integrate it into the code more fluidly. We also updated the options section to revise the process and allow for copying of options objects for parallelism run.
 
 
@@ -138,15 +138,15 @@ The default is given:
 `mutation_bed`: full path to a list of regions with a column describing the mutation rate of that region, as a float with values between 0 and 0.3. The mutation rate must be in the third column as, e.g., mut_rate=0.00.    
 `rng_seed`: Manually enter a seed for the random number generator. Used for repeating runs. _Must be an integer._    
 `min_mutations`: Set the minimum number of mutations that NEAT should add, per contig. _Default is 0._ We recommend setting this to at least one for small chromosomes, so NEAT will produce at least one mutation per contig.
-'threads': Number of threads to use. More than 1 will activate parallel mode and perform part of the calclutations in parallel then recombine into the desired output files.
-'parallel_mode': 'size' or 'contig' whether to divide the contigs into blocks or just by contig. By contig is the default, try by size. Varying the parallel_block_size parameter may help if default values are not sufficient.
-'parallel_block_size': Default value of 500,000.
-'cleanup_splits': If running more than one simulation on the same input fasta, you can reuse splits files. By default, this will be set to False, and splits files will be deleted at the end of the run.
-'reuse_splits': If an existing splits file exists in the output folder, it will use those splits, if this value is set to True.
+`threads`: Number of threads to use. More than 1 will activate parallel mode and perform part of the calculations in parallel, then recombine the results into the desired output files.
+`parallel_mode`: 'size' or 'contig' whether to divide the contigs into blocks or just by contig. By contig is the default, try by size. Varying the parallel_block_size parameter may help if default values are not sufficient.
+`parallel_block_size`: Default value of 500,000.
+`cleanup_splits`: If running more than one simulation on the same input fasta, you can reuse splits files. By default, this will be set to False, and splits files will be deleted at the end of the run.
+`reuse_splits`: If this value is set to True and a splits file already exists in the output folder, those splits will be reused.
 
 The command line options for NEAT are as follows:
 
-Universal options can be applied to any subfunction. The commands should come before the function name (e.g., neat --log-level DEBUG read-simulator ...), excetp -h or --help, which can appear anywhere in the command.
+Universal options can be applied to any subfunction. The commands should come before the function name (e.g., neat --log-level DEBUG read-simulator ...), except -h or --help, which can appear anywhere in the command.
 | Universal Options   | Description                          |
 |---------------------|--------------------------------------|
 | -h, --help          | Displays usage information           |
@@ -161,7 +161,7 @@ read-simulator command line options
 |---------------------|-------------------------------------|
 | -c VALUE, --config VALUE | The VALUE should be the name of the config file to use for this run |
 | -o OUTPUT_DIR, --output_dir OUTPUT_DIR | The path to the directory to write the output files |
-| -p PREFIX, --prefix PREFIX | The prefix for file names |
+| -p PREFIX, --prefix PREFIX | The prefix for file names |
 
 ## Functionality
 
@@ -188,7 +188,7 @@ Features:
 
 ## Examples
 
-The following commands are examples for common types of data to be generated. The simulation uses a reference genome in fasta format to generate reads of 126 bases with default 10X coverage. Outputs paired fastq files, a BAM file and a VCF file. The random variants inserted into the sequence will be present in the VCF and all of the reads will show their proper alignment in the BAM. Unless specified, the simulator will also insert some "sequencing error" -- random variants in some reads that represents false positive results from sequencing.
+The following commands are examples for common types of data to be generated. The simulation uses a reference genome in fasta format to generate reads of 126 bases with default 10X coverage. Outputs paired fastq files, a BAM file and a VCF file. The random variants inserted into the sequence will be present in the VCF and the reads will show their proper alignment in the BAM. Unless specified, the simulator will also insert some "sequencing error" -- random variants in some reads that represent false positive results from sequencing.
 
 ### Whole genome simulation
 Simulate whole genome dataset with random variants inserted according to the default model. 
 
@@ -5,7 +5,7 @@ channels:
 
 dependencies:
   - python=3.10.*
-  - biopython=1.79
+  - biopython=1.85
   - pkginfo
   - matplotlib
   - numpy
 
@@ -63,7 +63,7 @@ def open_input(path: str | Path) -> Iterator[TextIO]:
     # - https://github.com/python/mypy/issues/12053
     open_: Callable[..., TextIO]
     if is_compressed(path):
-        open_ = gzip.open
+        open_ = bgzf.open
     else:
         open_ = open
     handle = open_(path, "rt", encoding="utf-8")
 
@@ -269,8 +269,8 @@ def __init__(self,
                  error_type: VariantTypes,
                  location: int,
                  length: int,
-                 ref: str or Seq,
-                 alt: str or Seq):
+                 ref: str | Seq,
+                 alt: str | Seq):
         self.error_type = error_type
         self.location = location
         self.length = length
 
@@ -2,7 +2,7 @@
 Classes for the variant models included in NEAT.
 Every Variant type in variants > variant_types must have a corresponding model in order to be fully implemented.
 """
-
+import pdb
 import re
 import logging
 import abc
@@ -78,7 +78,7 @@ class DeletionModel(VariantModel):
     _type = Deletion
     _description = "A deletion of a random number of bases"
 
-    def __init__(self, deletion_len_model: dict[int: float, ...]):
+    def __init__(self, deletion_len_model: dict[int, float, ...]):
         # Creating probabilities from the weights
         tot = sum(deletion_len_model.values())
         self.deletion_len_model = {key: val/tot for key, val in deletion_len_model.items()}
@@ -133,8 +133,8 @@ def __init__(
         self.trinuc_bias_map = None
 
         # Some local variables for modeling
-        self.local_trinuc_bias: np.array = None
-        self.local_sequence: Seq or None = None
+        self.local_trinuc_bias: np.ndarray | None = None
+        self.local_sequence: Seq | None = None
 
     def map_local_trinuc_bias(
             self,
@@ -163,7 +163,12 @@ def map_local_trinuc_bias(
                 # Update the map bias at the central position for that trinuc
                 for trinuc in ALL_TRINUCS:
                     for match in re.finditer(trinuc, str(sequence)):
+                        # match.start() + 1 puts us at the center of the trinuc
+                        if match.start() + 1 > len(self.local_trinuc_bias):
+                            print("???")
                         self.local_trinuc_bias[match.start() + 1] = self.trinuc_mutation_bias[TRINUC_IND[trinuc]]
+                        if len(self.local_trinuc_bias) != len(sequence):
+                            print("???")
 
             # Now we normalize the bias
             self.local_trinuc_bias = self.local_trinuc_bias / sum(self.local_trinuc_bias)
 
@@ -1,5 +1,4 @@
 """
 Modules to generate reads
 """
-from .runner import *
-from .parallel_runner import main
+from .runner import *