Skip to content

Commit f631d72

Browse files
Merge pull request #22 from RIVM-bioinformatics/dev
Use AminoExtract 0.4.1
2 parents 9593eb9 + 369d152 commit f631d72

12 files changed

Lines changed: 189 additions & 106 deletions

File tree

.github/workflows/release.yml

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,32 @@ on:
44
branches:
55
- main
66

7+
permissions:
8+
contents: write
9+
pull-requests: write
10+
711
jobs:
812
bump-version:
913
name: Release version
1014
runs-on: ubuntu-latest
1115

1216
steps:
13-
- uses: GoogleCloudPlatform/release-please-action@v2
17+
- uses: GoogleCloudPlatform/release-please-action@v4
1418
id: release
1519
with:
1620
release-type: python
17-
package-name: TrueConsense
21+
token: ${{ secrets.RELEASE_TOKEN }}
22+
config-file: release-please-config.yaml
23+
manifest-file: .release-please-manifest.json
1824

1925
update-docs:
2026
needs: bump-version
2127
name: Update docs
2228
runs-on: ubuntu-latest
23-
if: "contains(github.event.head_commit.message, 'chore:')"
29+
if: contains(github.event.head_commit.message, 'chore(main):')
2430
steps:
2531
- name: Checkout main
26-
uses: actions/checkout@v2
32+
uses: actions/checkout@v4
2733
with:
2834
fetch-depth: 0
2935

@@ -38,17 +44,15 @@ jobs:
3844
run: |
3945
git config --global user.name "Github Actions"
4046
git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
41-
4247
- name: Setup Python
43-
uses: actions/setup-python@v2
48+
uses: actions/setup-python@v5
4449
with:
45-
python-version: '3.7'
50+
python-version: '3.11'
4651

4752
- name: Install dependencies
4853
run: |
4954
python -m pip install --upgrade pip
5055
pip install -r docs-requirements.txt
51-
5256
- name: Publish docs
5357
run: |
54-
mike deploy --config-file mkdocs.yml --push --force --update-aliases $(git tag --sort=committerdate | tail -1 | sed 's/v//') latest
58+
mike deploy --config-file mkdocs.yml --push --update-aliases $(git tag --sort=committerdate | tail -1 | sed 's/v//') latest

.github/workflows/sync.yml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,32 @@ on:
44
branches:
55
- main
66

7+
permissions:
8+
contents: write
9+
pull-requests: write
10+
711
jobs:
812
sync-branches:
913
runs-on: ubuntu-latest
1014
name: Syncing branches
11-
if: "contains(github.event.head_commit.message, 'chore:')"
15+
# if: contains(github.event.head_commit.message, 'chore(main):')
1216

1317
steps:
1418
- name: Checkout
15-
uses: actions/checkout@v2
19+
uses: actions/checkout@v4
1620

1721
- name: Set up Node
18-
uses: actions/setup-node@v1
22+
uses: actions/setup-node@v4
1923
with:
20-
node-version: 12
24+
node-version: 20
2125

26+
# note: this is a fork of the original sync-branches action which seems to be abandoned. We might need a better solution in the future
2227
- name: Opening pull request
2328
id: pull
24-
uses: tretuna/sync-branches@1.4.0
29+
uses: jdtx0/branch-sync@v1.5.1
2530
with:
2631
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
2732
FROM_BRANCH: "main"
2833
TO_BRANCH: "dev"
2934
PULL_REQUEST_TITLE: "chore: sync main to dev"
35+
PULL_REQUEST_AUTO_MERGE_METHOD: "squash"

.release-please-manifest.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
".": "0.5.1"
3+
}

TrueConsense/Outputs.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .Events import ListInserts
99
from .indexing import Readbam
1010
from .Sequences import BuildConsensus
11-
11+
from AminoExtract.gff_data import GFFColumns
1212

1313
def WriteGFF(gffheader, gffdict, output_gff, name):
1414
"""Function takes a GFF header, a dictionary of GFF features, an output directory, and a name for
@@ -26,16 +26,49 @@ def WriteGFF(gffheader, gffdict, output_gff, name):
2626
the name of the file you want to write
2727
2828
"""
29+
cols = GFFColumns.get_names()
30+
cols_without_attr = [col for col in cols if col != "attributes"]
31+
32+
def combine_dict_into_attributes(input_dict: dict[str, str]) -> str:
33+
attribute_dict = {}
34+
for k, v in input_dict.items():
35+
if k == "attributes":
36+
for attribute in v.split(";"):
37+
if attribute == "":
38+
continue
39+
key, value = attribute.split("=")
40+
attribute_dict[key] = value
41+
else:
42+
attribute_dict[k] = v
43+
44+
return ";".join(f"{k}={v}" for k, v in attribute_dict.items())
45+
46+
def clean_dict(input_dict: dict[str, str]) -> dict[str, str]:
47+
clean_dict = {}
48+
attribute_dict = {}
49+
for k, v in input_dict.items():
50+
if str(k).lower() not in cols_without_attr:
51+
attribute_dict[str(k).lower()] = str(v)
52+
else:
53+
clean_dict[str(k).lower()] = str(v)
54+
55+
attribute_str = combine_dict_into_attributes(attribute_dict)
56+
clean_dict["attributes"] = attribute_str
57+
assert list(clean_dict.keys()) == cols
58+
return clean_dict
59+
60+
61+
# the gffdict will have 0, 1, 2, etc as keys, for each line in the GFF file
62+
# the values will be dictionaries containing the GFF columns for that line, with a lot of additional columns
63+
# these additional columns will all be forced into the attributes column
64+
2965
with open(output_gff, "w") as out:
30-
out.write(gffheader)
66+
out.write(gffheader.raw_text)
67+
3168

32-
for k, v in gffdict.items():
33-
for nk, nv in v.items():
34-
if str(nk) == str(list(v.keys())[-1]):
35-
out.write(str(nv))
36-
else:
37-
out.write(str(nv) + "\t")
38-
out.write("\n")
69+
for line_number, gff_data in gffdict.items():
70+
cleaned_data = clean_dict(gff_data)
71+
out.write("\t".join([str(v) for v in cleaned_data.values()]) + "\n")
3972

4073

4174
def WriteOutputs(
@@ -65,6 +98,7 @@ def WriteOutputs(
6598
)[0]
6699

67100
if output_gff is not None:
101+
68102
WriteGFF(gffheader, newgff, output_gff, name)
69103

70104
if output_vcf is not None:

TrueConsense/Sequences.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,15 @@ def _orf_codonposition(gffdict, p):
102102
end = gffdict[k].get("end") + 1
103103

104104
if p in range(start, end):
105-
a.append(str(gffdict[k].get("attributes").split(";")[1].split("=")[-1]))
106-
a.append((p - start) % 3)
105+
# "attributes" may be missing or an empty string; in both cases this feature is skipped
106+
attr = gffdict[k].get("attributes", "")
107+
if not attr:
108+
continue
109+
split_attr = attr.split(";")
110+
if len(split_attr) < 2:
111+
a.extend((str(attr.split(";")[0].split("=")[-1]), (p - start) % 3))
112+
continue
113+
a.extend((str(attr.split(";")[1].split("=")[-1]), (p - start) % 3))
107114
if a:
108115
return tuple(a)
109116
return None, None

TrueConsense/TrueConsense.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -209,53 +209,56 @@ def check_index_override(fname):
209209
return args
210210

211211

212-
def main():
213-
if len(sys.argv[1:]) < 1:
212+
def main(args: list[str] | None = None):
213+
if not args:
214+
args = sys.argv[1:]
215+
216+
if len(args) < 1:
214217
print(
215218
"TrueConsense was called but no arguments were given, please try again.\nUse 'TrueConsense -h' to see the help document"
216219
)
217220
sys.exit(1)
218-
args = GetArgs(sys.argv[1:])
221+
parsed_args = GetArgs(args)
219222

220-
bam = Readbam(args.input)
223+
bam = Readbam(parsed_args.input)
221224

222-
with cf.ThreadPoolExecutor(max_workers=args.threads) as xc:
223-
IndexDF = xc.submit(BuildIndex, args.input, args.reference)
224-
IndexGff = xc.submit(Gffindex, args.features)
225+
with cf.ThreadPoolExecutor(max_workers=parsed_args.threads) as xc:
226+
IndexDF = xc.submit(BuildIndex, parsed_args.input, parsed_args.reference)
227+
IndexGff = xc.submit(Gffindex, parsed_args.features)
225228

226229
IndexDF = IndexDF.result()
227230
IndexGff = IndexGff.result()
228231

229-
if args.index_override:
232+
if parsed_args.index_override:
230233
IndexDF = Override_index_positions(
231-
IndexDF, read_override_index(args.index_override)
234+
IndexDF, read_override_index(parsed_args.index_override)
232235
)
233236

234237
indexDict = IndexDF.to_dict("index")
235238
GffHeader = IndexGff.header
236239
GffDF = IndexGff.df
237-
GffDF["seq_id"] = args.samplename
240+
GffDF["seqid"] = parsed_args.samplename
238241
GffDict = GffDF.to_dict("index")
239242

240-
with cf.ThreadPoolExecutor(max_workers=args.threads) as xc:
241-
if args.depth_of_coverage is not None:
242-
xc.submit(BuildCoverage, indexDict, args.depth_of_coverage)
243+
with cf.ThreadPoolExecutor(max_workers=parsed_args.threads) as xc:
244+
if parsed_args.depth_of_coverage is not None:
245+
xc.submit(BuildCoverage, indexDict, parsed_args.depth_of_coverage)
243246

244-
if args.noambiguity is False:
247+
if parsed_args.noambiguity is False:
245248
IncludeAmbig = True
246-
elif args.noambiguity is True:
249+
elif parsed_args.noambiguity is True:
247250
IncludeAmbig = False
248251

249252
WriteOutputs(
250-
args.coverage_level,
253+
parsed_args.coverage_level,
251254
indexDict,
252255
GffDict,
253-
args.input,
256+
parsed_args.input,
254257
IncludeAmbig,
255-
args.variants,
256-
args.samplename,
257-
args.reference,
258-
args.output_gff,
258+
parsed_args.variants,
259+
parsed_args.samplename,
260+
parsed_args.reference,
261+
parsed_args.output_gff,
259262
GffHeader,
260-
args.output,
263+
parsed_args.output,
261264
)

TrueConsense/indexing.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import gffpandas.gffpandas as gffpd
1+
from AminoExtract import SequenceReader, GFFDataFrame
22
import pandas as pd
33
import pysam
44

@@ -19,7 +19,7 @@ def Readbam(f):
1919
return pysam.AlignmentFile(f, "rb")
2020

2121

22-
def Gffindex(file):
22+
def Gffindex(file: str) -> GFFDataFrame:
2323
"""Reads in a GFF3 file and returns a pandas dataframe
2424
2525
Parameters
@@ -32,7 +32,8 @@ def Gffindex(file):
3232
A dataframe
3333
3434
"""
35-
return gffpd.read_gff3(file)
35+
reader = SequenceReader(logger=None)
36+
return reader.read_gff(file)
3637

3738

3839
def read_override_index(f):

docs/installation.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,17 @@ TrueConsense is only available on Linux (or Linux-based) operating systems. MacO
55
TrueConsense will be made available for installation through Conda and Pip. However, this is currently not yet available.
66
We will update these docs when installation through Conda and/or pip is available.
77

8-
## Prerequisistes
8+
## Prerequisites
99

10-
TrueConsense requires Python 3.7 or later to be installed on your system (or in an environment).
10+
TrueConsense requires Python 3.10 or later to be installed on your system (or in an environment).
1111

1212
Other dependencies will be installed during the installation, you don't have to install them manually. These extra dependencies are as follows:
1313

14-
* pysam<0.16
15-
* pandas>=1.2.3
16-
* gffpandas>=1.2.0
17-
* parmap>=1.5.2
18-
* tqdm>=4.59.0
19-
* biopython>=1.78
14+
* pysam==0.23.3
15+
* pandas==2.3.*
16+
* tqdm==4.59.*
17+
* biopython==1.85
18+
* aminoextract==0.4.1
2019

2120
We strongly advise you to use a conda environment (or similar) to make sure there won't be any conflicts in package dependencies.
2221

env.yml

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,11 @@ name: TrueConsense
22
channels:
33
- bioconda
44
- conda-forge
5-
- intel
6-
- anaconda
7-
- defaults
85
dependencies:
9-
- python=3.7
10-
- pandas>=1.2.3
11-
- pysam<0.16
12-
- biopython>=1.78
6+
- python>=3.10
7+
- pandas==2.3.*
8+
- pysam==0.23.3
9+
- biopython==1.85
1310
- tqdm==4.59.0
14-
- biopython>=1.78
15-
- pip
16-
- pip:
17-
- gffpandas>=1.2
11+
- aminoextract==0.4.1
12+
- pytest==8.4.*

0 commit comments

Comments
 (0)