Skip to content

Commit be06b20

Browse files
committed
Add ungapped-prefilter mode to colabfold_search
1 parent fe88237 commit be06b20

1 file changed

Lines changed: 15 additions & 1 deletion

File tree

colabfold/mmseqs/search.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ def mmseqs_search_monomer(
4040
diff: int = 3000,
4141
qsc: float = -20.0,
4242
max_accept: int = 1000000,
43+
prefilter_mode: int = 0,
4344
s: float = 8,
4445
db_load_mode: int = 2,
4546
threads: int = 32,
@@ -86,10 +87,12 @@ def mmseqs_search_monomer(
8687
# fmt: off
8788
# @formatter:off
8889
search_param = ["--num-iterations", "3", "--db-load-mode", str(db_load_mode), "-a", "-e", "0.1", "--max-seqs", "10000"]
90+
search_param += ["--prefilter-mode", str(prefilter_mode)]
8991
if s is not None:
9092
search_param += ["-s", "{:.1f}".format(s)]
9193
else:
9294
search_param += ["--k-score", "'seq:96,prof:80'"]
95+
9396
filter_param = ["--filter-msa", str(filter), "--filter-min-enable", "1000", "--diff", str(diff), "--qid", "0.0,0.2,0.4,0.6,0.8,1.0", "--qsc", "0", "--max-seq-id", "0.95",]
9497
expand_param = ["--expansion-mode", "0", "-e", str(expand_eval), "--expand-filter-clusters", str(filter), "--max-seq-id", "0.95",]
9598

@@ -141,7 +144,7 @@ def mmseqs_search_monomer(
141144

142145
if use_templates:
143146
run_mmseqs(mmseqs, ["search", base.joinpath("prof_res"), dbbase.joinpath(template_db), base.joinpath("res_pdb"),
144-
base.joinpath("tmp2"), "--db-load-mode", str(db_load_mode), "--threads", str(threads), "-s", "7.5", "-a", "-e", "0.1"])
147+
base.joinpath("tmp2"), "--db-load-mode", str(db_load_mode), "--threads", str(threads), "-s", "7.5", "-a", "-e", "0.1", "--prefilter-mode", str(prefilter_mode)])
145148
run_mmseqs(mmseqs, ["convertalis", base.joinpath("prof_res"), dbbase.joinpath(f"{template_db}{dbSuffix3}"), base.joinpath("res_pdb"),
146149
base.joinpath(f"{template_db}"), "--format-output",
147150
"query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,cigar",
@@ -172,6 +175,7 @@ def mmseqs_search_pair(
172175
base: Path,
173176
uniref_db: Path = Path("uniref30_2302_db"),
174177
mmseqs: Path = Path("mmseqs"),
178+
prefilter_mode: int = 0,
175179
s: float = 8,
176180
threads: int = 64,
177181
db_load_mode: int = 2,
@@ -197,6 +201,7 @@ def mmseqs_search_pair(
197201
# fmt: off
198202
# @formatter:off
199203
search_param = ["--num-iterations", "3", "--db-load-mode", str(db_load_mode), "-a", "-e", "0.1", "--max-seqs", "10000",]
204+
search_param += ["--prefilter-mode", str(prefilter_mode)]
200205
if s is not None:
201206
search_param += ["-s", "{:.1f}".format(s)]
202207
else:
@@ -239,6 +244,13 @@ def main():
239244
parser.add_argument(
240245
"base", type=Path, help="Directory for the results (and intermediate files)"
241246
)
247+
parser.add_argument(
248+
"--prefilter-mode",
249+
type=int,
250+
default=0,
251+
choices=[0, 1, 2],
252+
help="Prefiltering algorithm to use: 0: k-mer (high-mem), 1: ungapped (high-cpu), 2: exhaustive (no prefilter, very slow). See wiki for more details: https://github.com/sokrypton/ColabFold/wiki#colabfold_search",
253+
)
242254
parser.add_argument(
243255
"-s",
244256
type=float,
@@ -388,6 +400,7 @@ def main():
388400
diff=args.diff,
389401
qsc=args.qsc,
390402
max_accept=args.max_accept,
403+
prefilter_mode=args.prefilter_mode,
391404
s=args.s,
392405
db_load_mode=args.db_load_mode,
393406
threads=args.threads,
@@ -398,6 +411,7 @@ def main():
398411
dbbase=args.dbbase,
399412
base=args.base,
400413
uniref_db=args.db1,
414+
prefilter_mode=args.prefilter_mode,
401415
s=args.s,
402416
db_load_mode=args.db_load_mode,
403417
threads=args.threads,

0 commit comments

Comments
 (0)