Skip to content

Commit c3d2de9

Browse files
committed
0.2 version
1 parent cbde8c2 commit c3d2de9

10 files changed

Lines changed: 156 additions & 13 deletions

File tree

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cff-version: 1.2.0
22
message: "If you use this software, please cite it using the metadata from this file."
33
title: "PROBESt"
4-
version: "0.1.4"
4+
version: "0.2.0"
55
doi: "10.20944/preprints202511.2140.v1"
66
date-released: "2025-11-01"
77
authors:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ python setup.py install
2323
## Dependencies
2424
```bash
2525
conda install bioconda::primer3
26-
conda install bioconda::blast
26+
conda install bioconda::blast==2.16.0
2727
```
2828

2929
### Validate installation

app/templates/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
<div class="logo-title">
1515
<img src="{{ url_for('static', filename='ctlab_probest_white.png') }}" alt="PROBESt" style="width: 30vw;">
1616
<div>
17-
<h1>PROBESt v0.1.4</h1>
17+
<h1>PROBESt v0.2.0</h1>
1818
<p class="subtitle">ITMO probe generation and optimization tool</p>
1919
</div>
2020
</div>

pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def merge_iter(iter: int):
5151
"/scripts/generator/"
5252

5353
# 1. Initial set generation ----
54-
print("\n---- PROBESt v.0.1.4 ----\n")
54+
print("\n---- PROBESt v.0.2.0 ----\n")
5555
print("Arguments passed")
5656

5757
# Create TMP

scripts/databases/generate_noisy_probes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,10 +183,10 @@ def main():
183183
parser = argparse.ArgumentParser(description='Generate noisy probe data')
184184
parser.add_argument('--input', required=True, help='Input probeBase CSV file')
185185
parser.add_argument('--output', type=str, default='data/databases/open/probeBase_false.csv', help='Output noisy probeBase CSV file')
186-
parser.add_argument('--mutation-number', type=int, default=5, help='Maximum number of mutations to apply to each probe')
187-
parser.add_argument('--insertion-rate', type=float, default=0.01, help='Insertion mutation rate')
188-
parser.add_argument('--deletion-rate', type=float, default=0.01, help='Deletion mutation rate')
189-
parser.add_argument('--mutation-rate', type=float, default=0.1, help='SNP mutation rate')
186+
parser.add_argument('--mutation-number', type=int, default=20, help='Maximum number of mutations to apply to each probe')
187+
parser.add_argument('--insertion-rate', type=float, default=0.1, help='Insertion mutation rate')
188+
parser.add_argument('--deletion-rate', type=float, default=0.1, help='Deletion mutation rate')
189+
parser.add_argument('--mutation-rate', type=float, default=0.2, help='SNP mutation rate')
190190
parser.add_argument('--iterations', type=int, default=10, help='Number of iterations to generate noisy data')
191191

192192
args = parser.parse_args()

scripts/generator/ML_filtration.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@
44
from sklearn.model_selection import train_test_split
55
from PROBESt.AI import LogisticRegressionModel, PerceptronModel, DeepNeuralNetworkModel
66
from PROBESt.filtration import train_filtration_AI, validate_filtration_AI, apply_filtration_AI
7+
# models_registry is a submodule of the PROBESt package (src/PROBESt/), so it
# has to be imported through the package rather than as a top-level module.
from PROBESt.models_registry import (
    GAILDiscriminator,
    ResidualNet,
    ShallowNet,
    TabTransformer,
    WideNet,
)
from PROBESt.AI import TorchClassifier

# Registry of model factories keyed by display name.
# Each factory takes the input feature count ``n`` and returns a fresh
# TorchClassifier wrapping the corresponding network, built with
# ``weight_pos=5``.
MODELS = {
    "ShallowNet": lambda n: TorchClassifier(ShallowNet(n), weight_pos=5),
    "WideNet": lambda n: TorchClassifier(WideNet(n), weight_pos=5),
    "ResidualNet": lambda n: TorchClassifier(ResidualNet(n), weight_pos=5),
    "GAIL": lambda n: TorchClassifier(GAILDiscriminator(n), weight_pos=5),
    "TabTransformer": lambda n: TorchClassifier(TabTransformer(n), weight_pos=5),
}
718

819
def main():
920
# Load data

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33

44
setup(
55
name='PROBESt',
6-
version='0.1.4',
6+
version='0.2.0',
77
packages=find_packages(where='src'),
88
package_dir={'': 'src'},
9-
python_requires='>=3.10',
9+
python_requires='>=3.12',
1010
author='CTLab',
1111
author_email='dvsmutin@itmo.ru',
1212
description='PROBESt: package for nucleotide probes generation',
@@ -17,7 +17,7 @@
1717
'Development Status :: 3 - Alpha',
1818
'Intended Audience :: Science/Research',
1919
'License :: OSI Approved :: MIT License',
20-
'Programming Language :: Python :: 3.10',
20+
'Programming Language :: Python :: 3.12',
2121
'Topic :: Scientific/Engineering :: Bio-Informatics',
2222
],
2323
entry_points={

src/PROBESt/AI.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,51 @@ def __init__(self, input_size: int, dropout_rate: float = 0.3):
125125

126126
def forward(self, x):
    """Pass ``x`` through ``self.network`` and return the result unchanged."""
    return self.network(x)
128+
class TorchClassifier(BaseAIModel):
    """Binary-classifier wrapper around an arbitrary ``nn.Module``.

    The wrapped module is trained with ``nn.BCEWithLogitsLoss`` and its output
    is passed through ``torch.sigmoid`` at prediction time, so the module is
    expected to return raw logits (one value per sample), not probabilities.

    Args:
        model: Network to train; its parameters are handed to AdamW.
        learning_rate: Learning rate for the AdamW optimizer.
        weight_pos: Weight of the positive class in the loss
            (``pos_weight`` of ``BCEWithLogitsLoss``).
    """

    def __init__(self, model: nn.Module, learning_rate=0.001, weight_pos=1.0):
        super().__init__()
        self.model = model
        self.learning_rate = learning_rate

        # Force a float tensor: callers pass plain ints (e.g. weight_pos=5),
        # which would otherwise produce an int64 pos_weight.
        pos_weight = torch.tensor([weight_pos], dtype=torch.float32)
        self.criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate)

    def train(self, X, y, epochs=100, batch_size=32):
        """Fit the wrapped model on features ``X`` and binary labels ``y``.

        Args:
            X: Feature table, forwarded to ``self.preprocess_data``.
            y: Labels; a pandas object (its ``.values`` is used) or any
                array-like accepted by ``torch.as_tensor``.
            epochs: Number of passes over the data.
            batch_size: Mini-batch size for the shuffled DataLoader.
        """
        # NOTE(review): preprocess_data is inherited from BaseAIModel and is
        # presumably what fits self.scaler used at prediction time - confirm.
        X_scaled = self.preprocess_data(X)
        X_tensor = torch.FloatTensor(X_scaled)
        labels = getattr(y, "values", y)
        y_tensor = torch.as_tensor(labels, dtype=torch.float32).reshape(-1, 1)

        dataset = torch.utils.data.TensorDataset(X_tensor, y_tensor)
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Put BatchNorm/Dropout layers into training behaviour; predict_proba
        # switches the module to eval mode, so this must be re-enabled on
        # every call to train().
        self.model.train()

        for e in range(epochs):
            total_loss = 0
            for bx, by in loader:
                self.optimizer.zero_grad()
                logits = self.model(bx)
                loss = self.criterion(logits, by)
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()

            # total_loss is the sum of per-batch losses over the epoch.
            if e % 20 == 0:
                print(f"Epoch {e}: loss = {total_loss:.4f}")

    def predict(self, X):
        """Return hard 0/1 predictions (probability > 0.5) as an int array."""
        return (self.predict_proba(X) > 0.5).astype(int)

    def predict_proba(self, X):
        """Return positive-class probabilities as a NumPy array.

        ``X`` is scaled with ``self.scaler.transform`` and the model's logits
        are squashed with a sigmoid.
        """
        X_scaled = self.scaler.transform(X)
        X_tensor = torch.FloatTensor(X_scaled)
        # Inference must run in eval mode so Dropout is disabled and BatchNorm
        # uses its running statistics; otherwise predictions are noisy and
        # depend on the composition of the batch being scored.
        self.model.eval()
        with torch.no_grad():
            preds = torch.sigmoid(self.model(X_tensor)).numpy()
        return preds
128173

129174
class DeepNeuralNetworkModel(BaseAIModel):
130175
def __init__(self, input_size: int, learning_rate: float = 0.001, dropout_rate: float = 0.3):
@@ -202,4 +247,4 @@ def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
202247
self.model.eval()
203248
with torch.no_grad():
204249
predictions = self.model(X_tensor)
205-
return predictions.numpy()
250+
return predictions.numpy()

src/PROBESt/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@
77
from . import merge
88
from . import misc
99
from . import args
10-
from . import bash_wrappers
10+
from . import bash_wrappers
11+
from . import models_registry
12+
from . import AI
13+
from . import filtration

src/PROBESt/models_registry.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import torch
2+
import torch.nn as nn
3+
from torch.nn import functional as F
4+
5+
# ---------- Shallow small net ----------
6+
class ShallowNet(nn.Module):
    """Small MLP with one 32-unit hidden layer that outputs a single logit.

    The network deliberately has no final sigmoid: ``TorchClassifier`` trains
    with ``BCEWithLogitsLoss`` and applies ``torch.sigmoid`` itself, so a
    sigmoid here would be applied twice. Call ``torch.sigmoid`` on the output
    to obtain a probability.

    Args:
        input_size: Number of input features (size of the last dimension).
    """

    def __init__(self, input_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Return raw logits with a trailing dimension of size 1."""
        return self.net(x)
17+
18+
# ---------- Wide fully-connected net ----------
19+
class WideNet(nn.Module):
    """Fully connected net with two 256-unit hidden layers and one logit output.

    No final sigmoid is applied: ``TorchClassifier`` pairs these models with
    ``BCEWithLogitsLoss`` and applies ``torch.sigmoid`` at prediction time, so
    the module must return raw logits. Call ``torch.sigmoid`` on the output to
    obtain a probability.

    Args:
        input_size: Number of input features (size of the last dimension).
    """

    def __init__(self, input_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(self, x):
        """Return raw logits with a trailing dimension of size 1."""
        return self.net(x)
32+
33+
# ---------- Residual MLP ----------
34+
class ResidualBlock(nn.Module):
    """Linear -> BatchNorm1d block with a skip connection and ReLU.

    Input and output both have ``width`` features, which is what allows the
    input to be added back onto the transformed activations.
    """

    def __init__(self, width):
        super().__init__()
        self.fc = nn.Linear(width, width)
        self.bn = nn.BatchNorm1d(width)

    def forward(self, x):
        """Return ``relu(bn(fc(x)) + x)``."""
        transformed = self.fc(x)
        normalized = self.bn(transformed)
        return F.relu(normalized + x)
41+
42+
class ResidualNet(nn.Module):
    """Residual MLP: 128-unit input projection, two residual blocks, one logit.

    The output is a raw logit. ``TorchClassifier`` trains these models with
    ``BCEWithLogitsLoss`` and applies ``torch.sigmoid`` at prediction time, so
    applying a sigmoid here as well would squash the output twice. Call
    ``torch.sigmoid`` on the result to obtain a probability.

    Args:
        input_size: Number of input features (size of the last dimension).
    """

    def __init__(self, input_size):
        super().__init__()
        self.input_layer = nn.Linear(input_size, 128)
        self.block1 = ResidualBlock(128)
        self.block2 = ResidualBlock(128)
        self.output = nn.Linear(128, 1)

    def forward(self, x):
        """Return raw logits with a trailing dimension of size 1."""
        x = F.relu(self.input_layer(x))
        x = self.block1(x)
        x = self.block2(x)
        return self.output(x)
54+
55+
# ---------- GAIL-style discriminator ----------
56+
class GAILDiscriminator(nn.Module):
    """LeakyReLU MLP (256 -> 128 -> 1) that outputs a single logit.

    No final sigmoid is applied: ``TorchClassifier`` pairs these models with
    ``BCEWithLogitsLoss`` and applies ``torch.sigmoid`` at prediction time, so
    the module must return raw logits. Call ``torch.sigmoid`` on the output to
    obtain a probability.

    Args:
        input_size: Number of input features (size of the last dimension).
    """

    def __init__(self, input_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Return raw logits with a trailing dimension of size 1."""
        return self.net(x)
69+
70+
# ---------- TabTransformer ----------
71+
class TabTransformer(nn.Module):
    """Transformer encoder over a single embedded token, producing one logit.

    The whole feature vector is projected to a 64-dimensional embedding and
    fed to the encoder as a sequence of length 1; the encoder output for that
    token is mapped to a single logit.

    The output is a raw logit. ``TorchClassifier`` trains these models with
    ``BCEWithLogitsLoss`` and applies ``torch.sigmoid`` at prediction time, so
    applying a sigmoid here as well would squash the output twice. Call
    ``torch.sigmoid`` on the result to obtain a probability.

    Args:
        input_size: Number of input features (size of the last dimension).
        n_heads: Number of attention heads in each encoder layer.
        depth: Number of stacked encoder layers.
    """

    def __init__(self, input_size, n_heads=4, depth=3):
        super().__init__()
        self.embedding = nn.Linear(input_size, 64)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=64, nhead=n_heads, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        self.fc_out = nn.Linear(64, 1)

    def forward(self, x):
        """Return raw logits with a trailing dimension of size 1.

        Assumes ``x`` is ``(batch, input_size)`` - TODO confirm against callers.
        """
        # unsqueeze(1) inserts the sequence axis expected with batch_first=True.
        x = self.embedding(x).unsqueeze(1)
        x = self.transformer(x)
        # Index 0 selects the only token in the sequence.
        return self.fc_out(x[:, 0])

0 commit comments

Comments
 (0)