Skip to content

Commit 0441fd3

Browse files
committed
Switching to using Postgres DB
1 parent 37375c7 commit 0441fd3

8 files changed

Lines changed: 203 additions & 46 deletions

File tree

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,7 @@ BRAIN_CREDENTIAL_PASSWORD=
33

44
# Only google api key is used at the moment
55
GOOGLE_API_KEY=
6-
# OPENAI_API_KEY=
6+
OPENAI_API_KEY=
7+
8+
# Database URL
9+
DATABASE_URL=

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,4 @@ brain/tools/data/delay_0.csv
135135
brain/tools/data/delay_1.csv
136136
brain/tools/data/TOP*_delay_*.csv
137137
alphas_database.db
138+
migrations/schema.sql

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,19 @@ For development of code and commiting into git repository, run following command
3737
```bash
3838
pre-commit install
3939
```
40+
41+
## Database Migrations
42+
43+
1. Set `DATABASE_URL` in `.env` file
44+
2. Create new migration:
45+
46+
```bash
47+
./db_migrate.sh new my_migration_name
48+
```
49+
50+
3. Update migrations file
51+
4. Execute migrations
52+
53+
```bash
54+
./db_migrate.sh up
55+
```

brain/agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ def prompt(
3838
6. Always try uncommon data fields first before using close, volumn, open, close, etc.
3939
7. Propose the alpha and run the alpha simulation.
4040
41-
- Experiment with different operators
4241
- Try applying different functions and changing parameter values.
4342
- For d parameter use one of the following values: 2, 5, 10, 20, 30, 50, 100, 120, 250
43+
- Create simple alphas that are easy to understand and explain
4444
- Start by combining two data fields and add more if alpha is good.
4545
- Apply vec_avg(x) or vec_sum(x) if data field is a type VECTOR.
4646
- Change alpha completely if you think it is not good enough.

brain/database.py

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,45 @@
11
"""Database module for storing and managing alphas."""
22

3-
import sqlite3
4-
from pathlib import Path
3+
import os
54

65
import numpy as np
6+
import psycopg
7+
from dotenv import load_dotenv
8+
from psycopg.rows import dict_row
79

810
from brain.alpha_class import Alpha
911

10-
DB_PATH = Path(__file__).parent.parent / "alphas_database.db"
12+
load_dotenv()
1113

12-
13-
sqlite3.register_adapter(np.int64, lambda x: int(x))
14+
psycopg.adapters.register_dumper(np.int64, psycopg.types.numeric.IntDumper)
1415

1516

1617
class Database:
    """Store and look up alphas in a PostgreSQL database.

    The connection is opened in autocommit mode, and a single cursor that
    returns rows as dictionaries is shared by all methods.
    """

    def __init__(self, db_url: str = None):
        """Open the database connection.

        Args:
            db_url: PostgreSQL connection string. When omitted or empty, the
                ``DATABASE_URL`` environment variable is used instead.

        Raises:
            ValueError: If neither ``db_url`` nor ``DATABASE_URL`` provides a
                connection string.
        """
        self.db_url = db_url or os.environ.get("DATABASE_URL")
        if not self.db_url:
            raise ValueError(
                "Database URL is not set: pass db_url or define DATABASE_URL"
            )
        # autocommit is requested through connect() instead of
        # Connection.set_autocommit(), which is only available in newer
        # psycopg 3 releases.
        self.conn = psycopg.connect(
            self.db_url, row_factory=dict_row, autocommit=True
        )
        self.cursor = self.conn.cursor()

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor and connection when leaving a ``with`` block."""
        self.close()

    def insert_alpha(self, alpha: Alpha) -> None:
        """Insert an Alpha instance as a new row of the ``alphas`` table.

        Column names are the keys of ``alpha.as_dict()``; the values are
        bound as query parameters.
        """
        record = alpha.as_dict()
        cols = ", ".join(record)
        placeholders = ", ".join("%s" for _ in record)
        sql = f"INSERT INTO alphas ({cols}) VALUES ({placeholders})"
        self.cursor.execute(sql, tuple(record.values()))

    def find_by_code(self, code: str, neutralization: str, delay: int) -> list[Alpha]:
        """Return the alphas matching the code, neutralization and delay.

        Results are ordered by ``created_at``, newest first.
        """
        sql = (
            "SELECT * FROM alphas WHERE regular = %s AND neutralization = %s AND delay = %s "
            "ORDER BY created_at DESC"
        )
        self.cursor.execute(sql, (code, neutralization, delay))
        rows = self.cursor.fetchall()
        return [Alpha.from_row(r) for r in rows]

    def close(self):
        """Close the cursor, then the connection."""
        self.cursor.close()
        self.conn.close()

db_migrate.sh

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/bin/bash
# Runs migrations on PostgreSQL
# Based on:
# https://github.com/Schniz/migrate.sh/blob/master/migrate.sh

# Load environment variables from the .env file when it exists; `set -a`
# exports every variable the file defines. When the file is absent,
# DATABASE_URL is expected to come from the calling environment.
if [ -f .env ]; then
  set -a
  source .env
  set +a
fi

set -e
10+
11+
# Create an empty, timestamp-prefixed SQL file under migrations/.
#
# Arguments: the words of the migration description. They are joined with
#            underscores and lower-cased to form the file name suffix.
# Output:    a line naming the file that was created.
# Exits with status 1 when no description is given.
function make_new_migration() {
  if [ $# -eq 0 ]; then
    echo " --> ERROR: migration name is required" >&2
    exit 1
  fi

  local file_desc file_path
  # "$*" is quoted so glob characters in the description are not expanded.
  file_desc=$(echo "$*" | sed -E "s/[[:space:]]+/_/g" | awk '{print tolower($0)}')
  file_path="migrations/$(get_timestamp)_${file_desc}.sql"
  mkdir -p migrations
  touch "$file_path"
  echo " --> touched $file_path"
}
18+
19+
# Abort the script with status 1 unless DATABASE_URL holds a non-empty value.
function verify_database_url() {
  [ -n "$DATABASE_URL" ] && return 0
  echo " --> ERROR: variable DATABASE_URL is not defined"
  exit 1
}
25+
26+
# Print, one per line, the migration files in migrations/ that are not yet
# recorded in the `migrations` table.
#
# A file counts as applied only when its name matches a recorded filename
# exactly. migrations/schema.sql is skipped because it is the dump written by
# dump_schema, not a migration.
function pending_migrations() {
  local applied candidate

  # psql errors are discarded, so a failed query is treated as "nothing
  # applied yet". `|| true` keeps an empty result from counting as a failure.
  applied=$(psql "$DATABASE_URL" -t -c "select filename from migrations" 2> /dev/null \
    | sed "s@[[:space:]]@@g" | grep . || true)

  # Without a migrations directory there is nothing to report.
  [ -d migrations ] || return 0

  for candidate in migrations/*.sql; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$candidate" ] || continue
    candidate=${candidate##*/}
    if [ "$candidate" == "schema.sql" ]; then
      continue
    fi
    # -F -x: compare as a fixed string against whole lines only.
    if ! grep -Fxq -- "$candidate" <<< "$applied"; then
      echo "$candidate"
    fi
  done
}
38+
39+
# Apply every pending migration in the order reported by pending_migrations.
#
# Each migration file is wrapped in a begin/commit pair together with the
# insert that records its filename in the `migrations` table. psql runs with
# ON_ERROR_STOP=1 so a failing statement stops that psql run.
function run_migrations() {
  local pending migration contents script

  pending=$(pending_migrations)
  if [ -z "$pending" ]; then
    echo " --> Nothing to migrate!"
    return 0
  fi

  echo "PENDING MIGRATIONS:"
  # Intentionally unquoted: prints the names on a single line.
  echo $pending
  echo "==================="

  for migration in $pending; do
    contents=$(cat "migrations/$migration")
    script="
begin;
$contents;
insert into migrations (filename) values ('$(basename "$migration")');
commit;
"
    echo " --> Running $migration"
    cat <<< "$script"
    # The here-string is quoted so the SQL reaches psql with its newlines
    # intact on every bash version.
    psql "$DATABASE_URL" -v ON_ERROR_STOP=1 <<< "$script"
  done
  # dump_schema
}
62+
63+
# Write migrations/schema.sql: the schema-only dump of the database followed
# by the data of the `migrations` table.
#
# The dump is collected in a temporary file first. The grep filters then drop
# lines that end in a `-- text` SQL comment, bare `--` lines and empty lines.
function dump_schema() {
  local tmp_file=migrations/schema.sql.tmp

  mkdir -p migrations

  pg_dump \
    --schema-only \
    "$DATABASE_URL" \
    > "$tmp_file"
  pg_dump \
    --data-only \
    -t migrations \
    "$DATABASE_URL" \
    >> "$tmp_file"

  grep -v -e "---\?\( .\+\)\$" "$tmp_file" | grep -v -e "^--\$" | grep -v -e "^\$" > migrations/schema.sql
  rm "$tmp_file"

  echo " --> Wrote migrations/schema.sql"
}
81+
82+
# Reset the database: drop and recreate the `public` schema, restore its
# grants and create an empty `migrations` bookkeeping table.
#
# DESTRUCTIVE: everything in the `public` schema is dropped.
function reset_db() {
  # The URL is quoted so characters such as `?` or `*` are never treated as
  # glob patterns or split into several arguments.
  psql "$DATABASE_URL" <<< "
DROP SCHEMA public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO public;
CREATE TABLE IF NOT EXISTS migrations (
filename TEXT PRIMARY KEY,
ran_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
"
}
94+
95+
# Print the list of supported sub-commands, prefixed with the script name.
function show_help() {
  cat << EOF
Usage:
------

$0 new add_something_table - create a new migration file
$0 schema:dump - dump db schema
$0 schema:load - load db schema
$0 up - run migrations
$0 danger:reset - resets the database state
$0 help - show this message
EOF
}
106+
107+
# Print the current local time as YYYYmmddHHMMSS (used as migration prefix).
function get_timestamp() {
  local stamp_format="+%Y%m%d%H%M%S"
  date "$stamp_format"
}
110+
111+
# Entry point: dispatch on the first argument (the action) and pass the
# remaining arguments to the selected command.
#
# With no action, or an unknown one, the help text is printed and the script
# exits with status 1.
function main_migrate() {
  local action=${1:-help}
  # A bare `shift` fails when there are no arguments, which would abort the
  # script under `set -e` before the help text could be shown.
  if [ $# -gt 0 ]; then
    shift
  fi

  case $action in
    new)
      make_new_migration "$@"
      ;;
    schema:dump)
      verify_database_url
      dump_schema "$@"
      ;;
    schema:load)
      verify_database_url
      psql "$DATABASE_URL" -f migrations/schema.sql
      ;;
    danger:reset)
      verify_database_url
      reset_db
      ;;
    up)
      verify_database_url
      run_migrations "$@"
      ;;
    *)
      show_help
      exit 1
      ;;
  esac
}
139+
140+
main_migrate $@

migrations/20250527124436_init.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
CREATE TABLE IF NOT EXISTS alphas (
2+
alpha_id TEXT PRIMARY KEY,
3+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
4+
regular TEXT NOT NULL,
5+
region TEXT NOT NULL,
6+
universe TEXT NOT NULL,
7+
decay INTEGER NOT NULL,
8+
delay INTEGER NOT NULL,
9+
truncation REAL NOT NULL,
10+
neutralization TEXT NOT NULL,
11+
pasteurization TEXT NOT NULL,
12+
nan_handling TEXT NOT NULL,
13+
unit_handling TEXT NOT NULL,
14+
fitness REAL,
15+
sharpe REAL,
16+
returns REAL,
17+
drawdown REAL,
18+
turnover REAL,
19+
margin REAL,
20+
long_count INTEGER,
21+
short_count INTEGER,
22+
self_correlation REAL,
23+
failing_tests TEXT
24+
);

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ langchain[google-genai,openai]
22
langgraph
33
langmem
44
pandas
5+
psycopg[binary]
56
python-dotenv
67
requests
78
tqdm

0 commit comments

Comments
 (0)