Skip to content

Commit de8ad33

Browse files
authored
Merge pull request #2 from rundef/feat/indexes_support
Benchmarks and performance improvements
2 parents 73b7682 + 7ec5ee7 commit de8ad33

File tree

11 files changed

+275
-38
lines changed

11 files changed

+275
-38
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.3.0
2+
current_version = 0.3.1
33
commit = True
44
tag = True
55

benchmark.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from sqlalchemy import create_engine, Column, Integer, String, Boolean, select, Index, update, delete
2+
from sqlalchemy.orm import declarative_base, sessionmaker
3+
from sqlalchemy_memory import MemorySession
4+
import argparse
5+
import time
6+
import random
7+
from faker import Faker
8+
9+
try:
10+
from sqlalchemy_memory import create_memory_engine
11+
except ImportError:
12+
create_memory_engine = None
13+
14+
# Declarative base class that the benchmark model inherits from.
Base = declarative_base()
# Shared Faker instance used to produce random names for rows.
fake = Faker()
# Pool of single-letter category values, "A" through "K".
CATEGORIES = list("ABCDEFGHIJK")
17+
18+
class Item(Base):
    """Benchmark row mapped to the ``items`` table.

    ``active`` and ``category`` are declared with ``index=True``; they are the
    columns that ``generate_random_select_query`` filters on.
    """

    __tablename__ = "items"

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Free-form name; declared without an index.
    name = Column(String)
    # Indexed boolean flag.
    active = Column(Boolean, index=True)
    # Indexed category string.
    category = Column(String, index=True)
25+
26+
def generate_items(n):
    """Lazily yield ``n`` ``Item`` objects populated with random values.

    Each item gets a Faker-generated name, a random boolean ``active`` flag
    and a category drawn at random from ``CATEGORIES``.
    """
    flag_choices = [True, False]
    for _ in range(n):
        # Draw the values in the same order as the keyword arguments so the
        # random stream is consumed identically.
        item_name = fake.name()
        is_active = random.choice(flag_choices)
        item_category = random.choice(CATEGORIES)
        yield Item(name=item_name, active=is_active, category=item_category)
33+
34+
def generate_random_select_query():
    """Build a ``select(Item)`` statement with one or two random filters.

    With probability 0.5 the query filters on ``Item.active``. A filter on
    ``Item.category`` (membership in 1-4 randomly sampled categories) is added
    with probability 0.5, and always when no ``active`` filter was chosen, so
    the statement never ends up without a WHERE clause.
    """
    filters = []

    filter_on_active = random.random() < 0.5
    if filter_on_active:
        wanted_flag = random.choice([True, False])
        filters.append(Item.active == wanted_flag)

    # The second coin flip is always drawn, even when its outcome is
    # overridden by the "at least one filter" rule.
    filter_on_category = random.random() < 0.5
    if filter_on_category or not filter_on_active:
        how_many = random.randint(1, 4)
        chosen = random.sample(CATEGORIES, how_many)
        filters.append(Item.category.in_(chosen))

    statement = select(Item)
    return statement.where(*filters)
42+
43+
def inserts(Session, count):
    """Insert ``count`` generated items in one transaction and time it.

    Args:
        Session: Zero-argument callable returning a session that works as a
            context manager (for example a ``sessionmaker``).
        count: Number of items to generate and insert.

    Returns:
        Elapsed seconds (float) spent in ``add_all`` plus ``commit``.
    """
    # Materialize the rows before starting the clock so that Faker's data
    # generation is not reported as insert time; ``selects`` likewise
    # prepares its statements before timing.
    items = list(generate_items(count))

    # perf_counter is monotonic and high resolution, unlike wall-clock
    # time.time(), which can jump when the system clock is adjusted.
    insert_start = time.perf_counter()
    with Session() as session:
        session.add_all(items)
        session.commit()
    insert_duration = time.perf_counter() - insert_start

    print(f"Inserted {count} items in {insert_duration:.2f} seconds.")
    return insert_duration
51+
52+
def selects(Session, count):
    """Run ``count`` random select queries in one session and time them.

    Args:
        Session: Zero-argument callable returning a session that works as a
            context manager (for example a ``sessionmaker``).
        count: Number of random select statements to execute.

    Returns:
        Elapsed seconds (float) spent executing the statements and
        materializing their scalar results.
    """
    # Statements are built up front so only execution is timed.
    queries = [generate_random_select_query() for _ in range(count)]

    # perf_counter is monotonic and high resolution, unlike wall-clock
    # time.time(), which can jump when the system clock is adjusted.
    query_start = time.perf_counter()
    with Session() as session:
        for stmt in queries:
            # list() forces the result rows to be fetched in full.
            list(session.execute(stmt).scalars())
    query_duration = time.perf_counter() - query_start

    print(f"Executed {count} select queries in {query_duration:.2f} seconds.")
    return query_duration
62+
63+
def updates(Session, random_ids):
    """Update one row per id with fresh random values and time it.

    Args:
        Session: Zero-argument callable returning a session that works as a
            context manager (for example a ``sessionmaker``).
        random_ids: Primary-key values of the rows to update.

    Returns:
        Elapsed seconds (float) spent executing the UPDATE statements and
        committing.
    """
    # Build every statement (including the Faker-generated values) before
    # starting the clock, so data generation is not reported as update time;
    # ``selects`` likewise prepares its statements before timing.
    statements = [
        update(Item).where(Item.id == rid).values(
            name=fake.name(),
            category=random.choice(CATEGORIES),
            active=random.choice([True, False]),
        )
        for rid in random_ids
    ]

    # perf_counter is monotonic and high resolution, unlike wall-clock
    # time.time(), which can jump when the system clock is adjusted.
    update_start = time.perf_counter()
    with Session() as session:
        for stmt in statements:
            session.execute(stmt)
        session.commit()
    update_duration = time.perf_counter() - update_start

    print(f"Executed {len(random_ids)} updates in {update_duration:.2f} seconds.")
    return update_duration
77+
78+
def deletes(Session, random_ids):
    """Delete one row per id in a single transaction and time it.

    Args:
        Session: Zero-argument callable returning a session that works as a
            context manager (for example a ``sessionmaker``).
        random_ids: Primary-key values of the rows to delete.

    Returns:
        Elapsed seconds (float) spent executing the DELETE statements and
        committing.
    """
    # Statements are prepared before timing, matching how ``selects``
    # builds its queries up front.
    statements = [delete(Item).where(Item.id == rid) for rid in random_ids]

    # perf_counter is monotonic and high resolution, unlike wall-clock
    # time.time(), which can jump when the system clock is adjusted.
    delete_start = time.perf_counter()
    with Session() as session:
        for stmt in statements:
            session.execute(stmt)
        session.commit()
    delete_duration = time.perf_counter() - delete_start

    print(f"Deleted {len(random_ids)} items in {delete_duration:.2f} seconds.")
    return delete_duration
88+
89+
def run_benchmark(db_type="sqlite", count=100_000, ops=500):
    """Run the insert/select/update/delete benchmark against one backend.

    Args:
        db_type: ``"sqlite"`` for in-memory SQLite, or ``"memory"`` for the
            ``memory://`` engine used with ``MemorySession``.
        count: Number of items to insert.
        ops: Number of select queries to run, and the maximum number of
            updates and deletes. Defaults to 500.

    Raises:
        ValueError: If ``db_type`` is neither ``"sqlite"`` nor ``"memory"``.
    """
    print(f"Running benchmark: type={db_type}, count={count}")

    if db_type == "sqlite":
        engine = create_engine("sqlite:///:memory:", echo=False)
        Session = sessionmaker(engine)
    elif db_type == "memory":
        engine = create_engine("memory://")
        Session = sessionmaker(
            engine,
            class_=MemorySession,
            expire_on_commit=False,
        )
    else:
        raise ValueError("Invalid --type. Use 'sqlite' or 'memory'.")

    Base.metadata.create_all(engine)

    elapsed = inserts(Session, count)
    elapsed += selects(Session, ops)

    # random.sample raises ValueError when asked for more ids than exist, so
    # cap the number of updates/deletes at the number of inserted rows.
    sample_size = max(0, min(ops, count))
    # Presumably the inserted rows received ids 1..count -- confirm if the
    # table is ever pre-populated.
    id_range = range(1, count + 1)

    random_ids = random.sample(id_range, sample_size)
    elapsed += updates(Session, random_ids)

    random_ids = random.sample(id_range, sample_size)
    elapsed += deletes(Session, random_ids)

    print(f"Total runtime for {db_type}: {elapsed:.2f} seconds.")
117+
118+
119+
120+
if __name__ == "__main__":
    # Command-line entry point: --type selects the backend (required),
    # --count sets how many items are inserted (default 10,000).
    cli = argparse.ArgumentParser()
    cli.add_argument("--type", choices=["sqlite", "memory"], required=True)
    cli.add_argument("--count", type=int, default=10_000)
    options = cli.parse_args()
    run_benchmark(options.type, options.count)

docs/benchmarks.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
Benchmark Comparison (20,000 items)
2+
===================================
3+
4+
This benchmark compares ``sqlalchemy-memory`` to in-memory SQLite using 20,000 inserted items followed by 500 select queries, 500 updates, and 500 deletes.
5+
6+
As the results show, ``sqlalchemy-memory`` **excels in read-heavy workloads**, delivering significantly faster query performance. SQLite is faster on update and delete operations (roughly 4x and 10x in this run), but those operations account for only a small share of the total, so the overall runtime of ``sqlalchemy-memory`` remains substantially lower, making it a strong choice for prototyping and simulation.
7+
8+
.. list-table::
9+
:header-rows: 1
10+
:widths: 25 25 25
11+
12+
* - Operation
13+
- SQLite (in-memory)
14+
- sqlalchemy-memory
15+
* - Insert
16+
- 3.17 sec
17+
- 2.70 sec
18+
* - 500 Select Queries
19+
- 26.37 sec
20+
- 2.94 sec
21+
* - 500 Updates
22+
- 0.26 sec
23+
- 1.12 sec
24+
* - 500 Deletes
25+
- 0.09 sec
26+
- 0.90 sec
27+
* - **Total Runtime**
28+
- **29.89 sec**
29+
- **7.66 sec**

docs/index.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,5 @@ Quickstart: async example
112112
query
113113
update
114114
delete
115-
commit_rollback
115+
commit_rollback
116+
benchmarks

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "sqlalchemy-memory"
7-
version = "0.3.0"
7+
version = "0.3.1"
88
dependencies = [
99
"sqlalchemy>=2.0,<3.0",
1010
"sortedcontainers>=2.4.0"

sqlalchemy_memory/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
"AsyncMemorySession",
77
]
88

9-
__version__ = '0.3.0'
9+
__version__ = '0.3.1'

sqlalchemy_memory/base/indexes.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
from typing import Any, List
44
from sqlalchemy.sql import operators
55

6+
from ..helpers.ordered_set import OrderedSet
7+
68

79
class IndexManager:
10+
__slots__ = ('hash_index', 'range_index', 'table_indexes', 'columns_mapping', )
11+
812
def __init__(self):
913
self.hash_index = HashIndex()
1014
self.range_index = RangeIndex()
1115

1216
self.table_indexes = {}
1317
self.columns_mapping = {}
1418

19+
1520
def get_indexes(self, obj):
1621
"""
1722
Retrieve index from object's table as dict: indexname => list of column name
@@ -21,18 +26,27 @@ def get_indexes(self, obj):
2126
if tablename not in self.table_indexes:
2227
self.table_indexes[tablename] = {}
2328

29+
pk_col_name = obj.__table__.primary_key.columns[0].name
30+
2431
for index in obj.__table__.indexes:
2532
if len(index.expressions) > 1:
2633
# Ignoring compound indexes for now ...
2734
continue
2835

36+
if index.name == pk_col_name:
37+
pk_col_name = None
38+
2939
self.table_indexes[tablename][index.name] = [
3040
col.name
3141
for col in index.expressions
3242
]
3343

44+
if pk_col_name:
45+
self.table_indexes[tablename][pk_col_name] = [pk_col_name]
46+
3447
return self.table_indexes[tablename]
3548

49+
3650
def _column_to_index(self, tablename, colname):
3751
"""
3852
Get index name from tablename & column name
@@ -51,6 +65,7 @@ def _column_to_index(self, tablename, colname):
5165

5266
return self.columns_mapping[tablename][colname]
5367

68+
5469
def _get_index_key(self, obj, columns):
5570
if len(columns) == 1:
5671
return getattr(obj, columns[0])
@@ -65,7 +80,7 @@ def on_insert(self, obj):
6580

6681
self.hash_index.add(tablename, indexname, value, obj)
6782
self.range_index.add(tablename, indexname, value, obj)
68-
83+
6984
def on_delete(self, obj):
7085
tablename = obj.__tablename__
7186
indexes = self.get_indexes(obj)
@@ -145,6 +160,7 @@ def query(self, collection, tablename, colname, operator, value):
145160
in_range = self.range_index.query(tablename, indexname, gte=value[0], lte=value[1])
146161
return list(set(collection) - set(in_range))
147162

163+
148164
def get_selectivity(self, tablename, colname, operator, value, total_count):
149165
"""
150166
Estimate selectivity: higher means worst filtering.
@@ -187,23 +203,24 @@ class HashIndex:
187203
Maintains insertion order of objects.
188204
"""
189205

206+
__slots__ = ('index',)
207+
190208
def __init__(self):
191-
self.index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
209+
self.index = defaultdict(lambda: defaultdict(lambda: defaultdict(OrderedSet)))
210+
192211

193212
def add(self, tablename: str, indexname: str, value: Any, obj: Any):
194-
self.index[tablename][indexname][value].append(obj)
213+
self.index[tablename][indexname][value].add(obj)
214+
195215

196216
def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
197-
lst = self.index[tablename][indexname][value]
198-
try:
199-
lst.remove(obj)
200-
if not lst:
201-
del self.index[tablename][indexname][value]
202-
except ValueError:
203-
pass
217+
s = self.index[tablename][indexname][value]
218+
s.discard(obj)
219+
if not s:
220+
del self.index[tablename][indexname][value]
204221

205222
def query(self, tablename: str, indexname: str, value: Any) -> List[Any]:
206-
return self.index[tablename][indexname].get(value, [])
223+
return list(self.index[tablename][indexname].get(value, []))
207224

208225

209226
class RangeIndex:
@@ -215,12 +232,19 @@ class RangeIndex:
215232
index[tablename][indexname] = SortedDict { value: [obj1, obj2, ...] }
216233
"""
217234

235+
__slots__ = ('index',)
236+
218237
def __init__(self):
219238
self.index = defaultdict(lambda: defaultdict(SortedDict))
220239

221240
def add(self, tablename: str, indexname: str, value: Any, obj: Any):
222-
self.index[tablename][indexname].setdefault(value, []).append(obj)
241+
index = self.index[tablename][indexname]
242+
if value in index:
243+
index[value].append(obj)
244+
else:
245+
index[value] = [obj]
223246

247+
224248
def remove(self, tablename: str, indexname: str, value: Any, obj: Any):
225249
col = self.index[tablename][indexname]
226250
if value in col:

sqlalchemy_memory/base/pending_changes.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ def rollback(self):
2020

2121
def add(self, obj, **kwargs):
2222
tablename = obj.__tablename__
23-
if not any(id(x) == id(obj) for x in self._to_add[tablename]):
24-
self._to_add[tablename].append(obj)
23+
self._to_add[tablename].append(obj)
2524

2625
def delete(self, obj):
2726
tablename = obj.__tablename__

sqlalchemy_memory/base/session.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ def __init__(self, *args, **kwargs):
2222
def add(self, obj, **kwargs):
2323
self.pending_changes.add(obj, **kwargs)
2424

25+
def add_all(self, instances, **kwargs):
    """Add each object in ``instances`` by calling ``self.add`` on it.

    All keyword arguments are forwarded unchanged to every ``add`` call.
    """
    for instance in instances:
        self.add(instance, **kwargs)
28+
2529
def delete(self, obj):
2630
self.pending_changes.delete(obj)
2731

@@ -159,7 +163,7 @@ def _handle_update(self, statement: Update, **kwargs):
159163
pk_col_name = None
160164
for obj in collection:
161165
if pk_col_name is None:
162-
pk_col_name = self.store._get_primary_key_name(obj)
166+
pk_col_name = self.store._get_primary_key_name(obj.__table__)
163167

164168
pk_value = getattr(obj, pk_col_name)
165169
self.update(tablename, pk_value, data)
@@ -188,7 +192,7 @@ def merge(self, instance, **kwargs):
188192
Merge a possibly detached instance into the current session
189193
"""
190194

191-
pk_name = self.store._get_primary_key_name(instance)
195+
pk_name = self.store._get_primary_key_name(instance.__table__)
192196
pk_value = getattr(instance, pk_name)
193197
existing = self.store.get_by_primary_key(instance, pk_value)
194198

0 commit comments

Comments
 (0)