Skip to content

Commit 63e3834

Browse files
authored
Merge pull request #1 from davidteather/v0.2.0-add-proxy-picking-algorithm-support
V0.2.0 - Add Algos for picking proxies, Add library support for formatting proxies (and formatter)
2 parents ecd99f1 + 93d1afb commit 63e3834

26 files changed

Lines changed: 1568 additions & 68 deletions

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ jobs:
3232
- name: Type check with mypy
3333
run: poetry run mypy proxyproviders
3434

35-
- name: Run unit tests with coverage
35+
- name: Run unit tests with coverage (excluding integration tests)
3636
run: |
37-
poetry run coverage run -m pytest
37+
poetry run coverage run -m pytest tests/ --ignore=tests/integration -v
3838
poetry run coverage report --fail-under=95
3939
poetry run coverage xml
4040

.pre-commit-config.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
repos:
2+
# Black code formatter
3+
- repo: https://github.com/psf/black
4+
rev: 23.12.1
5+
hooks:
6+
- id: black
7+
language_version: python3
8+
args: [--line-length=88]
9+
10+
# isort import sorting
11+
- repo: https://github.com/pycqa/isort
12+
rev: 5.13.2
13+
hooks:
14+
- id: isort
15+
args: [--profile=black]
16+
17+
# flake8 linting
18+
- repo: https://github.com/pycqa/flake8
19+
rev: 7.0.0
20+
hooks:
21+
- id: flake8
22+
args: ["--max-line-length=88", "--extend-ignore=E203,W503,E501,F401,F541"]
23+
24+
- repo: local
25+
hooks:
26+
# Unit tests
27+
- id: pytest-unit
28+
name: Run unit tests
29+
entry: python3 -m pytest tests/ -v --ignore=tests/integration
30+
language: system
31+
pass_filenames: false
32+
always_run: true
33+
stages: [commit]
34+
35+
# Integration tests
36+
- id: pytest-integration
37+
name: Run integration tests
38+
entry: bash -c 'if [ "$RUN_INTEGRATION_TESTS" = "1" ]; then python3 -m pytest tests/integration/ -v; else echo "Skipping integration tests (env vars not set)"; fi'
39+
language: system
40+
pass_filenames: false
41+
always_run: true
42+
stages: [commit]
43+
44+
default_language_version:
45+
python: python3
46+
47+
fail_fast: true
48+
49+
default_stages: [commit]

.sphinx/conf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
# -- Project information -----------------------------------------------------
77
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
88

9-
import toml
109
import os
1110
import sys
1211

12+
import toml
13+
1314
sys.path.insert(0, os.path.abspath("../"))
1415
pyproject = toml.load("../pyproject.toml")
1516

CONTRIBUTING.md

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Contributing to ProxyProviders
2+
3+
This guide shows you how to add new proxy providers and test them properly.
4+
5+
## Quick Start
6+
7+
1. **Fork and clone**: `git clone https://github.com/your-username/proxyproviders.git`
8+
2. **Install**: `pip install -e . && pip install pre-commit && pre-commit install`
9+
3. **Test**: `python -m pytest tests/ --ignore=tests/integration -v`
10+
11+
## Development Setup
12+
13+
### Prerequisites
14+
15+
- Python 3.9 or higher
16+
- Git
17+
18+
## Running Tests
19+
20+
```bash
21+
# Unit tests (fast)
22+
python -m pytest tests/ --ignore=tests/integration -v
23+
24+
# Integration tests (need API keys set for each provider)
25+
export RUN_INTEGRATION_TESTS=1 WEBSHARE_API_KEY="your-key" BRIGHTDATA_API_KEY="your-key"
26+
python -m pytest tests/integration/ -v
27+
28+
# Check coverage (should be >95%)
29+
python -m coverage run -m pytest tests/ --ignore=tests/integration
30+
python -m coverage report --show-missing
31+
```
32+
33+
## Adding a New Proxy Provider
34+
35+
If you're adding a new provider that isn't already in the library, here's a short guide to help you get started.
36+
37+
### 1. Create the Provider
38+
39+
Consider looking at `proxyproviders/providers/webshare.py` as an example. Your provider will need to subclass `ProxyProvider` from `proxyproviders/proxy_provider.py`.
40+
41+
Here's a small scaffold that might be useful; however, it may be outdated, so check the `ProxyProvider` class for the latest required methods.
42+
43+
```python
44+
class YourProvider(ProxyProvider):
45+
def __init__(self, api_key: str, config: Optional[ProxyConfig] = None):
46+
super().__init__(config)
47+
self.api_key = api_key
48+
49+
def _fetch_proxies(self) -> List[Proxy]:
50+
# Call your provider's API and return List[Proxy]
51+
pass
52+
53+
def _convert_to_proxy(self, data: Dict) -> Proxy:
54+
# Convert API response to Proxy object
55+
return Proxy(
56+
id=str(data["id"]),
57+
username=data["username"],
58+
password=data["password"],
59+
proxy_address=data["host"],
60+
port=int(data["port"]),
61+
# ... other fields
62+
)
63+
```
64+
65+
### 2. Add to Package
66+
67+
Update `proxyproviders/__init__.py`:
68+
```python
69+
from .providers.your_provider import YourProvider
70+
# Add "YourProvider" to __all__ list
71+
```
72+
73+
## Writing Tests
74+
75+
### 3. Unit Tests
76+
77+
Create `tests/providers/test_your_provider.py` - feel free to copy from `test_webshare.py` and modify it:
78+
79+
**Required tests:**
80+
- Mock API success/error responses
81+
- Test algorithm integration (`get_proxy()` works)
82+
- Integration test with real API (if `YOUR_PROVIDER_API_KEY` env var set)
83+
84+
### 4. Integration Test
85+
86+
Create `tests/integration/test_your_provider_integration.py` - copy from existing integration tests.
87+
88+
## Submitting Changes
89+
90+
1. **Run tests**: `python -m pytest tests/ -v`
91+
2. **Check coverage**: `python -m coverage run -m pytest tests/ --ignore=tests/integration && python -m coverage report`
92+
3. **Pre-commit passes**: `pre-commit run --all-files`
93+
4. **Create PR** with:
94+
- What provider you're adding
95+
- How to get API keys
96+
- Test coverage proof
97+
98+
**Requirements for PR approval:**
99+
- Follows existing code patterns
100+
- \>95% test coverage
101+
- Unit + integration tests
102+
- Clear documentation

README.md

Lines changed: 89 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,24 @@ proxies = proxy_provider.list_proxies()
6262
print(proxies)
6363
```
6464

65+
For simple usage, you can get a proxy and use it immediately:
66+
```py
67+
from proxyproviders import Webshare
68+
import requests
69+
70+
provider = Webshare(api_key="your-api-key")
71+
72+
# Get a proxy (uses RoundRobin by default) and use it with requests
73+
from proxyproviders.models.proxy import ProxyFormat
74+
75+
proxy = provider.get_proxy()
76+
response = requests.get("https://httpbin.org/ip", proxies=proxy.format(ProxyFormat.REQUESTS))
77+
print(response.json())
78+
79+
# Or in one line
80+
response = requests.get("https://httpbin.org/ip", proxies=provider.get_proxy().format(ProxyFormat.REQUESTS))
81+
```
82+
6583
Each provider has its own custom options. The `Webshare` class lets you specify URL params according to their [api spec](https://apidocs.webshare.io/proxy-list/list#parameters); here's an example that will only return proxies based in the US.
6684

6785
```py
@@ -107,32 +125,86 @@ some_function(brightdata)
107125

108126
Here's a more meaningful example that takes the `Proxy` class and uses it to create a Python `requests` HTTP proxy.
109127

128+
#### Simple Usage
110129
```py
111-
from proxyproviders import Webshare, BrightData, ProxyProvider
130+
from proxyproviders import Webshare
131+
from proxyproviders.algorithms import Random, RoundRobin
132+
from proxyproviders.models.proxy import ProxyFormat
112133
import requests
113-
import os
114134

135+
provider = Webshare(api_key="your_api_key")
115136

116-
def request_with_proxy(provider: ProxyProvider):
117-
requests_proxy = None
137+
# Get proxy using default RoundRobin and make request
138+
proxy = provider.get_proxy()
139+
response = requests.get("https://httpbin.org/ip", proxies=proxy.format(ProxyFormat.REQUESTS))
118140

119-
if provider:
120-
proxies = provider.list_proxies()
141+
# Or in one line
142+
response = requests.get("https://httpbin.org/ip", proxies=provider.get_proxy().format(ProxyFormat.REQUESTS))
143+
```
121144

122-
requests_proxy = {
123-
"http": f"http://{proxies[0].username}:{proxies[0].password}@{proxies[0].proxy_address}:{proxies[0].port}",
124-
"https": f"http://{proxies[0].username}:{proxies[0].password}@{proxies[0].proxy_address}:{proxies[0].port}",
125-
}
145+
#### Built-in Algorithms
146+
```py
147+
from proxyproviders import Webshare
148+
from proxyproviders.algorithms import Random, RoundRobin, First
126149

127-
r = requests.get("https://httpbin.org/ip", proxies=requests_proxy)
128-
return r.json()
150+
provider = Webshare(api_key="your_api_key")
129151

130-
webshare = Webshare(api_key="your_api_key")
131-
brightdata = BrightData(api_key="your_api_key", zone="your_zone")
152+
# Default: RoundRobin (cycles through proxies for load balancing)
153+
proxy = provider.get_proxy()
132154

133-
print(f"Your IP: {request_with_proxy(None)}")
134-
print(f"Webshare: {request_with_proxy(webshare)}")
135-
print(f"BrightData: {request_with_proxy(brightdata)}")
155+
# Random selection
156+
proxy = provider.get_proxy(Random())
157+
158+
# Always first proxy (deterministic)
159+
proxy = provider.get_proxy(First())
160+
161+
# Algorithm can maintain state when reused
162+
round_robin = RoundRobin()
163+
proxy1 = provider.get_proxy(round_robin)
164+
proxy2 = provider.get_proxy(round_robin) # Next in sequence
165+
```
166+
167+
#### Algorithm State Management
168+
```py
169+
from proxyproviders import Webshare
170+
from proxyproviders.algorithms import RoundRobin, Random
171+
172+
provider = Webshare(api_key="your_api_key")
173+
174+
# Create reusable algorithm for state management
175+
round_robin = RoundRobin() # Maintains state across calls
176+
random_algo = Random() # Stateless but reusable
177+
178+
# Each call to round_robin will cycle to next proxy
179+
for i in range(3):
180+
proxy = provider.get_proxy(round_robin)
181+
print(f"RoundRobin {i}: {proxy.proxy_address}")
182+
183+
# Provider also maintains its own default RoundRobin state when not specified
184+
for i in range(3):
185+
proxy = provider.get_proxy() # Uses provider's default RoundRobin
186+
print(f"Default {i}: {proxy.proxy_address}")
187+
```
188+
189+
#### Custom Algorithms
190+
```py
191+
from proxyproviders.algorithms import Algorithm
192+
from typing import List
193+
from proxyproviders.models.proxy import Proxy
194+
195+
class USProxyAlgorithm(Algorithm):
196+
"""Prefers US proxies, falls back to first available."""
197+
198+
def select(self, proxies: List[Proxy]) -> Proxy:
199+
# Try to find a US proxy
200+
for proxy in proxies:
201+
if proxy.country_code == "US":
202+
return proxy
203+
# Fall back to first proxy
204+
return proxies[0]
205+
206+
# Use your custom algorithm
207+
proxy = provider.get_proxy(USProxyAlgorithm())
136208
```
137209

138210
### Making Your Own Proxy Provider
Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,72 @@
1-
from proxyproviders import Webshare, BrightData, ProxyProvider
1+
"""Examples showing provider-agnostic proxy usage."""
2+
23
import requests
3-
import os
44

5+
from proxyproviders import BrightData, ProxyProvider, Webshare
6+
from proxyproviders.algorithms import First, Random, RoundRobin
7+
from proxyproviders.models.proxy import ProxyFormat
8+
9+
10+
def make_request_with_proxy(provider: ProxyProvider):
11+
"""Make HTTP request through any proxy provider."""
12+
proxy = provider.get_proxy()
13+
response = requests.get(
14+
"https://httpbin.org/ip", proxies=proxy.format(ProxyFormat.REQUESTS)
15+
)
16+
return response.json()
17+
18+
19+
def algorithm_examples():
20+
"""Show different algorithms with same provider."""
21+
provider = Webshare(api_key="your_api_key")
22+
23+
# Default RoundRobin
24+
provider.get_proxy()
25+
26+
# Random selection
27+
provider.get_proxy(Random())
28+
29+
# Always first proxy
30+
provider.get_proxy(First())
31+
32+
# Reusable RoundRobin (maintains state)
33+
round_robin = RoundRobin()
34+
provider.get_proxy(round_robin)
35+
provider.get_proxy(round_robin) # Next in sequence
36+
37+
38+
def one_liner_examples():
39+
"""One-liner HTTP requests through proxy."""
40+
provider = Webshare(api_key="your_api_key")
41+
42+
# Default algorithm
43+
requests.get(
44+
"https://httpbin.org/ip",
45+
proxies=provider.get_proxy().format(ProxyFormat.REQUESTS),
46+
)
47+
48+
# With specific algorithm
49+
requests.get(
50+
"https://httpbin.org/ip",
51+
proxies=provider.get_proxy(Random()).format(ProxyFormat.REQUESTS),
52+
)
553

6-
def request_with_proxy(provider: ProxyProvider):
7-
requests_proxy = None
854

9-
if provider:
10-
proxies = provider.list_proxies()
55+
def main():
56+
# Works with any provider
57+
webshare = Webshare(api_key="your_api_key")
58+
brightdata = BrightData(api_key="your_api_key", zone="your_zone")
1159

12-
requests_proxy = {
13-
"http": f"http://{proxies[0].username}:{proxies[0].password}@{proxies[0].proxy_address}:{proxies[0].port}",
14-
"https": f"http://{proxies[0].username}:{proxies[0].password}@{proxies[0].proxy_address}:{proxies[0].port}",
15-
}
60+
# Same function works with different providers
61+
make_request_with_proxy(webshare)
62+
make_request_with_proxy(brightdata)
1663

17-
r = requests.get("https://httpbin.org/ip", proxies=requests_proxy)
18-
return r.json()
64+
# Algorithm examples
65+
algorithm_examples()
1966

67+
# One-liner usage
68+
one_liner_examples()
2069

21-
webshare = Webshare(api_key="your_api_key")
22-
brightdata = BrightData(api_key="your_api_key", zone="your_zone")
2370

24-
print(f"Your IP: {request_with_proxy(None)}")
25-
print(f"Webshare: {request_with_proxy(webshare)}")
26-
print(f"BrightData: {request_with_proxy(brightdata)}")
71+
if __name__ == "__main__":
72+
main()

0 commit comments

Comments
 (0)