Skip to content

Commit f4b600e

Browse files
authored
feat: Add File-based Caching (#862)
* feat: Implement caching for GitHub contribution stats with a 24-hour expiration Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> * refactor: Improve cache key generation and file handling in cache functions Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> * style: run prettier Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> * style: format code with prettier * feat: prevent hash collisions, add file locking and cache unit test Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> * refactor: fix deepsource issues Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> * refactor: use CACHE_DURATION constant Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud> --------- Signed-off-by: Michele Palazzi <sysdadmin@m1k.cloud>
1 parent 6fc7f2b commit f4b600e

File tree

5 files changed

+431
-26
lines changed

5 files changed

+431
-26
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ yarn.lock
77
package-lock.json
88
.vercel
99

10+
# Cache directory
11+
cache/
12+
1013
# Local Configuration
1114
.DS_Store
1215

Dockerfile

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,33 @@ RUN composer install --no-dev --optimize-autoloader --no-scripts
2828
# Configure Apache to serve from src/ directory and pass environment variables
2929
RUN a2enmod rewrite headers && \
3030
echo 'ServerTokens Prod\n\
31-
ServerSignature Off\n\
32-
PassEnv TOKEN\n\
33-
PassEnv WHITELIST\n\
34-
<VirtualHost *:80>\n\
31+
ServerSignature Off\n\
32+
PassEnv TOKEN\n\
33+
PassEnv WHITELIST\n\
34+
<VirtualHost *:80>\n\
3535
ServerAdmin webmaster@localhost\n\
3636
DocumentRoot /var/www/html/src\n\
3737
<Directory /var/www/html/src>\n\
38-
Options -Indexes\n\
39-
AllowOverride None\n\
40-
Require all granted\n\
41-
Header always set Access-Control-Allow-Origin "*"\n\
42-
Header always set Content-Type "image/svg+xml" "expr=%{REQUEST_URI} =~ m#\\.svg$#i"\n\
43-
Header always set Content-Security-Policy "default-src 'none'; style-src 'unsafe-inline'; img-src data:;" "expr=%{REQUEST_URI} =~ m#\\.svg$#i"\n\
44-
Header always set Referrer-Policy "no-referrer-when-downgrade"\n\
45-
Header always set X-Content-Type-Options "nosniff"\n\
38+
Options -Indexes\n\
39+
AllowOverride None\n\
40+
Require all granted\n\
41+
Header always set Access-Control-Allow-Origin "*"\n\
42+
Header always set Content-Type "image/svg+xml" "expr=%{REQUEST_URI} =~ m#\\.svg$#i"\n\
43+
Header always set Content-Security-Policy "default-src 'none'; style-src 'unsafe-inline'; img-src data:;" "expr=%{REQUEST_URI} =~ m#\\.svg$#i"\n\
44+
Header always set Referrer-Policy "no-referrer-when-downgrade"\n\
45+
Header always set X-Content-Type-Options "nosniff"\n\
4646
</Directory>\n\
4747
ErrorLog ${APACHE_LOG_DIR}/error.log\n\
4848
CustomLog ${APACHE_LOG_DIR}/access.log combined\n\
49-
</VirtualHost>' > /etc/apache2/sites-available/000-default.conf
49+
</VirtualHost>' > /etc/apache2/sites-available/000-default.conf
5050

51-
# Set secure permissions
51+
RUN mkdir -p /var/www/html/cache
52+
53+
# Set secure permissions (cache dir needs write access for www-data)
5254
RUN chown -R www-data:www-data /var/www/html && \
5355
find /var/www/html -type d -exec chmod 755 {} \; && \
54-
find /var/www/html -type f -exec chmod 644 {} \;
56+
find /var/www/html -type f -exec chmod 644 {} \; && \
57+
chmod 775 /var/www/html/cache
5558

5659
# Health check
5760
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \

src/cache.php

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
6+
* Simple file-based cache for GitHub contribution stats
7+
*
8+
* Caches stats for 24 hours to avoid repeated API calls
9+
*/
10+
// Default cache duration: 24 hours (in seconds)
define("CACHE_DURATION", 24 * 60 * 60);
// Directory holding the cache files: "cache" next to this file's parent directory
define("CACHE_DIR", __DIR__ . "/../cache");
14+
/**
 * Generate a cache key for a user's request
 *
 * The user and options are encoded together as one structured payload before
 * hashing, so different user/options combinations cannot produce the same
 * input string the way plain concatenation could.
 *
 * @param string $user GitHub username
 * @param array $options Additional options that affect the stats (mode, exclude_days, starting_year)
 * @return string Cache key (hex SHA-256 digest, filename-safe)
 */
function getCacheKey(string $user, array $options = []): string
{
    // Sort by option name so the same options in a different order share a key
    ksort($options);
    $payload = ["user" => $user, "options" => $options];

    try {
        $keyData = json_encode($payload, JSON_THROW_ON_ERROR);
    } catch (JsonException $e) {
        // json_encode() rejects e.g. invalid UTF-8. serialize() accepts arbitrary
        // bytes and keeps the user/options boundary explicit.
        error_log("Cache key JSON encoding failed: " . $e->getMessage());
        $keyData = serialize($payload);
    }

    return hash("sha256", $keyData);
}
37+
/**
 * Get the cache file path for a given key
 *
 * The key is used verbatim as the file name (plus a ".json" suffix) inside
 * CACHE_DIR; callers in this file pass the digest from getCacheKey().
 *
 * @param string $key Cache key
 * @return string Full path to cache file
 */
function getCacheFilePath(string $key): string
{
    return CACHE_DIR . "/" . $key . ".json";
}
48+
/**
 * Ensure the cache directory exists
 *
 * @return bool True if directory exists or was created
 */
function ensureCacheDir(): bool
{
    if (is_dir(CACHE_DIR)) {
        return true;
    }

    // mkdir() returns false when a concurrent request created the directory
    // between the check above and this call; re-check so that case counts as success.
    return mkdir(CACHE_DIR, 0755, true) || is_dir(CACHE_DIR);
}
61+
/**
 * Get cached stats if available and not expired
 *
 * The age check uses fstat() on the opened, share-locked handle instead of
 * filemtime() on the path, so it is not answered from PHP's stat cache and
 * refers to the same file whose contents are read.
 *
 * @param string $user GitHub username
 * @param array $options Additional options
 * @param int $maxAge Maximum age in seconds (default: 24 hours)
 * @return array|null Cached stats array or null if not cached/expired
 */
function getCachedStats(string $user, array $options = [], int $maxAge = CACHE_DURATION): ?array
{
    $filePath = getCacheFilePath(getCacheKey($user, $options));

    if (!is_file($filePath)) {
        return null;
    }

    $handle = fopen($filePath, "r");
    if ($handle === false) {
        return null;
    }

    $expired = false;
    $contents = false;
    try {
        if (!flock($handle, LOCK_SH)) {
            return null;
        }

        $stat = fstat($handle);
        if ($stat !== false) {
            $expired = time() - $stat["mtime"] > $maxAge;
            if (!$expired) {
                $contents = stream_get_contents($handle);
            }
        }

        flock($handle, LOCK_UN);
    } finally {
        // Always release the handle, including on the early return above
        fclose($handle);
    }

    if ($expired) {
        // Remove the stale entry only after the handle is closed
        unlink($filePath);
        return null;
    }

    if ($contents === false || $contents === "") {
        return null;
    }

    $data = json_decode($contents, true);

    return is_array($data) ? $data : null;
}
115+
/**
 * Save stats to cache
 *
 * Failures (directory creation, JSON encoding, file write) are logged with
 * error_log() and reported through the return value.
 *
 * @param string $user GitHub username
 * @param array $options Additional options
 * @param array $stats Stats array to cache
 * @return bool True if successfully cached
 */
function setCachedStats(string $user, array $options, array $stats): bool
{
    if (!ensureCacheDir()) {
        error_log("Failed to create cache directory: " . CACHE_DIR);
        return false;
    }

    $filePath = getCacheFilePath(getCacheKey($user, $options));

    try {
        $data = json_encode($stats, JSON_THROW_ON_ERROR);
    } catch (JsonException $e) {
        // Include the encoder's reason so the failure can be diagnosed from the log
        error_log("Failed to encode stats to JSON for user: " . $user . " (" . $e->getMessage() . ")");
        return false;
    }

    // LOCK_EX takes an exclusive lock for the duration of the write
    if (file_put_contents($filePath, $data, LOCK_EX) === false) {
        error_log("Failed to write cache file: " . $filePath);
        return false;
    }

    return true;
}
148+
/**
 * Clear all expired cache files
 *
 * Only "*.json" entries directly inside CACHE_DIR are considered.
 *
 * @param int $maxAge Maximum age in seconds
 * @return int Number of files deleted
 */
function clearExpiredCache(int $maxAge = CACHE_DURATION): int
{
    if (!is_dir(CACHE_DIR)) {
        return 0;
    }

    $files = glob(CACHE_DIR . "/*.json");
    if ($files === false) {
        return 0;
    }

    // Drop cached stat results so the modification times below are current
    clearstatcache();

    // A file is expired when it was last modified before this timestamp
    $cutoff = time() - $maxAge;
    $deleted = 0;

    foreach ($files as $file) {
        if (!is_file($file)) {
            continue;
        }

        $mtime = filemtime($file);
        if ($mtime === false) {
            continue;
        }

        if ($mtime < $cutoff && unlink($file)) {
            $deleted++;
        }
    }

    return $deleted;
}
183+
/**
 * Clear cache for a specific user
 *
 * Note: This function only clears the cache for the user with empty/default options.
 * Cache entries with non-empty options (starting_year, mode, exclude_days) will NOT
 * be cleared. This is a limitation of the hash-based cache key system - we cannot
 * enumerate all possible option combinations without storing additional metadata.
 *
 * @param string $user GitHub username
 * @return bool True if cache was cleared (or didn't exist)
 */
function clearUserCache(string $user): bool
{
    if (!is_dir(CACHE_DIR)) {
        return true;
    }

    $filePath = getCacheFilePath(getCacheKey($user, []));

    if (!file_exists($filePath)) {
        return true;
    }

    if (unlink($filePath)) {
        return true;
    }

    // unlink() also fails when another request removed the file first. That
    // still counts as cleared, so re-check with a fresh stat result.
    clearstatcache(true, $filePath);

    return !file_exists($filePath);
}

src/index.php

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
require_once "../vendor/autoload.php";
77
require_once "stats.php";
88
require_once "card.php";
9+
require_once "cache.php";
910

1011
// load .env
1112
$dotenv = \Dotenv\Dotenv::createImmutable(dirname(__DIR__, 1));
@@ -19,11 +20,11 @@
1920
renderOutput($message, 500);
2021
}
2122

22-
// set cache to refresh once per three horus
23-
$cacheMinutes = 3 * 60 * 60;
24-
header("Expires: " . gmdate("D, d M Y H:i:s", time() + $cacheMinutes) . " GMT");
23+
// set cache to refresh once per day (24 hours)
24+
$cacheSeconds = CACHE_DURATION;
25+
header("Expires: " . gmdate("D, d M Y H:i:s", time() + $cacheSeconds) . " GMT");
2526
header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT");
26-
header("Cache-Control: public, max-age=$cacheMinutes");
27+
header("Cache-Control: public, max-age=$cacheSeconds");
2728

2829
// redirect to demo site if user is not given
2930
if (!isset($_REQUEST["user"])) {
@@ -35,15 +36,39 @@
3536
// get streak stats for user given in query string
3637
$user = preg_replace("/[^a-zA-Z0-9\-]/", "", $_REQUEST["user"]);
3738
$startingYear = isset($_REQUEST["starting_year"]) ? intval($_REQUEST["starting_year"]) : null;
38-
$contributionGraphs = getContributionGraphs($user, $startingYear);
39-
$contributions = getContributionDates($contributionGraphs);
40-
if (isset($_GET["mode"]) && $_GET["mode"] === "weekly") {
41-
$stats = getWeeklyContributionStats($contributions);
39+
$mode = isset($_GET["mode"]) ? $_GET["mode"] : null;
40+
$excludeDaysRaw = $_GET["exclude_days"] ?? "";
41+
42+
// Build cache options based on request parameters
43+
$cacheOptions = [
44+
"starting_year" => $startingYear,
45+
"mode" => $mode,
46+
"exclude_days" => $excludeDaysRaw,
47+
];
48+
49+
// Check for cached stats first (24 hour cache)
50+
$cachedStats = getCachedStats($user, $cacheOptions);
51+
52+
if ($cachedStats !== null) {
53+
// Use cached stats - instant response!
54+
$stats = $cachedStats;
4255
} else {
43-
// split and normalize excluded days
44-
$excludeDays = normalizeDays(explode(",", $_GET["exclude_days"] ?? ""));
45-
$stats = getContributionStats($contributions, $excludeDays);
56+
// Fetch fresh data from GitHub API
57+
$contributionGraphs = getContributionGraphs($user, $startingYear);
58+
$contributions = getContributionDates($contributionGraphs);
59+
60+
if ($mode === "weekly") {
61+
$stats = getWeeklyContributionStats($contributions);
62+
} else {
63+
// split and normalize excluded days
64+
$excludeDays = normalizeDays(explode(",", $excludeDaysRaw));
65+
$stats = getContributionStats($contributions, $excludeDays);
66+
}
67+
68+
// Cache the stats for 24 hours
69+
setCachedStats($user, $cacheOptions, $stats);
4670
}
71+
4772
renderOutput($stats);
4873
} catch (InvalidArgumentException | AssertionError $error) {
4974
error_log("Error {$error->getCode()}: {$error->getMessage()}");

0 commit comments

Comments
 (0)