Skip to content

Commit 7b1e8fb

Browse files
committed
Fix for HuggingFace URL malformation
1 parent 17bbd96 commit 7b1e8fb

2 files changed

Lines changed: 86 additions & 1 deletion

File tree

lib/embeddings.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,53 @@
22
let transformersModule = null;
33
let embeddingModel = null;
44

5+
/**
6+
* Ensure global.fetch is properly overridden to fix malformed URLs
7+
* This is a backup in case the SSR bundle overwrites it
8+
*/
9+
function ensureFetchOverride() {
10+
if (typeof global !== 'undefined') {
11+
const nodeFetch = require('node-fetch');
12+
13+
const fixedFetch = function(url, options) {
14+
let fetchUrl = url;
15+
16+
if (typeof url === 'string') {
17+
// Fix malformed URLs like "http://localhost:3000https://huggingface.co/..."
18+
if (url.includes('localhost') && url.includes('huggingface.co')) {
19+
// Extract the actual HuggingFace URL (everything from https:// onwards)
20+
const httpsMatch = url.match(/https?:\/\/[^/]*huggingface\.co\/.*/);
21+
if (httpsMatch) {
22+
fetchUrl = httpsMatch[0];
23+
} else {
24+
// Fallback: find https:// in the string and use everything from there
25+
const httpsIndex = url.indexOf('https://');
26+
if (httpsIndex !== -1) {
27+
fetchUrl = url.substring(httpsIndex);
28+
}
29+
}
30+
}
31+
}
32+
33+
// Use node-fetch for all requests (more reliable in Node.js environment)
34+
return nodeFetch(fetchUrl, options);
35+
};
36+
37+
// Always override to ensure it's set correctly
38+
global.fetch = fixedFetch;
39+
if (typeof globalThis !== 'undefined') {
40+
globalThis.fetch = fixedFetch;
41+
}
42+
}
43+
}
44+
545
/**
646
* Get the transformers module using dynamic import (required for ES modules)
747
*/
848
async function getTransformersModule() {
949
if (!transformersModule) {
50+
// Ensure fetch is overridden before importing transformers
51+
ensureFetchOverride();
1052
transformersModule = await import('@xenova/transformers');
1153
}
1254
return transformersModule;
@@ -20,6 +62,10 @@ async function getEmbeddingModel() {
2062
if (!embeddingModel) {
2163
console.log('Loading embedding model...');
2264
const { pipeline } = await getTransformersModule();
65+
66+
// global.fetch has already been overridden by ensureFetchOverride(), which
67+
// getTransformersModule() calls before the import, to fix malformed URLs from SSR webpack polyfills
68+
2369
embeddingModel = await pipeline(
2470
'feature-extraction',
2571
'Xenova/all-MiniLM-L6-v2' // Lightweight model, ~80MB, good for semantic search

lib/server.js

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,39 @@ if (!isDevelopment) {
1616
manifest = require("../ssr/ssr-manifest.json");
1717
const appPath = path.join(__dirname, "../ssr", manifest["app.js"]);
1818
renderer = require(appPath).default;
19+
20+
// Override global.fetch AFTER SSR bundle loads to fix malformed URLs
21+
// The SSR bundle sets its own fetch that prepends localhost:3000 to HuggingFace URLs
22+
const nodeFetch = require('node-fetch');
23+
/**
 * Replacement for `fetch` that repairs malformed HuggingFace URLs and then
 * delegates to node-fetch.
 *
 * Targets strings such as "http://localhost:3000https://huggingface.co/...",
 * where an absolute URL has been glued onto a local origin. Non-string inputs
 * and strings that do not look malformed are forwarded unchanged.
 *
 * @param {*} url - Request target; only strings are inspected.
 * @param {*} options - Passed straight through to node-fetch.
 * @returns {*} Whatever `nodeFetch(fetchUrl, options)` returns.
 */
const fixedFetch = function (url, options) {
  let fetchUrl = url;

  if (typeof url === 'string' && url.includes('localhost') && url.includes('huggingface.co')) {
    // A glued-on URL always starts before any query string or fragment. An
    // absolute URL that only appears as a query/fragment value belongs to a
    // legitimate local request and must not hijack it.
    const queryStart = url.search(/[?#]/);
    const startsBeforeQuery = (index) => queryStart === -1 || index < queryStart;

    // Preferred: the HuggingFace URL itself (any subdomain), through to the end.
    const hfMatch = /https?:\/\/[^/]*huggingface\.co\/.*/.exec(url);
    if (hfMatch && startsBeforeQuery(hfMatch.index)) {
      fetchUrl = hfMatch[0];
    } else {
      // Fallback: the first http(s):// that is not the string's own leading
      // scheme; use everything from there onwards.
      const embeddedOffset = url.slice(1).search(/https?:\/\//);
      if (embeddedOffset !== -1 && startsBeforeQuery(embeddedOffset + 1)) {
        fetchUrl = url.slice(embeddedOffset + 1);
      }
    }
  }

  // Use node-fetch for all requests (more reliable in Node.js environment)
  return nodeFetch(fetchUrl, options);
};
46+
47+
// Override on both global and globalThis to ensure it's caught
48+
global.fetch = fixedFetch;
49+
if (typeof globalThis !== 'undefined') {
50+
globalThis.fetch = fixedFetch;
51+
}
1952
} catch (e) {
2053
console.warn("SSR build not found, running in development mode");
2154
}
@@ -557,6 +590,7 @@ module.exports = async function ({ plants, nurseries }) {
557590
// Fetch all candidates (we'll calculate similarity and sort in JS)
558591
// For performance, we might want to limit this, but for now get all matching filters
559592
const candidates = await plants.find(baseQuery).toArray();
593+
console.log(`Semantic search: Found ${candidates.length} candidates with embeddings`);
560594

561595
// Calculate similarity scores
562596
const queryEmbedding = query._queryEmbedding;
@@ -573,7 +607,12 @@ module.exports = async function ({ plants, nurseries }) {
573607
_semanticScore: similarity
574608
};
575609
})
576-
.filter(plant => plant && plant._semanticScore >= 0.3) // Minimum similarity threshold
610+
.filter(plant => {
611+
if (!plant || plant._semanticScore < 0.3) {
612+
return false;
613+
}
614+
return true;
615+
}) // Minimum similarity threshold
577616
.sort((a, b) => {
578617
// If user wants to sort by Search Relevance, prioritize semantic score
579618
if (originalSortKeys.includes("_semanticScore") || req.query.sort === "Sort by Search Relevance") {

0 commit comments

Comments
 (0)