Skip to content

Commit 4902751

Browse files
Init
0 parents  commit 4902751

3 files changed

Lines changed: 131 additions & 0 deletions

File tree

.github/workflows/npm-publish.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Publishes the package to the npm registry whenever a tag starting with "v"
# is pushed.
name: Publish to npm

on:
  push:
    tags:
      - 'v*'

jobs:
  npm-publish:
    runs-on: ubuntu-latest
    permissions:
      # The job only checks out the repository, so read access is sufficient.
      contents: read
      # Lets the job request the OIDC token that `npm publish --provenance` uses.
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
      - run: |
          npm install
          npm publish --provenance
        env:
          # Token read by the .npmrc that setup-node writes for the registry above.
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

index.js

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
const { pipeline } = require('@huggingface/transformers');
2+
const { HierarchicalNSW } = require('hnswlib-node');
3+
4+
/**
 * Bidirectional one-to-one map: every key maps to exactly one value and every
 * value maps back to exactly one key.
 */
class BiMap {
  /**
   * @param {Iterable<[*, *]>} [entries=[]] - Initial [key, value] pairs. When a
   *   key or a value repeats, the later pair wins and the earlier pair is
   *   dropped, exactly as if the pairs had been passed to set() in order.
   */
  constructor(entries = []) {
    this._forward = new Map();
    this._backward = new Map();
    // Go through set() so both directions stay consistent even when the
    // input repeats a key or a value; this also accepts any iterable.
    for (const [key, value] of entries) {
      this.set(key, value);
    }
  }

  /**
   * Associates key with value, replacing any pair that already uses either.
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    // Remove the existing forward/backward mappings to keep the map one-to-one.
    if (this._forward.has(key)) {
      this._backward.delete(this._forward.get(key));
    }
    if (this._backward.has(value)) {
      this._forward.delete(this._backward.get(value));
    }
    this._forward.set(key, value);
    this._backward.set(value, key);
  }

  /** @returns {*} The value stored for key, or undefined. */
  get(key) { return this._forward.get(key); }

  /** @returns {*} The key stored for value, or undefined. */
  getKey(value) { return this._backward.get(value); }

  /** @returns {boolean} Whether key is present. */
  has(key) { return this._forward.has(key); }

  /** @returns {boolean} Whether value is present. */
  hasValue(value) { return this._backward.has(value); }

  /**
   * Removes the pair identified by key.
   * @returns {boolean} true if a pair was removed, false if key was absent.
   */
  delete(key) {
    if (!this._forward.has(key)) return false;
    const value = this._forward.get(key);
    this._forward.delete(key);
    this._backward.delete(value);
    return true;
  }

  /** Number of pairs currently stored. */
  get length() { return this._forward.size; }
}
35+
36+
// Module-level state shared by the Hexo filters and the helper registered below.
const numDimensions = 384; // Length of each vector stored in the index (presumably the model's embedding size; confirm).
37+
const maxElements = 1024; // Capacity passed to initIndex: the maximum number of data points.
38+
39+
// Declare and initialize the nearest-neighbour index, using the 'l2' space.
40+
const index = new HierarchicalNSW('l2', numDimensions);
41+
index.initIndex(maxElements);
42+
43+
// Feature-extraction pipeline; stays undefined until the 'after_init' filter assigns it.
let extractor;
44+
// Maps numeric index labels (keys) to post paths (values), and back.
const labelMapping = new BiMap();
45+
46+
// When Hexo fires 'after_init', build the feature-extraction pipeline and keep
// it in the module-level `extractor` for the rendering filter to use.
hexo.extend.filter.register('after_init', async function loadExtractor() {
  const task = 'feature-extraction';
  const model = 'Xenova/all-MiniLM-L6-v2';
  extractor = await pipeline(task, model);
});
49+
50+
/**
 * 'before_post_render' filter: embeds the raw post content, stores the vector
 * on the post as `embedding_vector`, and adds it to the index under a numeric
 * label tied to the post's path.
 *
 * @param {object} data - Post data; reads `_content` and `path`, writes
 *   `embedding_vector`.
 * @returns {Promise<object>} The same `data` object.
 */
hexo.extend.filter.register('before_post_render', async function(data) {
  const embeddings = await extractor([data._content], { pooling: 'mean', normalize: true });
  data.embedding_vector = embeddings.tolist()[0];

  // Reuse the label already assigned to this path; otherwise allocate the
  // next sequential one.
  let id;
  if (labelMapping.hasValue(data.path)) {
    id = labelMapping.getKey(data.path);
  } else {
    id = labelMapping.length;
    // The index is created with a fixed capacity; grow it before adding a
    // new label so sites with more posts than that capacity do not fail.
    if (index.getCurrentCount() >= index.getMaxElements()) {
      index.resizeIndex(index.getMaxElements() * 2);
    }
    labelMapping.set(id, data.path);
  }
  index.addPoint(data.embedding_vector, id);
  return data;
});
65+
66+
/**
 * 'related_posts' helper: looks up the nearest neighbours of a post's
 * embedding and returns their paths, excluding the post itself.
 *
 * @param {object} post - Post object; reads `embedding_vector` and `path`,
 *   writes `related_posts`.
 * @returns {string[]} Paths of the related posts, in the order returned by
 *   the index search; empty when the post has no embedding.
 */
hexo.extend.helper.register('related_posts', function(post) {
  const result = [];
  if (!post.embedding_vector) {
    post.related_posts = result;
    return result;
  }
  const numNeighbors = 5;
  const { neighbors } = index.searchKnn(post.embedding_vector, numNeighbors);

  // Exclude the query post by its own label rather than by position: the
  // post is not guaranteed to be the first hit (for example when another
  // post has identical content), and skipping index 0 blindly would drop a
  // genuine neighbour in that case.
  const selfLabel = labelMapping.getKey(post.path);
  const candidates = selfLabel === undefined
    ? neighbors.slice(1) // Unknown post: keep the original "skip the first hit" behaviour.
    : neighbors.filter((label) => label !== selfLabel);

  for (const label of candidates) {
    result.push(labelMapping.get(label));
  }
  post.related_posts = result;
  return result;
});

package.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"name": "hexo-posts-embedding",
3+
"version": "0.0.1",
4+
"description": "Posts embedding for Hexo.",
5+
"main": "index.js",
6+
"scripts": {
7+
"test": "echo \"Error: no test specified\" && exit 1"
8+
},
9+
"repository": "next-theme/hexo-posts-embedding",
10+
"keywords": [
11+
"Hexo",
12+
"NexT"
13+
],
14+
"author": "Mimi <stevenjoezhang@gmail.com> (https://zhangshuqiao.org)",
15+
"readme": "https://github.com/next-theme/hexo-posts-embedding#readme",
16+
"license": "MIT",
17+
"bugs": {
18+
"url": "https://github.com/next-theme/hexo-posts-embedding/issues"
19+
},
20+
"homepage": "https://github.com/next-theme/hexo-posts-embedding#readme",
21+
"dependencies": {
22+
"@huggingface/transformers": "^3.6.3",
23+
"hnswlib-node": "^3.0.0"
24+
}
25+
}

0 commit comments

Comments
 (0)