|
| 1 | +const { pipeline } = require('@huggingface/transformers'); |
| 2 | +const { HierarchicalNSW } = require('hnswlib-node'); |
| 3 | + |
/**
 * Bidirectional one-to-one map.
 *
 * Every key maps to exactly one value and every value maps back to exactly
 * one key. Writing a pair whose key or value is already present evicts the
 * conflicting pair, so the forward and backward views always stay in sync.
 */
class BiMap {
  /**
   * @param {Iterable<Array>} [entries=[]] Initial `[key, value]` pairs. Any
   *   iterable is accepted (array, Map, generator). Later pairs win over
   *   earlier ones that share a key or a value.
   */
  constructor(entries = []) {
    this._forward = new Map();
    this._backward = new Map();
    // Insert through set() so duplicate keys/values in the initial entries
    // cannot leave the two internal maps disagreeing with each other.
    for (const [key, value] of entries) {
      this.set(key, value);
    }
  }

  /**
   * Associates `key` with `value`, replacing any pair that already uses
   * either of them.
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    // Remove the existing forward/backward mappings to keep the relation one-to-one.
    if (this._forward.has(key)) {
      this._backward.delete(this._forward.get(key));
    }
    if (this._backward.has(value)) {
      this._forward.delete(this._backward.get(value));
    }
    this._forward.set(key, value);
    this._backward.set(value, key);
  }

  /** @returns {*} The value stored for `key`, or `undefined`. */
  get(key) {
    return this._forward.get(key);
  }

  /** @returns {*} The key stored for `value`, or `undefined`. */
  getKey(value) {
    return this._backward.get(value);
  }

  /** @returns {boolean} Whether `key` is present. */
  has(key) {
    return this._forward.has(key);
  }

  /** @returns {boolean} Whether `value` is present. */
  hasValue(value) {
    return this._backward.has(value);
  }

  /**
   * Removes the pair identified by `key`.
   * @returns {boolean} `true` if a pair was removed, `false` if `key` was absent.
   */
  delete(key) {
    if (!this._forward.has(key)) {
      return false;
    }
    const value = this._forward.get(key);
    this._forward.delete(key);
    this._backward.delete(value);
    return true;
  }

  /** @returns {number} Number of key/value pairs currently stored. */
  get length() {
    return this._forward.size;
  }
}
| 35 | + |
// Feature-extraction pipeline; left unset here and assigned by the
// `after_init` filter before it is first called.
let extractor;

// Two-way lookup between the numeric labels stored in the index and post paths.
const labelMapping = new BiMap();

// Length of every vector stored in the index.
const numDimensions = 384;
// Capacity the index is initialized with (maximum number of data points).
const maxElements = 1024;

// Declare the nearest-neighbour index over L2 distance, then initialize it.
const index = new HierarchicalNSW('l2', numDimensions);
index.initIndex(maxElements);
| 45 | + |
// Once Hexo has initialized, build the feature-extraction pipeline a single
// time and keep it in the module-level `extractor` for the render filter.
hexo.extend.filter.register('after_init', async function loadExtractor() {
  const featureExtractor = await pipeline(
    'feature-extraction',
    'Xenova/all-MiniLM-L6-v2'
  );
  extractor = featureExtractor;
});
| 49 | + |
/**
 * Embeds each post before it is rendered and stores the vector in the index.
 *
 * The embedding is attached to the post as `data.embedding_vector` and added
 * to `index` under a numeric label. A post whose `data.path` has been seen
 * before reuses its label; otherwise a new label is allocated.
 *
 * @param {object} data Post data; `_content` and `path` are read.
 * @returns {Promise<object>} The same `data` object, with `embedding_vector` set.
 * @throws {Error} If the embedding model has not been loaded yet.
 */
hexo.extend.filter.register('before_post_render', async function(data) {
  if (!extractor) {
    // Fail with an actionable message instead of "extractor is not a function".
    throw new Error(
      'related_posts: embedding model is not loaded; the after_init filter must complete before posts are rendered'
    );
  }

  const embeddings = await extractor([data._content], { pooling: 'mean', normalize: true });
  data.embedding_vector = embeddings.tolist()[0];

  // Label allocation happens after the await and contains no further awaits,
  // so two posts being processed concurrently cannot pick the same id.
  const isKnownPost = labelMapping.hasValue(data.path);
  const id = isKnownPost ? labelMapping.getKey(data.path) : labelMapping.length;

  if (!isKnownPost) {
    // The index is created with a fixed capacity; grow it before inserting a
    // new label so sites with more posts than that capacity still build.
    if (index.getCurrentCount() >= index.getMaxElements()) {
      index.resizeIndex(index.getMaxElements() * 2);
    }
    labelMapping.set(id, data.path);
  }

  index.addPoint(data.embedding_vector, id);
  return data;
});
| 65 | + |
/**
 * Template helper returning the paths of the posts closest to `post`.
 *
 * Queries the index with the post's embedding, drops the post itself, and
 * returns at most four paths (five neighbours requested, one slot reserved
 * for the post's own entry). The result is also stored on
 * `post.related_posts`.
 *
 * @param {object} post Post object; `embedding_vector` and `path` are read.
 * @returns {string[]} Paths of related posts, nearest first. Empty when the
 *   post has no embedding or the index holds no points.
 */
hexo.extend.helper.register('related_posts', function(post) {
  const result = [];
  const indexedCount = index.getCurrentCount();
  if (!post.embedding_vector || indexedCount === 0) {
    post.related_posts = result;
    return result;
  }

  const numNeighbors = 5;
  // Never request more neighbours than there are indexed points.
  // NOTE(review): the index may reject a larger k outright — confirm against
  // the hnswlib-node documentation.
  const k = Math.min(numNeighbors, indexedCount);
  const { neighbors } = index.searchKnn(post.embedding_vector, k);

  // Exclude the query post by its own label rather than by position: the post
  // is not guaranteed to come back first (e.g. distance ties between posts
  // with identical content).
  const selfId = labelMapping.getKey(post.path);
  const maxRelated = numNeighbors - 1;
  for (const neighbor of neighbors) {
    if (result.length >= maxRelated) {
      break;
    }
    if (neighbor === selfId) {
      continue;
    }
    result.push(labelMapping.get(neighbor));
  }

  post.related_posts = result;
  return result;
});
0 commit comments