/** * Vector database manager * Uses hnswlib-wasm for high-performance vector similarity search * Implements singleton pattern to avoid duplicate WASM module initialization */ import { loadHnswlib } from 'hnswlib-wasm-static'; import type { TextChunk } from './text-chunker'; export interface VectorDocument { id: string; tabId: number; url: string; title: string; chunk: TextChunk; embedding: Float32Array; timestamp: number; } export interface SearchResult { document: VectorDocument; similarity: number; distance: number; } export interface VectorDatabaseConfig { dimension: number; maxElements: number; efConstruction: number; M: number; efSearch: number; indexFileName: string; enableAutoCleanup?: boolean; maxRetentionDays?: number; } let globalHnswlib: any = null; let globalHnswlibInitPromise: Promise | null = null; let globalHnswlibInitialized = false; let syncInProgress = false; let pendingSyncPromise: Promise | null = null; const DB_NAME = 'VectorDatabaseStorage'; const DB_VERSION = 1; const STORE_NAME = 'documentMappings'; /** * IndexedDB helper functions */ class IndexedDBHelper { private static dbPromise: Promise | null = null; static async getDB(): Promise { if (!this.dbPromise) { this.dbPromise = new Promise((resolve, reject) => { const request = indexedDB.open(DB_NAME, DB_VERSION); request.onerror = () => reject(request.error); request.onsuccess = () => resolve(request.result); request.onupgradeneeded = (event) => { const db = (event.target as IDBOpenDBRequest).result; if (!db.objectStoreNames.contains(STORE_NAME)) { const store = db.createObjectStore(STORE_NAME, { keyPath: 'id' }); store.createIndex('indexFileName', 'indexFileName', { unique: false }); } }; }); } return this.dbPromise; } static async saveData(indexFileName: string, data: any): Promise { const db = await this.getDB(); const transaction = db.transaction([STORE_NAME], 'readwrite'); const store = transaction.objectStore(STORE_NAME); await new Promise((resolve, reject) => { const request = store.put({ id: indexFileName, indexFileName, data, timestamp: Date.now(), }); request.onsuccess = () => resolve(); request.onerror = () => reject(request.error); }); } static async loadData(indexFileName: string): Promise { const db = await this.getDB(); const transaction = db.transaction([STORE_NAME], 'readonly'); const store = transaction.objectStore(STORE_NAME); return new Promise((resolve, reject) => { const request = store.get(indexFileName); request.onsuccess = () => { const result = request.result; resolve(result ? result.data : null); }; request.onerror = () => reject(request.error); }); } static async deleteData(indexFileName: string): Promise { const db = await this.getDB(); const transaction = db.transaction([STORE_NAME], 'readwrite'); const store = transaction.objectStore(STORE_NAME); await new Promise((resolve, reject) => { const request = store.delete(indexFileName); request.onsuccess = () => resolve(); request.onerror = () => reject(request.error); }); } /** * Clear all IndexedDB data (for complete cleanup during model switching) */ static async clearAllData(): Promise { try { const db = await this.getDB(); const transaction = db.transaction([STORE_NAME], 'readwrite'); const store = transaction.objectStore(STORE_NAME); await new Promise((resolve, reject) => { const request = store.clear(); request.onsuccess = () => { console.log('IndexedDBHelper: All data cleared from IndexedDB'); resolve(); }; request.onerror = () => reject(request.error); }); } catch (error) { console.error('IndexedDBHelper: Failed to clear all data:', error); throw error; } } /** * Get all stored keys (for debugging) */ static async getAllKeys(): Promise { try { const db = await this.getDB(); const transaction = db.transaction([STORE_NAME], 'readonly'); const store = transaction.objectStore(STORE_NAME); return new Promise((resolve, reject) => { const request = store.getAllKeys(); request.onsuccess = () => resolve(request.result as string[]); request.onerror = () => reject(request.error); }); } catch (error) { console.error('IndexedDBHelper: Failed to get all keys:', error); return []; } } } /** * Global hnswlib-wasm initialization function * Ensures initialization only once across the entire application */ async function initializeGlobalHnswlib(): Promise { if (globalHnswlibInitialized && globalHnswlib) { return globalHnswlib; } if (globalHnswlibInitPromise) { return globalHnswlibInitPromise; } globalHnswlibInitPromise = (async () => { try { console.log('VectorDatabase: Initializing global hnswlib-wasm instance...'); globalHnswlib = await loadHnswlib(); globalHnswlibInitialized = true; console.log('VectorDatabase: Global hnswlib-wasm instance initialized successfully'); return globalHnswlib; } catch (error) { console.error('VectorDatabase: Failed to initialize global hnswlib-wasm:', error); globalHnswlibInitPromise = null; throw error; } })(); return globalHnswlibInitPromise; } export class VectorDatabase { private index: any = null; private isInitialized = false; private isInitializing = false; private initPromise: Promise | null = null; private documents = new Map(); private tabDocuments = new Map>(); private nextLabel = 0; private readonly config: VectorDatabaseConfig; constructor(config?: Partial) { this.config = { dimension: 384, maxElements: 100000, efConstruction: 200, M: 48, efSearch: 50, indexFileName: 'tab_content_index.dat', enableAutoCleanup: true, maxRetentionDays: 30, ...config, }; console.log('VectorDatabase: Initialized with config:', { dimension: this.config.dimension, efSearch: this.config.efSearch, M: this.config.M, efConstruction: this.config.efConstruction, enableAutoCleanup: this.config.enableAutoCleanup, maxRetentionDays: this.config.maxRetentionDays, }); } /** * Initialize vector database */ public async initialize(): Promise { if (this.isInitialized) return; if (this.isInitializing && this.initPromise) return this.initPromise; this.isInitializing = true; this.initPromise = this._doInitialize().finally(() => { this.isInitializing = false; }); return this.initPromise; } private async _doInitialize(): Promise { try { console.log('VectorDatabase: Initializing...'); const hnswlib = await initializeGlobalHnswlib(); hnswlib.EmscriptenFileSystemManager.setDebugLogs(true); this.index = new hnswlib.HierarchicalNSW( 'cosine', this.config.dimension, this.config.indexFileName, ); await this.syncFileSystem('read'); const indexExists = hnswlib.EmscriptenFileSystemManager.checkFileExists( this.config.indexFileName, ); if (indexExists) { console.log('VectorDatabase: Loading existing index...'); try { await this.index.readIndex(this.config.indexFileName, this.config.maxElements); this.index.setEfSearch(this.config.efSearch); await this.loadDocumentMappings(); if (this.documents.size > 0) { const maxLabel = Math.max(...Array.from(this.documents.keys())); this.nextLabel = maxLabel + 1; console.log( `VectorDatabase: Loaded existing index with ${this.documents.size} documents, next label: ${this.nextLabel}`, ); } else { const indexCount = this.index.getCurrentCount(); if (indexCount > 0) { console.warn( `VectorDatabase: Index has ${indexCount} vectors but no document mappings found. This may cause label mismatch.`, ); this.nextLabel = indexCount; } else { this.nextLabel = 0; } console.log( `VectorDatabase: No document mappings found, starting with next label: ${this.nextLabel}`, ); } } catch (loadError) { console.warn( 'VectorDatabase: Failed to load existing index, creating new one:', loadError, ); this.index.initIndex( this.config.maxElements, this.config.M, this.config.efConstruction, 200, ); this.index.setEfSearch(this.config.efSearch); this.nextLabel = 0; } } else { console.log('VectorDatabase: Creating new index...'); this.index.initIndex( this.config.maxElements, this.config.M, this.config.efConstruction, 200, ); this.index.setEfSearch(this.config.efSearch); this.nextLabel = 0; } this.isInitialized = true; console.log('VectorDatabase: Initialization completed successfully'); } catch (error) { console.error('VectorDatabase: Initialization failed:', error); this.isInitialized = false; throw error; } } /** * Add document to vector database */ public async addDocument( tabId: number, url: string, title: string, chunk: TextChunk, embedding: Float32Array, ): Promise { if (!this.isInitialized) { await this.initialize(); } const documentId = this.generateDocumentId(tabId, chunk.index); const document: VectorDocument = { id: documentId, tabId, url, title, chunk, embedding, timestamp: Date.now(), }; try { // Validate vector data if (!embedding || embedding.length !== this.config.dimension) { const errorMsg = `Invalid embedding dimension: expected ${this.config.dimension}, got ${embedding?.length || 0}`; console.error('VectorDatabase: Dimension mismatch detected!', { expectedDimension: this.config.dimension, actualDimension: embedding?.length || 0, documentId, tabId, url, title: title.substring(0, 50) + '...', }); // This might be caused by model switching, suggest reinitialization console.warn( 'VectorDatabase: This might be caused by model switching. Consider reinitializing the vector database with the correct dimension.', ); throw new Error(errorMsg); } // Check if vector data contains invalid values for (let i = 0; i < embedding.length; i++) { if (!isFinite(embedding[i])) { throw new Error(`Invalid embedding value at index ${i}: ${embedding[i]}`); } } // Ensure we have a clean Float32Array let cleanEmbedding: Float32Array; if (embedding instanceof Float32Array) { cleanEmbedding = embedding; } else { cleanEmbedding = new Float32Array(embedding); } // Use current nextLabel as label const label = this.nextLabel++; console.log( `VectorDatabase: Adding document with label ${label}, embedding dimension: ${embedding.length}`, ); // Add vector to index // According to hnswlib-wasm-static emscripten binding requirements, need to create VectorFloat type console.log(`VectorDatabase: 🔧 DEBUGGING - About to call addPoint with:`, { embeddingType: typeof cleanEmbedding, isFloat32Array: cleanEmbedding instanceof Float32Array, length: cleanEmbedding.length, firstFewValues: Array.from(cleanEmbedding.slice(0, 3)), label: label, replaceDeleted: false, }); // Method 1: Try using VectorFloat constructor (if available) let vectorToAdd; try { // Check if VectorFloat constructor exists if (globalHnswlib && globalHnswlib.VectorFloat) { console.log('VectorDatabase: Using VectorFloat constructor'); vectorToAdd = new globalHnswlib.VectorFloat(); // Add elements to VectorFloat one by one for (let i = 0; i < cleanEmbedding.length; i++) { vectorToAdd.push_back(cleanEmbedding[i]); } } else { // Method 2: Use plain JS array (fallback) console.log('VectorDatabase: Using plain JS array as fallback'); vectorToAdd = Array.from(cleanEmbedding); } // Call addPoint with constructed vector this.index.addPoint(vectorToAdd, label, false); // Clean up VectorFloat object (if manually created) if (vectorToAdd && typeof vectorToAdd.delete === 'function') { vectorToAdd.delete(); } } catch (vectorError) { console.error( 'VectorDatabase: VectorFloat approach failed, trying alternatives:', vectorError, ); // Method 3: Try passing Float32Array directly try { console.log('VectorDatabase: Trying Float32Array directly'); this.index.addPoint(cleanEmbedding, label, false); } catch (float32Error) { console.error('VectorDatabase: Float32Array approach failed:', float32Error); // Method 4: Last resort - use spread operator console.log('VectorDatabase: Trying spread operator as last resort'); this.index.addPoint([...cleanEmbedding], label, false); } } console.log(`VectorDatabase: ✅ Successfully added document with label ${label}`); // Store document mapping this.documents.set(label, document); // Update tab document mapping if (!this.tabDocuments.has(tabId)) { this.tabDocuments.set(tabId, new Set()); } this.tabDocuments.get(tabId)!.add(label); // Save index and mappings await this.saveIndex(); await this.saveDocumentMappings(); // Check if auto cleanup is needed if (this.config.enableAutoCleanup) { await this.checkAndPerformAutoCleanup(); } console.log(`VectorDatabase: Successfully added document ${documentId} with label ${label}`); return label; } catch (error) { console.error('VectorDatabase: Failed to add document:', error); console.error('VectorDatabase: Embedding info:', { type: typeof embedding, constructor: embedding?.constructor?.name, length: embedding?.length, isFloat32Array: embedding instanceof Float32Array, firstFewValues: embedding ? Array.from(embedding.slice(0, 5)) : null, }); throw error; } } /** * Search similar documents */ public async search(queryEmbedding: Float32Array, topK: number = 10): Promise { if (!this.isInitialized) { await this.initialize(); } try { // Validate query vector if (!queryEmbedding || queryEmbedding.length !== this.config.dimension) { throw new Error( `Invalid query embedding dimension: expected ${this.config.dimension}, got ${queryEmbedding?.length || 0}`, ); } // Check if query vector contains invalid values for (let i = 0; i < queryEmbedding.length; i++) { if (!isFinite(queryEmbedding[i])) { throw new Error(`Invalid query embedding value at index ${i}: ${queryEmbedding[i]}`); } } console.log( `VectorDatabase: Searching with query embedding dimension: ${queryEmbedding.length}, topK: ${topK}`, ); // Check if index is empty const currentCount = this.index.getCurrentCount(); if (currentCount === 0) { console.log('VectorDatabase: Index is empty, returning no results'); return []; } console.log(`VectorDatabase: Index contains ${currentCount} vectors`); // Check if document mapping and index are synchronized const mappingCount = this.documents.size; if (mappingCount === 0 && currentCount > 0) { console.warn( `VectorDatabase: Index has ${currentCount} vectors but document mapping is empty. Attempting to reload mappings...`, ); await this.loadDocumentMappings(); if (this.documents.size === 0) { console.error( 'VectorDatabase: Failed to load document mappings. Index and mappings are out of sync.', ); return []; } console.log( `VectorDatabase: Successfully reloaded ${this.documents.size} document mappings`, ); } // Process query vector according to hnswlib-wasm-static emscripten binding requirements let queryVector; let searchResult; try { // Method 1: Try using VectorFloat constructor (if available) if (globalHnswlib && globalHnswlib.VectorFloat) { console.log('VectorDatabase: Using VectorFloat for search query'); queryVector = new globalHnswlib.VectorFloat(); // Add elements to VectorFloat one by one for (let i = 0; i < queryEmbedding.length; i++) { queryVector.push_back(queryEmbedding[i]); } searchResult = this.index.searchKnn(queryVector, topK, undefined); // Clean up VectorFloat object if (queryVector && typeof queryVector.delete === 'function') { queryVector.delete(); } } else { // Method 2: Use plain JS array (fallback) console.log('VectorDatabase: Using plain JS array for search query'); const queryArray = Array.from(queryEmbedding); searchResult = this.index.searchKnn(queryArray, topK, undefined); } } catch (vectorError) { console.error( 'VectorDatabase: VectorFloat search failed, trying alternatives:', vectorError, ); // Method 3: Try passing Float32Array directly try { console.log('VectorDatabase: Trying Float32Array directly for search'); searchResult = this.index.searchKnn(queryEmbedding, topK, undefined); } catch (float32Error) { console.error('VectorDatabase: Float32Array search failed:', float32Error); // Method 4: Last resort - use spread operator console.log('VectorDatabase: Trying spread operator for search as last resort'); searchResult = this.index.searchKnn([...queryEmbedding], topK, undefined); } } const results: SearchResult[] = []; console.log(`VectorDatabase: Processing ${searchResult.neighbors.length} search neighbors`); console.log(`VectorDatabase: Available documents in mapping: ${this.documents.size}`); console.log(`VectorDatabase: Index current count: ${this.index.getCurrentCount()}`); for (let i = 0; i < searchResult.neighbors.length; i++) { const label = searchResult.neighbors[i]; const distance = searchResult.distances[i]; const similarity = 1 - distance; // Convert cosine distance to similarity console.log( `VectorDatabase: Processing neighbor ${i}: label=${label}, distance=${distance}, similarity=${similarity}`, ); // Find corresponding document by label const document = this.findDocumentByLabel(label); if (document) { console.log(`VectorDatabase: Found document for label ${label}: ${document.id}`); results.push({ document, similarity, distance, }); } else { console.warn(`VectorDatabase: No document found for label ${label}`); // Detailed debug information if (i < 5) { // Only show detailed info for first 5 neighbors to avoid log spam console.warn( `VectorDatabase: Available labels (first 20): ${Array.from(this.documents.keys()).slice(0, 20).join(', ')}`, ); console.warn(`VectorDatabase: Total available labels: ${this.documents.size}`); console.warn( `VectorDatabase: Label type: ${typeof label}, Available label types: ${Array.from( this.documents.keys(), ) .slice(0, 3) .map((k) => typeof k) .join(', ')}`, ); } } } console.log( `VectorDatabase: Found ${results.length} search results out of ${searchResult.neighbors.length} neighbors`, ); // If no results found but index has data, indicates label mismatch if (results.length === 0 && searchResult.neighbors.length > 0) { console.error( 'VectorDatabase: Label mismatch detected! Index has vectors but no matching documents found.', ); console.error( 'VectorDatabase: This usually indicates the index and document mappings are out of sync.', ); console.error('VectorDatabase: Consider rebuilding the index to fix this issue.'); // Provide some diagnostic information const sampleLabels = searchResult.neighbors.slice(0, 5); const availableLabels = Array.from(this.documents.keys()).slice(0, 5); console.error('VectorDatabase: Sample search labels:', sampleLabels); console.error('VectorDatabase: Sample available labels:', availableLabels); } return results.sort((a, b) => b.similarity - a.similarity); } catch (error) { console.error('VectorDatabase: Search failed:', error); console.error('VectorDatabase: Query embedding info:', { type: typeof queryEmbedding, constructor: queryEmbedding?.constructor?.name, length: queryEmbedding?.length, isFloat32Array: queryEmbedding instanceof Float32Array, firstFewValues: queryEmbedding ? Array.from(queryEmbedding.slice(0, 5)) : null, }); throw error; } } /** * Remove all documents for a tab */ public async removeTabDocuments(tabId: number): Promise { if (!this.isInitialized) { await this.initialize(); } const documentLabels = this.tabDocuments.get(tabId); if (!documentLabels) { return; } try { // Remove documents from mapping (hnswlib-wasm doesn't support direct deletion, only mark as deleted) for (const label of documentLabels) { this.documents.delete(label); } // Clean up tab mapping this.tabDocuments.delete(tabId); // Save changes await this.saveDocumentMappings(); console.log(`VectorDatabase: Removed ${documentLabels.size} documents for tab ${tabId}`); } catch (error) { console.error('VectorDatabase: Failed to remove tab documents:', error); throw error; } } /** * Get database statistics */ public getStats(): { totalDocuments: number; totalTabs: number; indexSize: number; isInitialized: boolean; } { return { totalDocuments: this.documents.size, totalTabs: this.tabDocuments.size, indexSize: this.calculateStorageSize(), isInitialized: this.isInitialized, }; } /** * Calculate actual storage size (bytes) */ private calculateStorageSize(): number { let totalSize = 0; try { // 1. 计算文档映射的大小 const documentsSize = this.calculateDocumentMappingsSize(); totalSize += documentsSize; // 2. 计算向量数据的大小 const vectorsSize = this.calculateVectorsSize(); totalSize += vectorsSize; // 3. 估算索引结构的大小 const indexStructureSize = this.calculateIndexStructureSize(); totalSize += indexStructureSize; console.log( `VectorDatabase: Storage size breakdown - Documents: ${documentsSize}, Vectors: ${vectorsSize}, Index: ${indexStructureSize}, Total: ${totalSize} bytes`, ); } catch (error) { console.warn('VectorDatabase: Failed to calculate storage size:', error); // 返回一个基于文档数量的估算值 totalSize = this.documents.size * 1024; // 每个文档估算1KB } return totalSize; } /** * Calculate document mappings size */ private calculateDocumentMappingsSize(): number { let size = 0; // Calculate documents Map size for (const [label, document] of this.documents.entries()) { // label (number): 8 bytes size += 8; // document object size += this.calculateObjectSize(document); } // Calculate tabDocuments Map size for (const [tabId, labels] of this.tabDocuments.entries()) { // tabId (number): 8 bytes size += 8; // Set of labels: 8 bytes per label + Set overhead size += labels.size * 8 + 32; // 32 bytes Set overhead } return size; } /** * Calculate vectors data size */ private calculateVectorsSize(): number { const documentCount = this.documents.size; const dimension = this.config.dimension; // Each vector: dimension * 4 bytes (Float32) const vectorSize = dimension * 4; return documentCount * vectorSize; } /** * Estimate index structure size */ private calculateIndexStructureSize(): number { const documentCount = this.documents.size; if (documentCount === 0) return 0; // HNSW index size estimation // Based on papers and actual testing, HNSW index size is about 20-40% of vector data const vectorsSize = this.calculateVectorsSize(); const indexOverhead = Math.floor(vectorsSize * 0.3); // 30% overhead // Additional graph structure overhead const graphOverhead = documentCount * 64; // About 64 bytes graph structure overhead per node return indexOverhead + graphOverhead; } /** * Calculate object size (rough estimation) */ private calculateObjectSize(obj: any): number { let size = 0; try { const jsonString = JSON.stringify(obj); // UTF-8 encoding, most characters 1 byte, Chinese etc 3 bytes, average 2 bytes size = jsonString.length * 2; } catch (error) { // If JSON serialization fails, use default estimation size = 512; // Default 512 bytes } return size; } /** * Clear entire database */ public async clear(): Promise { console.log('VectorDatabase: Starting complete database clear...'); try { // Clear in-memory data structures this.documents.clear(); this.tabDocuments.clear(); this.nextLabel = 0; // Clear HNSW index file (in hnswlib-index database) if (this.isInitialized && this.index) { try { console.log('VectorDatabase: Clearing HNSW index file from IndexedDB...'); // 1. First try to physically delete index file (using EmscriptenFileSystemManager) try { if ( globalHnswlib && globalHnswlib.EmscriptenFileSystemManager.checkFileExists(this.config.indexFileName) ) { console.log( `VectorDatabase: Deleting physical index file: ${this.config.indexFileName}`, ); globalHnswlib.EmscriptenFileSystemManager.deleteFile(this.config.indexFileName); await this.syncFileSystem('write'); // Ensure deletion is synced to persistent storage console.log( `VectorDatabase: Physical index file ${this.config.indexFileName} deleted successfully`, ); } else { console.log( `VectorDatabase: Physical index file ${this.config.indexFileName} does not exist or already deleted`, ); } } catch (fileError) { console.warn( `VectorDatabase: Failed to delete physical index file ${this.config.indexFileName}:`, fileError, ); // Continue with other cleanup operations, don't block the process } // 2. Delete index file from IndexedDB await this.index.deleteIndex(this.config.indexFileName); console.log('VectorDatabase: HNSW index file cleared from IndexedDB'); // 3. Reinitialize empty index console.log('VectorDatabase: Reinitializing empty HNSW index...'); this.index.initIndex( this.config.maxElements, this.config.M, this.config.efConstruction, 200, ); this.index.setEfSearch(this.config.efSearch); // 4. Force save empty index await this.forceSaveIndex(); } catch (indexError) { console.warn('VectorDatabase: Failed to clear HNSW index file:', indexError); // Continue with other cleanup operations } } // Clear document mappings from IndexedDB (in VectorDatabaseStorage database) try { console.log('VectorDatabase: Clearing document mappings from IndexedDB...'); await IndexedDBHelper.deleteData(this.config.indexFileName); console.log('VectorDatabase: Document mappings cleared from IndexedDB'); } catch (idbError) { console.warn( 'VectorDatabase: Failed to clear document mappings from IndexedDB, trying chrome.storage fallback:', idbError, ); // Clear backup data from chrome.storage try { const storageKey = `hnswlib_document_mappings_${this.config.indexFileName}`; await chrome.storage.local.remove([storageKey]); console.log('VectorDatabase: Chrome storage fallback cleared'); } catch (storageError) { console.warn('VectorDatabase: Failed to clear chrome.storage fallback:', storageError); } } // Save empty document mappings to ensure consistency await this.saveDocumentMappings(); console.log('VectorDatabase: Complete database clear finished successfully'); } catch (error) { console.error('VectorDatabase: Failed to clear database:', error); throw error; } } /** * Force save index and sync filesystem */ private async forceSaveIndex(): Promise { try { await this.index.writeIndex(this.config.indexFileName); await this.syncFileSystem('write'); // Force sync } catch (error) { console.error('VectorDatabase: Failed to force save index:', error); } } /** * Check and perform auto cleanup */ private async checkAndPerformAutoCleanup(): Promise { try { const currentCount = this.documents.size; const maxElements = this.config.maxElements; console.log( `VectorDatabase: Auto cleanup check - current: ${currentCount}, max: ${maxElements}`, ); // Check if maximum element count is exceeded if (currentCount >= maxElements) { console.log('VectorDatabase: Document count reached limit, performing cleanup...'); await this.performLRUCleanup(Math.floor(maxElements * 0.2)); // Clean up 20% of data } // Check if there's expired data if (this.config.maxRetentionDays && this.config.maxRetentionDays > 0) { await this.performTimeBasedCleanup(); } } catch (error) { console.error('VectorDatabase: Auto cleanup failed:', error); } } /** * Perform LRU-based cleanup (delete oldest documents) */ private async performLRUCleanup(cleanupCount: number): Promise { try { console.log( `VectorDatabase: Starting LRU cleanup, removing ${cleanupCount} oldest documents`, ); // Get all documents and sort by timestamp const allDocuments = Array.from(this.documents.entries()); allDocuments.sort((a, b) => a[1].timestamp - b[1].timestamp); // Select documents to delete const documentsToDelete = allDocuments.slice(0, cleanupCount); for (const [label, _document] of documentsToDelete) { await this.removeDocumentByLabel(label); } // Save updated index and mappings await this.saveIndex(); await this.saveDocumentMappings(); console.log( `VectorDatabase: LRU cleanup completed, removed ${documentsToDelete.length} documents`, ); } catch (error) { console.error('VectorDatabase: LRU cleanup failed:', error); } } /** * Perform time-based cleanup (delete expired documents) */ private async performTimeBasedCleanup(): Promise { try { const maxRetentionMs = this.config.maxRetentionDays! * 24 * 60 * 60 * 1000; const cutoffTime = Date.now() - maxRetentionMs; console.log( `VectorDatabase: Starting time-based cleanup, removing documents older than ${this.config.maxRetentionDays} days`, ); const documentsToDelete: number[] = []; for (const [label, document] of this.documents.entries()) { if (document.timestamp < cutoffTime) { documentsToDelete.push(label); } } for (const label of documentsToDelete) { await this.removeDocumentByLabel(label); } // Save updated index and mappings if (documentsToDelete.length > 0) { await this.saveIndex(); await this.saveDocumentMappings(); } console.log( `VectorDatabase: Time-based cleanup completed, removed ${documentsToDelete.length} expired documents`, ); } catch (error) { console.error('VectorDatabase: Time-based cleanup failed:', error); } } /** * Remove single document by label */ private async removeDocumentByLabel(label: number): Promise { try { const document = this.documents.get(label); if (!document) { console.warn(`VectorDatabase: Document with label ${label} not found`); return; } // Remove vector from HNSW index if (this.index) { try { this.index.markDelete(label); } catch (indexError) { console.warn( `VectorDatabase: Failed to mark delete in index for label ${label}:`, indexError, ); } } // Remove from memory mapping this.documents.delete(label); // Remove from tab mapping const tabId = document.tabId; if (this.tabDocuments.has(tabId)) { this.tabDocuments.get(tabId)!.delete(label); // If tab has no other documents, delete entire tab mapping if (this.tabDocuments.get(tabId)!.size === 0) { this.tabDocuments.delete(tabId); } } console.log(`VectorDatabase: Removed document with label ${label} from tab ${tabId}`); } catch (error) { console.error(`VectorDatabase: Failed to remove document with label ${label}:`, error); } } // 私有辅助方法 private generateDocumentId(tabId: number, chunkIndex: number): string { return `tab_${tabId}_chunk_${chunkIndex}_${Date.now()}`; } private findDocumentByLabel(label: number): VectorDocument | null { return this.documents.get(label) || null; } private async syncFileSystem(direction: 'read' | 'write'): Promise { try { if (!globalHnswlib) { return; } // If sync operation is already in progress, wait for it to complete if (syncInProgress && pendingSyncPromise) { console.log(`VectorDatabase: Sync already in progress, waiting...`); await pendingSyncPromise; return; } // Mark sync start syncInProgress = true; // Create sync Promise with timeout mechanism pendingSyncPromise = new Promise((resolve, reject) => { const timeout = setTimeout(() => { console.warn(`VectorDatabase: Filesystem sync (${direction}) timeout`); syncInProgress = false; pendingSyncPromise = null; reject(new Error('Sync timeout')); }, 5000); // 5 second timeout try { globalHnswlib.EmscriptenFileSystemManager.syncFS(direction === 'read', () => { clearTimeout(timeout); console.log(`VectorDatabase: Filesystem sync (${direction}) completed`); syncInProgress = false; pendingSyncPromise = null; resolve(); }); } catch (error) { clearTimeout(timeout); console.warn(`VectorDatabase: Failed to sync filesystem (${direction}):`, error); syncInProgress = false; pendingSyncPromise = null; reject(error); } }); await pendingSyncPromise; } catch (error) { console.warn(`VectorDatabase: Failed to sync filesystem (${direction}):`, error); syncInProgress = false; pendingSyncPromise = null; } } private async saveIndex(): Promise { try { await this.index.writeIndex(this.config.indexFileName); // Reduce sync frequency, only sync when necessary if (this.documents.size % 10 === 0) { // Sync every 10 documents await this.syncFileSystem('write'); } } catch (error) { console.error('VectorDatabase: Failed to save index:', error); } } private async saveDocumentMappings(): Promise { try { // Save document mappings to IndexedDB const mappingData = { documents: Array.from(this.documents.entries()), tabDocuments: Array.from(this.tabDocuments.entries()).map(([tabId, labels]) => [ tabId, Array.from(labels), ]), nextLabel: this.nextLabel, }; try { // Use IndexedDB to save data, supports larger storage capacity await IndexedDBHelper.saveData(this.config.indexFileName, mappingData); console.log('VectorDatabase: Document mappings saved to IndexedDB'); } catch (idbError) { console.warn( 'VectorDatabase: Failed to save to IndexedDB, falling back to chrome.storage:', idbError, ); // Fall back to chrome.storage.local try { const storageKey = `hnswlib_document_mappings_${this.config.indexFileName}`; await chrome.storage.local.set({ [storageKey]: mappingData }); console.log('VectorDatabase: Document mappings saved to chrome.storage.local (fallback)'); } catch (storageError) { console.error( 'VectorDatabase: Failed to save to both IndexedDB and chrome.storage:', storageError, ); } } } catch (error) { console.error('VectorDatabase: Failed to save document mappings:', error); } } public async loadDocumentMappings(): Promise { try { // Load document mappings from IndexedDB if (!globalHnswlib) { return; } let mappingData = null; try { // First try to read from IndexedDB mappingData = await IndexedDBHelper.loadData(this.config.indexFileName); if (mappingData) { console.log(`VectorDatabase: Loaded document mappings from IndexedDB`); } } catch (idbError) { console.warn( 'VectorDatabase: Failed to read from IndexedDB, trying chrome.storage:', idbError, ); } // If IndexedDB has no data, try reading from chrome.storage.local (backward compatibility) if (!mappingData) { try { const storageKey = `hnswlib_document_mappings_${this.config.indexFileName}`; const result = await chrome.storage.local.get([storageKey]); mappingData = result[storageKey]; if (mappingData) { console.log( `VectorDatabase: Loaded document mappings from chrome.storage.local (fallback)`, ); // Migrate to IndexedDB try { await IndexedDBHelper.saveData(this.config.indexFileName, mappingData); console.log('VectorDatabase: Migrated data from chrome.storage to IndexedDB'); } catch (migrationError) { console.warn('VectorDatabase: Failed to migrate data to IndexedDB:', migrationError); } } } catch (storageError) { console.warn('VectorDatabase: Failed to read from chrome.storage.local:', storageError); } } if (mappingData) { // Restore document mappings this.documents.clear(); for (const [label, doc] of mappingData.documents) { this.documents.set(label, doc); } // Restore tab mappings this.tabDocuments.clear(); for (const [tabId, labels] of mappingData.tabDocuments) { this.tabDocuments.set(tabId, new Set(labels)); } // Restore nextLabel - use saved value or calculate max label + 1 if (mappingData.nextLabel !== undefined) { this.nextLabel = mappingData.nextLabel; } else if (this.documents.size > 0) { // If no saved nextLabel, calculate max label + 1 const maxLabel = Math.max(...Array.from(this.documents.keys())); this.nextLabel = maxLabel + 1; } else { this.nextLabel = 0; } console.log( `VectorDatabase: Loaded ${this.documents.size} document mappings, next label: ${this.nextLabel}`, ); } else { console.log('VectorDatabase: No existing document mappings found'); } } catch (error) { console.error('VectorDatabase: Failed to load document mappings:', error); } } } // Global VectorDatabase singleton let globalVectorDatabase: VectorDatabase | null = null; let currentDimension: number | null = null; /** * Get global VectorDatabase singleton instance * If dimension changes, will recreate instance to ensure compatibility */ export async function getGlobalVectorDatabase( config?: Partial, ): Promise { const newDimension = config?.dimension || 384; // If dimension changes, need to recreate vector database if (globalVectorDatabase && currentDimension !== null && currentDimension !== newDimension) { console.log( `VectorDatabase: Dimension changed from ${currentDimension} to ${newDimension}, recreating instance`, ); // Clean up old instance - this will clean up index files and document mappings try { await globalVectorDatabase.clear(); console.log('VectorDatabase: Successfully cleared old instance for dimension change'); } catch (error) { console.warn('VectorDatabase: Error during cleanup:', error); } globalVectorDatabase = null; currentDimension = null; } if (!globalVectorDatabase) { globalVectorDatabase = new VectorDatabase(config); currentDimension = newDimension; console.log( `VectorDatabase: Created global singleton instance with dimension ${currentDimension}`, ); } return globalVectorDatabase; } /** * Synchronous version of getting global VectorDatabase instance (for backward compatibility) * Note: If dimension change is needed, recommend using async version */ export function getGlobalVectorDatabaseSync( config?: Partial, ): VectorDatabase { const newDimension = config?.dimension || 384; // If dimension changes, log warning but don't clean up (avoid race conditions) if (globalVectorDatabase && currentDimension !== null && currentDimension !== newDimension) { console.warn( `VectorDatabase: Dimension mismatch detected (${currentDimension} vs ${newDimension}). Consider using async version for proper cleanup.`, ); } if (!globalVectorDatabase) { globalVectorDatabase = new VectorDatabase(config); currentDimension = newDimension; console.log( `VectorDatabase: Created global singleton instance with dimension ${currentDimension}`, ); } return globalVectorDatabase; } /** * Reset global VectorDatabase instance (mainly for testing or model switching) */ export async function resetGlobalVectorDatabase(): Promise { console.log('VectorDatabase: Starting global instance reset...'); if (globalVectorDatabase) { try { console.log('VectorDatabase: Clearing existing global instance...'); await globalVectorDatabase.clear(); console.log('VectorDatabase: Global instance cleared successfully'); } catch (error) { console.warn('VectorDatabase: Failed to clear during reset:', error); } } // Additional cleanup: ensure all possible IndexedDB data is cleared try { console.log('VectorDatabase: Performing comprehensive IndexedDB cleanup...'); // Clear all data in VectorDatabaseStorage database await IndexedDBHelper.clearAllData(); // Clear index files from hnswlib-index database try { console.log('VectorDatabase: Clearing HNSW index files from IndexedDB...'); // Try to clean up possible existing index files const possibleIndexFiles = ['tab_content_index.dat', 'content_index.dat', 'vector_index.dat']; // If global hnswlib instance exists, try to delete known index files if (typeof globalHnswlib !== 'undefined' && globalHnswlib) { for (const fileName of possibleIndexFiles) { try { // 1. First try to physically delete index file (using EmscriptenFileSystemManager) try { if (globalHnswlib.EmscriptenFileSystemManager.checkFileExists(fileName)) { console.log(`VectorDatabase: Deleting physical index file: ${fileName}`); globalHnswlib.EmscriptenFileSystemManager.deleteFile(fileName); console.log(`VectorDatabase: Physical index file ${fileName} deleted successfully`); } } catch (fileError) { console.log( `VectorDatabase: Physical index file ${fileName} not found or failed to delete:`, fileError, ); } // 2. Delete index file from IndexedDB const tempIndex = new globalHnswlib.HierarchicalNSW('cosine', 384); await tempIndex.deleteIndex(fileName); console.log(`VectorDatabase: Deleted IndexedDB index file: ${fileName}`); } catch (deleteError) { // File might not exist, this is normal console.log(`VectorDatabase: Index file ${fileName} not found or already deleted`); } } // 3. Force sync filesystem to ensure deletion takes effect try { await new Promise((resolve) => { const timeout = setTimeout(() => { console.warn('VectorDatabase: Filesystem sync timeout during cleanup'); resolve(); // Don't block the process }, 3000); globalHnswlib.EmscriptenFileSystemManager.syncFS(false, () => { clearTimeout(timeout); console.log('VectorDatabase: Filesystem sync completed during cleanup'); resolve(); }); }); } catch (syncError) { console.warn('VectorDatabase: Failed to sync filesystem during cleanup:', syncError); } } } catch (hnswError) { console.warn('VectorDatabase: Failed to clear HNSW index files:', hnswError); } // Clear possible chrome.storage backup data (only clear vector database related data, preserve user preferences) const possibleKeys = [ 'hnswlib_document_mappings_tab_content_index.dat', 'hnswlib_document_mappings_content_index.dat', 'hnswlib_document_mappings_vector_index.dat', // Note: Don't clear selectedModel and selectedVersion, these are user preference settings // Note: Don't clear modelState, this contains model state info and should be handled by model management logic ]; if (possibleKeys.length > 0) { try { await chrome.storage.local.remove(possibleKeys); console.log('VectorDatabase: Chrome storage backup data cleared'); } catch (storageError) { console.warn('VectorDatabase: Failed to clear chrome.storage backup:', storageError); } } console.log('VectorDatabase: Comprehensive cleanup completed'); } catch (cleanupError) { console.warn('VectorDatabase: Comprehensive cleanup failed:', cleanupError); } globalVectorDatabase = null; currentDimension = null; console.log('VectorDatabase: Global singleton instance reset completed'); } /** * Specifically for data cleanup during model switching * Clear all IndexedDB data, including HNSW index files and document mappings */ export async function clearAllVectorData(): Promise { console.log('VectorDatabase: Starting comprehensive vector data cleanup for model switch...'); try { // 1. Clear global instance if (globalVectorDatabase) { try { await globalVectorDatabase.clear(); } catch (error) { console.warn('VectorDatabase: Failed to clear global instance:', error); } } // 2. Clear VectorDatabaseStorage database try { console.log('VectorDatabase: Clearing VectorDatabaseStorage database...'); await IndexedDBHelper.clearAllData(); } catch (error) { console.warn('VectorDatabase: Failed to clear VectorDatabaseStorage:', error); } // 3. Clear hnswlib-index database and physical files try { console.log('VectorDatabase: Clearing hnswlib-index database and physical files...'); // 3.1 First try to physically delete index files (using EmscriptenFileSystemManager) if (typeof globalHnswlib !== 'undefined' && globalHnswlib) { const possibleIndexFiles = [ 'tab_content_index.dat', 'content_index.dat', 'vector_index.dat', ]; for (const fileName of possibleIndexFiles) { try { if (globalHnswlib.EmscriptenFileSystemManager.checkFileExists(fileName)) { console.log(`VectorDatabase: Deleting physical index file: ${fileName}`); globalHnswlib.EmscriptenFileSystemManager.deleteFile(fileName); console.log(`VectorDatabase: Physical index file ${fileName} deleted successfully`); } } catch (fileError) { console.log( `VectorDatabase: Physical index file ${fileName} not found or failed to delete:`, fileError, ); } } // Force sync filesystem try { await new Promise((resolve) => { const timeout = setTimeout(() => { console.warn('VectorDatabase: Filesystem sync timeout during model switch cleanup'); resolve(); }, 3000); globalHnswlib.EmscriptenFileSystemManager.syncFS(false, () => { clearTimeout(timeout); console.log('VectorDatabase: Filesystem sync completed during model switch cleanup'); resolve(); }); }); } catch (syncError) { console.warn( 'VectorDatabase: Failed to sync filesystem during model switch cleanup:', syncError, ); } } // 3.2 Delete entire hnswlib-index database await new Promise((resolve) => { const deleteRequest = indexedDB.deleteDatabase('/hnswlib-index'); deleteRequest.onsuccess = () => { console.log('VectorDatabase: Successfully deleted /hnswlib-index database'); resolve(); }; deleteRequest.onerror = () => { console.warn( 'VectorDatabase: Failed to delete /hnswlib-index database:', deleteRequest.error, ); resolve(); // Don't block the process }; deleteRequest.onblocked = () => { console.warn('VectorDatabase: Deletion of /hnswlib-index database was blocked'); resolve(); // Don't block the process }; }); } catch (error) { console.warn( 'VectorDatabase: Failed to clear hnswlib-index database and physical files:', error, ); } // 4. Clear backup data from chrome.storage try { const storageKeys = [ 'hnswlib_document_mappings_tab_content_index.dat', 'hnswlib_document_mappings_content_index.dat', 'hnswlib_document_mappings_vector_index.dat', ]; await chrome.storage.local.remove(storageKeys); console.log('VectorDatabase: Chrome storage backup data cleared'); } catch (error) { console.warn('VectorDatabase: Failed to clear chrome.storage backup:', error); } // 5. Reset global state globalVectorDatabase = null; currentDimension = null; console.log('VectorDatabase: Comprehensive vector data cleanup completed successfully'); } catch (error) { console.error('VectorDatabase: Comprehensive vector data cleanup failed:', error); throw error; } }