first commit
586
app/chrome-extension/utils/content-indexer.ts
Normal file
@@ -0,0 +1,586 @@
/**
 * Content index manager
 * Responsible for automatically extracting, chunking, and indexing tab content
 */

import { TextChunker } from './text-chunker';
import { VectorDatabase, getGlobalVectorDatabase } from './vector-database';
import {
  SemanticSimilarityEngine,
  SemanticSimilarityEngineProxy,
  PREDEFINED_MODELS,
  type ModelPreset,
} from './semantic-similarity-engine';
import { TOOL_MESSAGE_TYPES } from '@/common/message-types';

export interface IndexingOptions {
  autoIndex?: boolean;
  maxChunksPerPage?: number;
  skipDuplicates?: boolean;
}

export class ContentIndexer {
  private textChunker: TextChunker;
  private vectorDatabase!: VectorDatabase;
  private semanticEngine!: SemanticSimilarityEngine | SemanticSimilarityEngineProxy;
  private isInitialized = false;
  private isInitializing = false;
  private initPromise: Promise<void> | null = null;
  private indexedPages = new Set<string>();
  private readonly options: Required<IndexingOptions>;

  constructor(options?: IndexingOptions) {
    this.options = {
      autoIndex: true,
      maxChunksPerPage: 50,
      skipDuplicates: true,
      ...options,
    };

    this.textChunker = new TextChunker();
  }

  /**
   * Get the currently selected model configuration
   */
  private async getCurrentModelConfig() {
    try {
      const result = await chrome.storage.local.get(['selectedModel', 'selectedVersion']);
      const selectedModel = (result.selectedModel as ModelPreset) || 'multilingual-e5-small';
      const selectedVersion =
        (result.selectedVersion as 'full' | 'quantized' | 'compressed') || 'quantized';

      const modelInfo = PREDEFINED_MODELS[selectedModel];

      return {
        modelPreset: selectedModel,
        modelIdentifier: modelInfo.modelIdentifier,
        dimension: modelInfo.dimension,
        modelVersion: selectedVersion,
        useLocalFiles: false,
        maxLength: 256,
        cacheSize: 1000,
        forceOffscreen: true,
      };
    } catch (error) {
      console.error('ContentIndexer: Failed to get current model config, using default:', error);
      return {
        modelPreset: 'multilingual-e5-small' as const,
        modelIdentifier: 'Xenova/multilingual-e5-small',
        dimension: 384,
        modelVersion: 'quantized' as const,
        useLocalFiles: false,
        maxLength: 256,
        cacheSize: 1000,
        forceOffscreen: true,
      };
    }
  }

  /**
   * Initialize the content indexer
   */
  public async initialize(): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;

    this.isInitializing = true;
    this.initPromise = this._doInitialize().finally(() => {
      this.isInitializing = false;
    });

    return this.initPromise;
  }

  private async _doInitialize(): Promise<void> {
    try {
      // Get the currently selected model configuration
      const engineConfig = await this.getCurrentModelConfig();

      // Use the proxy class to reuse the engine instance in the offscreen document
      this.semanticEngine = new SemanticSimilarityEngineProxy(engineConfig);
      await this.semanticEngine.initialize();

      this.vectorDatabase = await getGlobalVectorDatabase({
        dimension: engineConfig.dimension,
        efSearch: 50,
      });
      await this.vectorDatabase.initialize();

      this.setupTabEventListeners();

      this.isInitialized = true;
    } catch (error) {
      console.error('ContentIndexer: Initialization failed:', error);
      this.isInitialized = false;
      throw error;
    }
  }

  /**
   * Index the content of the specified tab
   */
  public async indexTabContent(tabId: number): Promise<void> {
    // Check if the semantic engine is ready before attempting to index
    if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
      console.log(
        `ContentIndexer: Skipping tab ${tabId} - semantic engine not ready and not initializing`,
      );
      return;
    }

    if (!this.isInitialized) {
      // Only initialize if the semantic engine is already ready
      if (!this.isSemanticEngineReady()) {
        console.log(
          `ContentIndexer: Skipping tab ${tabId} - ContentIndexer not initialized and semantic engine not ready`,
        );
        return;
      }
      await this.initialize();
    }

    try {
      const tab = await chrome.tabs.get(tabId);
      if (!tab.url || !this.shouldIndexUrl(tab.url)) {
        console.log(`ContentIndexer: Skipping tab ${tabId} - URL not indexable`);
        return;
      }

      const pageKey = `${tab.url}_${tab.title}`;
      if (this.options.skipDuplicates && this.indexedPages.has(pageKey)) {
        console.log(`ContentIndexer: Skipping tab ${tabId} - already indexed`);
        return;
      }

      console.log(`ContentIndexer: Starting to index tab ${tabId}: ${tab.title}`);

      const content = await this.extractTabContent(tabId);
      if (!content) {
        console.log(`ContentIndexer: No content extracted from tab ${tabId}`);
        return;
      }

      const chunks = this.textChunker.chunkText(content.textContent, content.title);
      console.log(`ContentIndexer: Generated ${chunks.length} chunks for tab ${tabId}`);

      const chunksToIndex = chunks.slice(0, this.options.maxChunksPerPage);
      if (chunks.length > this.options.maxChunksPerPage) {
        console.log(
          `ContentIndexer: Limited chunks from ${chunks.length} to ${this.options.maxChunksPerPage}`,
        );
      }

      for (const chunk of chunksToIndex) {
        try {
          const embedding = await this.semanticEngine.getEmbedding(chunk.text);
          const label = await this.vectorDatabase.addDocument(
            tabId,
            tab.url!,
            tab.title || '',
            chunk,
            embedding,
          );
          console.log(`ContentIndexer: Indexed chunk ${chunk.index} with label ${label}`);
        } catch (error) {
          console.error(`ContentIndexer: Failed to index chunk ${chunk.index}:`, error);
        }
      }

      this.indexedPages.add(pageKey);

      console.log(
        `ContentIndexer: Successfully indexed ${chunksToIndex.length} chunks for tab ${tabId}`,
      );
    } catch (error) {
      console.error(`ContentIndexer: Failed to index tab ${tabId}:`, error);
    }
  }

  /**
   * Search indexed content
   */
  public async searchContent(query: string, topK: number = 10) {
    // Check if the semantic engine is ready before attempting to search
    if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
      throw new Error(
        'Semantic engine is not ready yet. Please initialize the semantic engine first.',
      );
    }

    if (!this.isInitialized) {
      // Only initialize if the semantic engine is already ready
      if (!this.isSemanticEngineReady()) {
        throw new Error(
          'ContentIndexer not initialized and semantic engine not ready. Please initialize the semantic engine first.',
        );
      }
      await this.initialize();
    }

    try {
      const queryEmbedding = await this.semanticEngine.getEmbedding(query);
      const results = await this.vectorDatabase.search(queryEmbedding, topK);

      console.log(`ContentIndexer: Found ${results.length} results for query: "${query}"`);
      return results;
    } catch (error) {
      console.error('ContentIndexer: Search failed:', error);

      if (error instanceof Error && error.message.includes('not initialized')) {
        console.log(
          'ContentIndexer: Attempting to reinitialize semantic engine and retry search...',
        );
        try {
          await this.semanticEngine.initialize();
          const queryEmbedding = await this.semanticEngine.getEmbedding(query);
          const results = await this.vectorDatabase.search(queryEmbedding, topK);

          console.log(
            `ContentIndexer: Retry successful, found ${results.length} results for query: "${query}"`,
          );
          return results;
        } catch (retryError) {
          console.error('ContentIndexer: Retry after reinitialization also failed:', retryError);
          throw retryError;
        }
      }

      throw error;
    }
  }

  /**
   * Remove the index for a tab
   */
  public async removeTabIndex(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      return;
    }

    try {
      await this.vectorDatabase.removeTabDocuments(tabId);

      for (const pageKey of this.indexedPages) {
        if (pageKey.includes(`tab_${tabId}_`)) {
          this.indexedPages.delete(pageKey);
        }
      }

      console.log(`ContentIndexer: Removed index for tab ${tabId}`);
    } catch (error) {
      console.error(`ContentIndexer: Failed to remove index for tab ${tabId}:`, error);
    }
  }

  /**
   * Check if the semantic engine is ready (checks both local and global state)
   */
  public isSemanticEngineReady(): boolean {
    return this.semanticEngine && this.semanticEngine.isInitialized;
  }

  /**
   * Check if the global semantic engine is ready (in background/offscreen)
   */
  public async isGlobalSemanticEngineReady(): Promise<boolean> {
    try {
      // Since ContentIndexer runs in the background script, call the function directly instead of sending a message
      const { handleGetModelStatus } = await import('@/entrypoints/background/semantic-similarity');
      const response = await handleGetModelStatus();
      return (
        response &&
        response.success &&
        response.status &&
        response.status.initializationStatus === 'ready'
      );
    } catch (error) {
      console.error('ContentIndexer: Failed to check global semantic engine status:', error);
      return false;
    }
  }

  /**
   * Check if the semantic engine is initializing
   */
  public isSemanticEngineInitializing(): boolean {
    return (
      this.isInitializing || (this.semanticEngine && (this.semanticEngine as any).isInitializing)
    );
  }

  /**
   * Reinitialize the content indexer (for model switching)
   */
  public async reinitialize(): Promise<void> {
    console.log('ContentIndexer: Reinitializing for model switch...');

    this.isInitialized = false;
    this.isInitializing = false;
    this.initPromise = null;

    await this.performCompleteDataCleanupForModelSwitch();

    this.indexedPages.clear();
    console.log('ContentIndexer: Cleared indexed pages cache');

    try {
      console.log('ContentIndexer: Creating new semantic engine proxy...');
      const newEngineConfig = await this.getCurrentModelConfig();
      console.log('ContentIndexer: New engine config:', newEngineConfig);

      this.semanticEngine = new SemanticSimilarityEngineProxy(newEngineConfig);
      console.log('ContentIndexer: New semantic engine proxy created');

      await this.semanticEngine.initialize();
      console.log('ContentIndexer: Semantic engine proxy initialization completed');
    } catch (error) {
      console.error('ContentIndexer: Failed to create new semantic engine proxy:', error);
      throw error;
    }

    console.log(
      'ContentIndexer: New semantic engine proxy is ready, proceeding with initialization',
    );

    await this.initialize();

    console.log('ContentIndexer: Reinitialization completed successfully');
  }

  /**
   * Perform complete data cleanup for model switching
   */
  private async performCompleteDataCleanupForModelSwitch(): Promise<void> {
    console.log('ContentIndexer: Starting complete data cleanup for model switch...');

    try {
      // Clear the existing vector database instance
      if (this.vectorDatabase) {
        try {
          console.log('ContentIndexer: Clearing existing vector database instance...');
          await this.vectorDatabase.clear();
          console.log('ContentIndexer: Vector database instance cleared successfully');
        } catch (error) {
          console.warn('ContentIndexer: Failed to clear vector database instance:', error);
        }
      }

      try {
        const { clearAllVectorData } = await import('./vector-database');
        await clearAllVectorData();
        console.log('ContentIndexer: Cleared all vector data for model switch');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear vector data:', error);
      }

      try {
        const keysToRemove = [
          'hnswlib_document_mappings_tab_content_index.dat',
          'hnswlib_document_mappings_content_index.dat',
          'hnswlib_document_mappings_vector_index.dat',
          'vectorDatabaseStats',
          'lastCleanupTime',
        ];
        await chrome.storage.local.remove(keysToRemove);
        console.log('ContentIndexer: Cleared chrome.storage model-related data');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear chrome.storage data:', error);
      }

      try {
        const deleteVectorDB = indexedDB.deleteDatabase('VectorDatabaseStorage');
        await new Promise<void>((resolve) => {
          deleteVectorDB.onsuccess = () => {
            console.log('ContentIndexer: VectorDatabaseStorage database deleted');
            resolve();
          };
          deleteVectorDB.onerror = () => {
            console.warn('ContentIndexer: Failed to delete VectorDatabaseStorage database');
            resolve(); // Don't block the process
          };
          deleteVectorDB.onblocked = () => {
            console.warn('ContentIndexer: VectorDatabaseStorage database deletion blocked');
            resolve(); // Don't block the process
          };
        });

        // Clean up the hnswlib-index database
        const deleteHnswDB = indexedDB.deleteDatabase('/hnswlib-index');
        await new Promise<void>((resolve) => {
          deleteHnswDB.onsuccess = () => {
            console.log('ContentIndexer: /hnswlib-index database deleted');
            resolve();
          };
          deleteHnswDB.onerror = () => {
            console.warn('ContentIndexer: Failed to delete /hnswlib-index database');
            resolve(); // Don't block the process
          };
          deleteHnswDB.onblocked = () => {
            console.warn('ContentIndexer: /hnswlib-index database deletion blocked');
            resolve(); // Don't block the process
          };
        });

        console.log('ContentIndexer: All IndexedDB databases cleared for model switch');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear IndexedDB databases:', error);
      }

      console.log('ContentIndexer: Complete data cleanup for model switch finished successfully');
    } catch (error) {
      console.error('ContentIndexer: Complete data cleanup for model switch failed:', error);
      throw error;
    }
  }

  /**
   * Manually trigger content indexer initialization (async, doesn't wait for completion)
   * Note: This should only be called after the semantic engine is already initialized
   */
  public startSemanticEngineInitialization(): void {
    if (!this.isInitialized && !this.isInitializing) {
      console.log('ContentIndexer: Checking if semantic engine is ready...');

      // Check if the global semantic engine is ready before initializing ContentIndexer
      this.isGlobalSemanticEngineReady()
        .then((isReady) => {
          if (isReady) {
            console.log('ContentIndexer: Starting initialization (semantic engine ready)...');
            this.initialize().catch((error) => {
              console.error('ContentIndexer: Background initialization failed:', error);
            });
          } else {
            console.log('ContentIndexer: Semantic engine not ready, skipping initialization');
          }
        })
        .catch((error) => {
          console.error('ContentIndexer: Failed to check semantic engine status:', error);
        });
    }
  }

  /**
   * Get indexing statistics
   */
  public getStats() {
    const vectorStats = this.vectorDatabase
      ? this.vectorDatabase.getStats()
      : {
          totalDocuments: 0,
          totalTabs: 0,
          indexSize: 0,
        };

    return {
      ...vectorStats,
      indexedPages: this.indexedPages.size,
      isInitialized: this.isInitialized,
      semanticEngineReady: this.isSemanticEngineReady(),
      semanticEngineInitializing: this.isSemanticEngineInitializing(),
    };
  }

  /**
   * Clear all indexes
   */
  public async clearAllIndexes(): Promise<void> {
    if (!this.isInitialized) {
      return;
    }

    try {
      await this.vectorDatabase.clear();
      this.indexedPages.clear();
      console.log('ContentIndexer: All indexes cleared');
    } catch (error) {
      console.error('ContentIndexer: Failed to clear indexes:', error);
    }
  }

  private setupTabEventListeners(): void {
    chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
      if (this.options.autoIndex && changeInfo.status === 'complete' && tab.url) {
        setTimeout(() => {
          if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
            console.log(
              `ContentIndexer: Skipping auto-index for tab ${tabId} - semantic engine not ready`,
            );
            return;
          }

          this.indexTabContent(tabId).catch((error) => {
            console.error(`ContentIndexer: Auto-indexing failed for tab ${tabId}:`, error);
          });
        }, 2000);
      }
    });

    chrome.tabs.onRemoved.addListener(async (tabId) => {
      await this.removeTabIndex(tabId);
    });

    if (chrome.webNavigation) {
      chrome.webNavigation.onCommitted.addListener(async (details) => {
        if (details.frameId === 0) {
          await this.removeTabIndex(details.tabId);
        }
      });
    }
  }

  private shouldIndexUrl(url: string): boolean {
    const excludePatterns = [
      /^chrome:\/\//,
      /^chrome-extension:\/\//,
      /^edge:\/\//,
      /^about:/,
      /^moz-extension:\/\//,
      /^file:\/\//,
    ];

    return !excludePatterns.some((pattern) => pattern.test(url));
  }

  private async extractTabContent(
    tabId: number,
  ): Promise<{ textContent: string; title: string } | null> {
    try {
      await chrome.scripting.executeScript({
        target: { tabId },
        files: ['inject-scripts/web-fetcher-helper.js'],
      });

      const response = await chrome.tabs.sendMessage(tabId, {
        action: TOOL_MESSAGE_TYPES.WEB_FETCHER_GET_TEXT_CONTENT,
      });

      if (response.success && response.textContent) {
        return {
          textContent: response.textContent,
          title: response.title || '',
        };
      } else {
        console.error(
          `ContentIndexer: Failed to extract content from tab ${tabId}:`,
          response.error,
        );
        return null;
      }
    } catch (error) {
      console.error(`ContentIndexer: Error extracting content from tab ${tabId}:`, error);
      return null;
    }
  }
}

let globalContentIndexer: ContentIndexer | null = null;

/**
 * Get the global ContentIndexer instance
 */
export function getGlobalContentIndexer(): ContentIndexer {
  if (!globalContentIndexer) {
    globalContentIndexer = new ContentIndexer();
  }
  return globalContentIndexer;
}
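
A minimal usage sketch of the API above (not part of this commit; the import path is illustrative, and it assumes a background-script context where the semantic engine has already been initialized):

// Usage sketch: obtain the shared indexer and run a semantic search.
import { getGlobalContentIndexer } from './content-indexer';

async function demoSearch(): Promise<void> {
  const indexer = getGlobalContentIndexer();
  // No-op unless the global semantic engine already reports ready.
  indexer.startSemanticEngineInitialization();

  // Later, e.g. inside a message handler:
  const results = await indexer.searchContent('native server configuration', 5);
  console.log(results);
}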
273
app/chrome-extension/utils/i18n.ts
Normal file
@@ -0,0 +1,273 @@
/**
 * Chrome Extension i18n utility
 * Provides safe access to chrome.i18n.getMessage with fallbacks
 */

// Fallback messages for when Chrome APIs aren't available (English)
const fallbackMessages: Record<string, string> = {
  // Extension metadata
  extensionName: 'chrome-mcp-server',
  extensionDescription: 'Exposes browser capabilities with your own chrome',

  // Section headers
  nativeServerConfigLabel: 'Native Server Configuration',
  semanticEngineLabel: 'Semantic Engine',
  embeddingModelLabel: 'Embedding Model',
  indexDataManagementLabel: 'Index Data Management',
  modelCacheManagementLabel: 'Model Cache Management',

  // Status labels
  statusLabel: 'Status',
  runningStatusLabel: 'Running Status',
  connectionStatusLabel: 'Connection Status',
  lastUpdatedLabel: 'Last Updated:',

  // Connection states
  connectButton: 'Connect',
  disconnectButton: 'Disconnect',
  connectingStatus: 'Connecting...',
  connectedStatus: 'Connected',
  disconnectedStatus: 'Disconnected',
  detectingStatus: 'Detecting...',

  // Server states
  serviceRunningStatus: 'Service Running (Port: {0})',
  serviceNotConnectedStatus: 'Service Not Connected',
  connectedServiceNotStartedStatus: 'Connected, Service Not Started',

  // Configuration labels
  mcpServerConfigLabel: 'MCP Server Configuration',
  connectionPortLabel: 'Connection Port',
  refreshStatusButton: 'Refresh Status',
  copyConfigButton: 'Copy Configuration',

  // Action buttons
  retryButton: 'Retry',
  cancelButton: 'Cancel',
  confirmButton: 'Confirm',
  saveButton: 'Save',
  closeButton: 'Close',
  resetButton: 'Reset',

  // Progress states
  initializingStatus: 'Initializing...',
  processingStatus: 'Processing...',
  loadingStatus: 'Loading...',
  clearingStatus: 'Clearing...',
  cleaningStatus: 'Cleaning...',
  downloadingStatus: 'Downloading...',

  // Semantic engine states
  semanticEngineReadyStatus: 'Semantic Engine Ready',
  semanticEngineInitializingStatus: 'Semantic Engine Initializing...',
  semanticEngineInitFailedStatus: 'Semantic Engine Initialization Failed',
  semanticEngineNotInitStatus: 'Semantic Engine Not Initialized',
  initSemanticEngineButton: 'Initialize Semantic Engine',
  reinitializeButton: 'Reinitialize',

  // Model states
  downloadingModelStatus: 'Downloading Model... {0}%',
  switchingModelStatus: 'Switching Model...',
  modelLoadedStatus: 'Model Loaded',
  modelFailedStatus: 'Model Failed to Load',

  // Model descriptions
  lightweightModelDescription: 'Lightweight Multilingual Model',
  betterThanSmallDescription: 'Slightly larger than e5-small, but better performance',
  multilingualModelDescription: 'Multilingual Semantic Model',

  // Performance levels
  fastPerformance: 'Fast',
  balancedPerformance: 'Balanced',
  accuratePerformance: 'Accurate',

  // Error messages
  networkErrorMessage: 'Network connection error, please check network and retry',
  modelCorruptedErrorMessage: 'Model file corrupted or incomplete, please retry download',
  unknownErrorMessage: 'Unknown error, please check if your network can access HuggingFace',
  permissionDeniedErrorMessage: 'Permission denied',
  timeoutErrorMessage: 'Operation timed out',

  // Data statistics
  indexedPagesLabel: 'Indexed Pages',
  indexSizeLabel: 'Index Size',
  activeTabsLabel: 'Active Tabs',
  vectorDocumentsLabel: 'Vector Documents',
  cacheSizeLabel: 'Cache Size',
  cacheEntriesLabel: 'Cache Entries',

  // Data management
  clearAllDataButton: 'Clear All Data',
  clearAllCacheButton: 'Clear All Cache',
  cleanExpiredCacheButton: 'Clean Expired Cache',
  exportDataButton: 'Export Data',
  importDataButton: 'Import Data',

  // Dialog titles
  confirmClearDataTitle: 'Confirm Clear Data',
  settingsTitle: 'Settings',
  aboutTitle: 'About',
  helpTitle: 'Help',

  // Dialog messages
  clearDataWarningMessage:
    'This operation will clear all indexed webpage content and vector data, including:',
  clearDataList1: 'All webpage text content index',
  clearDataList2: 'Vector embedding data',
  clearDataList3: 'Search history and cache',
  clearDataIrreversibleWarning:
    'This operation is irreversible! After clearing, you need to browse webpages again to rebuild the index.',
  confirmClearButton: 'Confirm Clear',

  // Cache states
  cacheDetailsLabel: 'Cache Details',
  noCacheDataMessage: 'No cache data',
  loadingCacheInfoStatus: 'Loading cache information...',
  processingCacheStatus: 'Processing cache...',
  expiredLabel: 'Expired',

  // Browser integration
  bookmarksBarLabel: 'Bookmarks Bar',
  newTabLabel: 'New Tab',
  currentPageLabel: 'Current Page',

  // Accessibility
  menuLabel: 'Menu',
  navigationLabel: 'Navigation',
  mainContentLabel: 'Main Content',

  // Future features
  languageSelectorLabel: 'Language',
  themeLabel: 'Theme',
  lightTheme: 'Light',
  darkTheme: 'Dark',
  autoTheme: 'Auto',
  advancedSettingsLabel: 'Advanced Settings',
  debugModeLabel: 'Debug Mode',
  verboseLoggingLabel: 'Verbose Logging',

  // Notifications
  successNotification: 'Operation completed successfully',
  warningNotification: 'Warning: Please review before proceeding',
  infoNotification: 'Information',
  configCopiedNotification: 'Configuration copied to clipboard',
  dataClearedNotification: 'Data cleared successfully',

  // Units
  bytesUnit: 'bytes',
  kilobytesUnit: 'KB',
  megabytesUnit: 'MB',
  gigabytesUnit: 'GB',
  itemsUnit: 'items',
  pagesUnit: 'pages',

  // Legacy keys for backwards compatibility
  nativeServerConfig: 'Native Server Configuration',
  runningStatus: 'Running Status',
  refreshStatus: 'Refresh Status',
  lastUpdated: 'Last Updated:',
  mcpServerConfig: 'MCP Server Configuration',
  connectionPort: 'Connection Port',
  connecting: 'Connecting...',
  disconnect: 'Disconnect',
  connect: 'Connect',
  semanticEngine: 'Semantic Engine',
  embeddingModel: 'Embedding Model',
  retry: 'Retry',
  indexDataManagement: 'Index Data Management',
  clearing: 'Clearing...',
  clearAllData: 'Clear All Data',
  copyConfig: 'Copy Configuration',
  serviceRunning: 'Service Running (Port: {0})',
  connectedServiceNotStarted: 'Connected, Service Not Started',
  serviceNotConnected: 'Service Not Connected',
  detecting: 'Detecting...',
  lightweightModel: 'Lightweight Multilingual Model',
  betterThanSmall: 'Slightly larger than e5-small, but better performance',
  multilingualModel: 'Multilingual Semantic Model',
  fast: 'Fast',
  balanced: 'Balanced',
  accurate: 'Accurate',
  semanticEngineReady: 'Semantic Engine Ready',
  semanticEngineInitializing: 'Semantic Engine Initializing...',
  semanticEngineInitFailed: 'Semantic Engine Initialization Failed',
  semanticEngineNotInit: 'Semantic Engine Not Initialized',
  downloadingModel: 'Downloading Model... {0}%',
  switchingModel: 'Switching Model...',
  networkError: 'Network connection error, please check network and retry',
  modelCorrupted: 'Model file corrupted or incomplete, please retry download',
  unknownError: 'Unknown error, please check if your network can access HuggingFace',
  reinitialize: 'Reinitialize',
  initializing: 'Initializing...',
  initSemanticEngine: 'Initialize Semantic Engine',
  indexedPages: 'Indexed Pages',
  indexSize: 'Index Size',
  activeTabs: 'Active Tabs',
  vectorDocuments: 'Vector Documents',
  confirmClearData: 'Confirm Clear Data',
  clearDataWarning:
    'This operation will clear all indexed webpage content and vector data, including:',
  clearDataIrreversible:
    'This operation is irreversible! After clearing, you need to browse webpages again to rebuild the index.',
  confirmClear: 'Confirm Clear',
  cancel: 'Cancel',
  confirm: 'Confirm',
  processing: 'Processing...',
  modelCacheManagement: 'Model Cache Management',
  cacheSize: 'Cache Size',
  cacheEntries: 'Cache Entries',
  cacheDetails: 'Cache Details',
  noCacheData: 'No cache data',
  loadingCacheInfo: 'Loading cache information...',
  processingCache: 'Processing cache...',
  cleaning: 'Cleaning...',
  cleanExpiredCache: 'Clean Expired Cache',
  clearAllCache: 'Clear All Cache',
  expired: 'Expired',
  bookmarksBar: 'Bookmarks Bar',
};

/**
 * Safe i18n message getter with fallback support
 * @param key Message key
 * @param substitutions Optional substitution values
 * @returns Localized message or fallback
 */
export function getMessage(key: string, substitutions?: string[]): string {
  try {
    // Check if Chrome extension APIs are available
    if (typeof chrome !== 'undefined' && chrome.i18n && chrome.i18n.getMessage) {
      const message = chrome.i18n.getMessage(key, substitutions);
      if (message) {
        return message;
      }
    }
  } catch (error) {
    console.warn(`Failed to get i18n message for key "${key}":`, error);
  }

  // Fall back to English messages
  let fallback = fallbackMessages[key] || key;

  // Handle substitutions in fallback messages
  if (substitutions && substitutions.length > 0) {
    substitutions.forEach((value, index) => {
      fallback = fallback.replace(`{${index}}`, value);
    });
  }

  return fallback;
}

/**
 * Check if Chrome extension i18n APIs are available
 */
export function isI18nAvailable(): boolean {
  try {
    return (
      typeof chrome !== 'undefined' && chrome.i18n && typeof chrome.i18n.getMessage === 'function'
    );
  } catch {
    return false;
  }
}
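
For orientation, a short sketch of how getMessage resolves keys (the port value is illustrative); note that the {0}-style substitution shown here is applied only on the fallback path, while chrome.i18n handles placeholders itself:

// Resolved via chrome.i18n when available, otherwise from fallbackMessages.
const title = getMessage('semanticEngineLabel'); // 'Semantic Engine'
const status = getMessage('serviceRunningStatus', ['12306']); // 'Service Running (Port: 12306)'
const missing = getMessage('noSuchKey'); // unknown keys fall back to the key itself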
194
app/chrome-extension/utils/image-utils.ts
Normal file
@@ -0,0 +1,194 @@
/**
 * Image processing utility functions
 */

/**
 * Create an ImageBitmap from a data URL (for OffscreenCanvas)
 * @param dataUrl Image data URL
 * @returns Created ImageBitmap object
 */
export async function createImageBitmapFromUrl(dataUrl: string): Promise<ImageBitmap> {
  const response = await fetch(dataUrl);
  const blob = await response.blob();
  return await createImageBitmap(blob);
}

/**
 * Stitch multiple image parts (data URLs) onto a single canvas
 * @param parts Array of image parts, each containing a dataUrl and a y coordinate
 * @param totalWidthPx Total width (pixels)
 * @param totalHeightPx Total height (pixels)
 * @returns Stitched canvas
 */
export async function stitchImages(
  parts: { dataUrl: string; y: number }[],
  totalWidthPx: number,
  totalHeightPx: number,
): Promise<OffscreenCanvas> {
  const canvas = new OffscreenCanvas(totalWidthPx, totalHeightPx);
  const ctx = canvas.getContext('2d');

  if (!ctx) {
    throw new Error('Unable to get canvas context');
  }

  ctx.fillStyle = '#FFFFFF';
  ctx.fillRect(0, 0, canvas.width, canvas.height);

  for (const part of parts) {
    try {
      const img = await createImageBitmapFromUrl(part.dataUrl);
      const sx = 0;
      const sy = 0;
      const sWidth = img.width;
      let sHeight = img.height;
      const dy = part.y;

      // Clamp the last part so it doesn't draw past the canvas bottom
      if (dy + sHeight > totalHeightPx) {
        sHeight = totalHeightPx - dy;
      }

      if (sHeight <= 0) continue;

      ctx.drawImage(img, sx, sy, sWidth, sHeight, 0, dy, sWidth, sHeight);
    } catch (error) {
      console.error('Error stitching image part:', error, part);
    }
  }
  return canvas;
}

/**
 * Crop an image (from a data URL) to the specified rectangle and resize it
 * @param originalDataUrl Original image data URL
 * @param cropRectPx Crop rectangle (physical pixels)
 * @param dpr Device pixel ratio
 * @param targetWidthOpt Optional target output width (CSS pixels)
 * @param targetHeightOpt Optional target output height (CSS pixels)
 * @returns Cropped canvas
 */
export async function cropAndResizeImage(
  originalDataUrl: string,
  cropRectPx: { x: number; y: number; width: number; height: number },
  dpr: number = 1,
  targetWidthOpt?: number,
  targetHeightOpt?: number,
): Promise<OffscreenCanvas> {
  const img = await createImageBitmapFromUrl(originalDataUrl);

  let sx = cropRectPx.x;
  let sy = cropRectPx.y;
  let sWidth = cropRectPx.width;
  let sHeight = cropRectPx.height;

  // Ensure the crop area is within image boundaries
  if (sx < 0) {
    sWidth += sx;
    sx = 0;
  }
  if (sy < 0) {
    sHeight += sy;
    sy = 0;
  }
  if (sx + sWidth > img.width) {
    sWidth = img.width - sx;
  }
  if (sy + sHeight > img.height) {
    sHeight = img.height - sy;
  }

  if (sWidth <= 0 || sHeight <= 0) {
    throw new Error(
      'Invalid calculated crop size (<=0). Element may not be visible or fully captured.',
    );
  }

  const finalCanvasWidthPx = targetWidthOpt ? targetWidthOpt * dpr : sWidth;
  const finalCanvasHeightPx = targetHeightOpt ? targetHeightOpt * dpr : sHeight;

  const canvas = new OffscreenCanvas(finalCanvasWidthPx, finalCanvasHeightPx);
  const ctx = canvas.getContext('2d');

  if (!ctx) {
    throw new Error('Unable to get canvas context');
  }

  ctx.drawImage(img, sx, sy, sWidth, sHeight, 0, 0, finalCanvasWidthPx, finalCanvasHeightPx);

  return canvas;
}

/**
 * Convert a canvas to a data URL
 * @param canvas Canvas
 * @param format Image format
 * @param quality JPEG quality (0-1)
 * @returns Data URL
 */
export async function canvasToDataURL(
  canvas: OffscreenCanvas,
  format: string = 'image/png',
  quality?: number,
): Promise<string> {
  const blob = await canvas.convertToBlob({
    type: format,
    quality: format === 'image/jpeg' ? quality : undefined,
  });

  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onloadend = () => resolve(reader.result as string);
    reader.onerror = reject;
    reader.readAsDataURL(blob);
  });
}

/**
 * Compresses an image by scaling it and converting it to a target format with a specific quality.
 * This is the most effective way to reduce image data size for transport or storage.
 *
 * @param {string} imageDataUrl - The original image data URL (e.g., from captureVisibleTab).
 * @param {object} options - Compression options.
 * @param {number} [options.scale=1.0] - The scaling factor for dimensions (e.g., 0.7 for 70%).
 * @param {number} [options.quality=0.8] - The quality for lossy formats like JPEG (0.0 to 1.0).
 * @param {string} [options.format='image/jpeg'] - The target image format.
 * @returns {Promise<{dataUrl: string, mimeType: string}>} A promise that resolves to the compressed image data URL and its MIME type.
 */
export async function compressImage(
  imageDataUrl: string,
  options: { scale?: number; quality?: number; format?: 'image/jpeg' | 'image/webp' },
): Promise<{ dataUrl: string; mimeType: string }> {
  const { scale = 1.0, quality = 0.8, format = 'image/jpeg' } = options;

  // 1. Create an ImageBitmap from the original data URL for efficient drawing.
  const imageBitmap = await createImageBitmapFromUrl(imageDataUrl);

  // 2. Calculate the new dimensions based on the scale factor.
  const newWidth = Math.round(imageBitmap.width * scale);
  const newHeight = Math.round(imageBitmap.height * scale);

  // 3. Use OffscreenCanvas for performance, as it doesn't need to be in the DOM.
  const canvas = new OffscreenCanvas(newWidth, newHeight);
  const ctx = canvas.getContext('2d');

  if (!ctx) {
    throw new Error('Failed to get 2D context from OffscreenCanvas');
  }

  // 4. Draw the original image onto the smaller canvas, effectively resizing it.
  ctx.drawImage(imageBitmap, 0, 0, newWidth, newHeight);

  // 5. Export the canvas content to the target format with the specified quality.
  // This is the step that performs the data compression.
  const compressedBlob = await canvas.convertToBlob({ type: format, quality: quality });

  // Convert the blob to a data URL, since OffscreenCanvas has no toDataURL in
  // all execution contexts (like service workers).
  const dataUrl = await new Promise<string>((resolve) => {
    const reader = new FileReader();
    reader.onloadend = () => resolve(reader.result as string);
    reader.readAsDataURL(compressedBlob);
  });

  return { dataUrl, mimeType: format };
}
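
A sketch of how these helpers compose in a capture pipeline (the capture call, scale, and quality values are illustrative, not part of this file):

// Capture the visible tab and shrink it for transport.
async function captureCompressed(): Promise<string> {
  const original = await chrome.tabs.captureVisibleTab({ format: 'png' });
  const { dataUrl } = await compressImage(original, {
    scale: 0.7, // 70% of the original dimensions
    quality: 0.8, // JPEG quality
    format: 'image/jpeg',
  });
  return dataUrl;
}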
132
app/chrome-extension/utils/lru-cache.ts
Normal file
@@ -0,0 +1,132 @@
class LRUNode<K, V> {
  constructor(
    public key: K,
    public value: V,
    public prev: LRUNode<K, V> | null = null,
    public next: LRUNode<K, V> | null = null,
    public frequency: number = 1,
    public lastAccessed: number = Date.now(),
  ) {}
}

class LRUCache<K = string, V = any> {
  private capacity: number;
  private cache: Map<K, LRUNode<K, V>>;
  private head: LRUNode<K, V>;
  private tail: LRUNode<K, V>;

  constructor(capacity: number) {
    this.capacity = capacity > 0 ? capacity : 100;
    this.cache = new Map<K, LRUNode<K, V>>();

    // Sentinel head/tail nodes keep list insertion and removal branch-free
    this.head = new LRUNode<K, V>(null as any, null as any);
    this.tail = new LRUNode<K, V>(null as any, null as any);
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }

  private addToHead(node: LRUNode<K, V>): void {
    node.prev = this.head;
    node.next = this.head.next;
    this.head.next!.prev = node;
    this.head.next = node;
  }

  private removeNode(node: LRUNode<K, V>): void {
    node.prev!.next = node.next;
    node.next!.prev = node.prev;
  }

  private moveToHead(node: LRUNode<K, V>): void {
    this.removeNode(node);
    this.addToHead(node);
  }

  private findVictimNode(): LRUNode<K, V> {
    // Scan a small window from the tail and pick the node with the lowest
    // combined recency/frequency score
    let victim = this.tail.prev!;
    let minScore = this.calculateEvictionScore(victim);

    let current = this.tail.prev;
    let count = 0;
    const maxCheck = Math.min(5, this.cache.size);

    while (current && current !== this.head && count < maxCheck) {
      const score = this.calculateEvictionScore(current);
      if (score < minScore) {
        minScore = score;
        victim = current;
      }
      current = current.prev;
      count++;
    }

    return victim;
  }

  private calculateEvictionScore(node: LRUNode<K, V>): number {
    const now = Date.now();
    const timeSinceAccess = now - node.lastAccessed;
    const timeWeight = 1 / (1 + timeSinceAccess / (1000 * 60));
    const frequencyWeight = Math.log(node.frequency + 1);

    return frequencyWeight * timeWeight;
  }

  get(key: K): V | null {
    const node = this.cache.get(key);
    if (node) {
      node.frequency++;
      node.lastAccessed = Date.now();
      this.moveToHead(node);
      return node.value;
    }
    return null;
  }

  set(key: K, value: V): void {
    const existingNode = this.cache.get(key);

    if (existingNode) {
      existingNode.value = value;
      this.moveToHead(existingNode);
    } else {
      const newNode = new LRUNode(key, value);

      if (this.cache.size >= this.capacity) {
        const victimNode = this.findVictimNode();
        this.removeNode(victimNode);
        this.cache.delete(victimNode.key);
      }

      this.cache.set(key, newNode);
      this.addToHead(newNode);
    }
  }

  has(key: K): boolean {
    return this.cache.has(key);
  }

  clear(): void {
    this.cache.clear();
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }

  get size(): number {
    return this.cache.size;
  }

  /**
   * Get cache statistics
   */
  getStats(): { size: number; capacity: number; usage: number } {
    return {
      size: this.cache.size,
      capacity: this.capacity,
      usage: this.cache.size / this.capacity,
    };
  }
}

export default LRUCache;
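
Note that eviction here is not pure LRU: findVictimNode scans a small window from the tail and evicts the entry with the lowest combined recency/frequency score. A small sketch of the observable behavior:

import LRUCache from './lru-cache';

const cache = new LRUCache<string, number>(2);
cache.set('a', 1);
cache.set('b', 2);
cache.get('a'); // bumps 'a' in both recency and frequency
cache.set('c', 3); // at capacity: 'b' scores lowest in the tail window and is evicted
console.log(cache.has('a'), cache.has('b')); // true false
console.log(cache.getStats()); // { size: 2, capacity: 2, usage: 1 }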
369
app/chrome-extension/utils/model-cache-manager.ts
Normal file
@@ -0,0 +1,369 @@
/**
 * Model Cache Manager
 */

const CACHE_NAME = 'onnx-model-cache-v1';
const CACHE_EXPIRY_DAYS = 30;
const MAX_CACHE_SIZE_MB = 500;

export interface CacheMetadata {
  timestamp: number;
  modelUrl: string;
  size: number;
  version: string;
}

export interface CacheEntry {
  url: string;
  size: number;
  sizeMB: number;
  timestamp: number;
  age: string;
  expired: boolean;
}

export interface CacheStats {
  totalSize: number;
  totalSizeMB: number;
  entryCount: number;
  entries: CacheEntry[];
}

interface CacheEntryDetails {
  url: string;
  timestamp: number;
  size: number;
}

export class ModelCacheManager {
  private static instance: ModelCacheManager | null = null;

  public static getInstance(): ModelCacheManager {
    if (!ModelCacheManager.instance) {
      ModelCacheManager.instance = new ModelCacheManager();
    }
    return ModelCacheManager.instance;
  }

  private constructor() {}

  private getCacheMetadataKey(modelUrl: string): string {
    const encodedUrl = encodeURIComponent(modelUrl);
    return `https://cache-metadata.local/${encodedUrl}`;
  }

  private isCacheExpired(metadata: CacheMetadata): boolean {
    const now = Date.now();
    const expiryTime = metadata.timestamp + CACHE_EXPIRY_DAYS * 24 * 60 * 60 * 1000;
    return now > expiryTime;
  }

  private isMetadataUrl(url: string): boolean {
    return url.startsWith('https://cache-metadata.local/');
  }

  private async collectCacheEntries(): Promise<{
    entries: CacheEntryDetails[];
    totalSize: number;
    entryCount: number;
  }> {
    const cache = await caches.open(CACHE_NAME);
    const keys = await cache.keys();
    const entries: CacheEntryDetails[] = [];
    let totalSize = 0;
    let entryCount = 0;

    for (const request of keys) {
      if (this.isMetadataUrl(request.url)) continue;

      const response = await cache.match(request);
      if (response) {
        const blob = await response.blob();
        const size = blob.size;
        totalSize += size;
        entryCount++;

        const metadataResponse = await cache.match(this.getCacheMetadataKey(request.url));
        let timestamp = 0;

        if (metadataResponse) {
          try {
            const metadata: CacheMetadata = await metadataResponse.json();
            timestamp = metadata.timestamp;
          } catch (error) {
            console.warn('Failed to parse cache metadata:', error);
          }
        }

        entries.push({
          url: request.url,
          timestamp,
          size,
        });
      }
    }

    return { entries, totalSize, entryCount };
  }

  public async cleanupCacheOnDemand(newDataSize: number = 0): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    const { entries, totalSize } = await this.collectCacheEntries();
    const maxSizeBytes = MAX_CACHE_SIZE_MB * 1024 * 1024;
    const projectedSize = totalSize + newDataSize;

    if (projectedSize <= maxSizeBytes) {
      return;
    }

    console.log(
      `Cache size (${(totalSize / 1024 / 1024).toFixed(2)}MB) + new data (${(newDataSize / 1024 / 1024).toFixed(2)}MB) exceeds limit (${MAX_CACHE_SIZE_MB}MB), cleaning up...`,
    );

    const expiredEntries: CacheEntryDetails[] = [];
    const validEntries: CacheEntryDetails[] = [];

    for (const entry of entries) {
      const metadataResponse = await cache.match(this.getCacheMetadataKey(entry.url));
      let isExpired = false;

      if (metadataResponse) {
        try {
          const metadata: CacheMetadata = await metadataResponse.json();
          isExpired = this.isCacheExpired(metadata);
        } catch (error) {
          isExpired = true;
        }
      } else {
        isExpired = true;
      }

      if (isExpired) {
        expiredEntries.push(entry);
      } else {
        validEntries.push(entry);
      }
    }

    let currentSize = totalSize;
    for (const entry of expiredEntries) {
      await cache.delete(entry.url);
      await cache.delete(this.getCacheMetadataKey(entry.url));
      currentSize -= entry.size;
      console.log(
        `Cleaned up expired cache entry: ${entry.url} (${(entry.size / 1024 / 1024).toFixed(2)}MB)`,
      );
    }

    if (currentSize + newDataSize > maxSizeBytes) {
      // Evict the oldest valid entries first until the new data fits
      validEntries.sort((a, b) => a.timestamp - b.timestamp);

      for (const entry of validEntries) {
        if (currentSize + newDataSize <= maxSizeBytes) break;

        await cache.delete(entry.url);
        await cache.delete(this.getCacheMetadataKey(entry.url));
        currentSize -= entry.size;
        console.log(
          `Cleaned up old cache entry: ${entry.url} (${(entry.size / 1024 / 1024).toFixed(2)}MB)`,
        );
      }
    }

    console.log(`Cache cleanup complete. New size: ${(currentSize / 1024 / 1024).toFixed(2)}MB`);
  }

  public async storeCacheMetadata(modelUrl: string, size: number): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    const metadata: CacheMetadata = {
      timestamp: Date.now(),
      modelUrl,
      size,
      version: CACHE_NAME,
    };

    const metadataResponse = new Response(JSON.stringify(metadata), {
      headers: { 'Content-Type': 'application/json' },
    });

    await cache.put(this.getCacheMetadataKey(modelUrl), metadataResponse);
  }

  public async getCachedModelData(modelUrl: string): Promise<ArrayBuffer | null> {
    const cache = await caches.open(CACHE_NAME);
    const cachedResponse = await cache.match(modelUrl);

    if (!cachedResponse) {
      return null;
    }

    const metadataResponse = await cache.match(this.getCacheMetadataKey(modelUrl));
    if (metadataResponse) {
      try {
        const metadata: CacheMetadata = await metadataResponse.json();
        if (!this.isCacheExpired(metadata)) {
          console.log('Model found in cache and not expired. Loading from cache.');
          return cachedResponse.arrayBuffer();
        } else {
          console.log('Cached model is expired, removing...');
          await this.deleteCacheEntry(modelUrl);
          return null;
        }
      } catch (error) {
        console.warn('Failed to parse cache metadata, treating as expired:', error);
        await this.deleteCacheEntry(modelUrl);
        return null;
      }
    } else {
      console.log('Cached model has no metadata, treating as expired...');
      await this.deleteCacheEntry(modelUrl);
      return null;
    }
  }

  public async storeModelData(modelUrl: string, data: ArrayBuffer): Promise<void> {
    await this.cleanupCacheOnDemand(data.byteLength);

    const cache = await caches.open(CACHE_NAME);
    const response = new Response(data);

    await cache.put(modelUrl, response);
    await this.storeCacheMetadata(modelUrl, data.byteLength);

    console.log(
      `Model cached successfully (${(data.byteLength / 1024 / 1024).toFixed(2)}MB): ${modelUrl}`,
    );
  }

  public async deleteCacheEntry(modelUrl: string): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    await cache.delete(modelUrl);
    await cache.delete(this.getCacheMetadataKey(modelUrl));
  }

  public async clearAllCache(): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    const keys = await cache.keys();

    for (const request of keys) {
      await cache.delete(request);
    }

    console.log('All model cache entries cleared');
  }

  public async getCacheStats(): Promise<CacheStats> {
    const { entries, totalSize, entryCount } = await this.collectCacheEntries();
    const cache = await caches.open(CACHE_NAME);

    const cacheEntries: CacheEntry[] = [];

    for (const entry of entries) {
      const metadataResponse = await cache.match(this.getCacheMetadataKey(entry.url));
      let expired = false;

      if (metadataResponse) {
        try {
          const metadata: CacheMetadata = await metadataResponse.json();
          expired = this.isCacheExpired(metadata);
        } catch (error) {
          expired = true;
        }
      } else {
        expired = true;
      }

      const age =
        entry.timestamp > 0
          ? `${Math.round((Date.now() - entry.timestamp) / (1000 * 60 * 60 * 24))} days`
          : 'unknown';

      cacheEntries.push({
        url: entry.url,
        size: entry.size,
        sizeMB: Number((entry.size / 1024 / 1024).toFixed(2)),
        timestamp: entry.timestamp,
        age,
        expired,
      });
    }

    return {
      totalSize,
      totalSizeMB: Number((totalSize / 1024 / 1024).toFixed(2)),
      entryCount,
      entries: cacheEntries.sort((a, b) => b.timestamp - a.timestamp),
    };
  }

  public async manualCleanup(): Promise<void> {
    await this.cleanupCacheOnDemand(0);
    console.log('Manual cache cleanup completed');
  }

  /**
   * Check if a specific model is cached and not expired
   * @param modelUrl The model URL to check
   * @returns Promise<boolean> True if the model is cached and valid
   */
  public async isModelCached(modelUrl: string): Promise<boolean> {
    try {
      const cache = await caches.open(CACHE_NAME);
      const cachedResponse = await cache.match(modelUrl);

      if (!cachedResponse) {
        return false;
      }

      const metadataResponse = await cache.match(this.getCacheMetadataKey(modelUrl));
      if (metadataResponse) {
        try {
          const metadata: CacheMetadata = await metadataResponse.json();
          return !this.isCacheExpired(metadata);
        } catch (error) {
          console.warn('Failed to parse cache metadata for cache check:', error);
          return false;
        }
      } else {
        // No metadata means the entry is treated as expired
        return false;
      }
    } catch (error) {
      console.error('Error checking model cache:', error);
      return false;
    }
  }

  /**
   * Check if any valid (non-expired) model cache exists
   * @returns Promise<boolean> True if at least one valid model cache exists
   */
  public async hasAnyValidCache(): Promise<boolean> {
    try {
      const cache = await caches.open(CACHE_NAME);
      const keys = await cache.keys();

      for (const request of keys) {
        if (this.isMetadataUrl(request.url)) continue;

        const metadataResponse = await cache.match(this.getCacheMetadataKey(request.url));
        if (metadataResponse) {
          try {
            const metadata: CacheMetadata = await metadataResponse.json();
            if (!this.isCacheExpired(metadata)) {
              return true; // Found at least one valid cache entry
            }
          } catch (error) {
            // Skip invalid metadata
            continue;
          }
        }
      }

      return false;
    } catch (error) {
      console.error('Error checking for valid cache:', error);
      return false;
    }
  }
}
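
A sketch of the intended read-through flow (the model URL is a placeholder): on a cache miss the caller downloads the bytes and stores them, so later loads are served from the Cache Storage API:

const cacheManager = ModelCacheManager.getInstance();
const modelUrl = 'https://example.com/models/model_quantized.onnx'; // placeholder

async function loadModelBytes(): Promise<ArrayBuffer> {
  const cached = await cacheManager.getCachedModelData(modelUrl);
  if (cached) return cached;

  const response = await fetch(modelUrl);
  const data = await response.arrayBuffer();
  await cacheManager.storeModelData(modelUrl, data); // also triggers size-based cleanup
  return data;
}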
108
app/chrome-extension/utils/offscreen-manager.ts
Normal file
@@ -0,0 +1,108 @@
/**
 * Offscreen Document manager
 * Ensures only one offscreen document is created across the entire extension to avoid conflicts
 */

export class OffscreenManager {
  private static instance: OffscreenManager | null = null;
  private isCreated = false;
  private isCreating = false;
  private createPromise: Promise<void> | null = null;

  private constructor() {}

  /**
   * Get singleton instance
   */
  public static getInstance(): OffscreenManager {
    if (!OffscreenManager.instance) {
      OffscreenManager.instance = new OffscreenManager();
    }
    return OffscreenManager.instance;
  }

  /**
   * Ensure the offscreen document exists, creating it at most once
   */
  public async ensureOffscreenDocument(): Promise<void> {
    if (this.isCreated) {
      return;
    }

    if (this.isCreating && this.createPromise) {
      return this.createPromise;
    }

    this.isCreating = true;
    this.createPromise = this._doCreateOffscreenDocument().finally(() => {
      this.isCreating = false;
    });

    return this.createPromise;
  }

  private async _doCreateOffscreenDocument(): Promise<void> {
    try {
      if (!chrome.offscreen) {
        throw new Error('Offscreen API not available. Chrome 109+ required.');
      }

      const existingContexts = await (chrome.runtime as any).getContexts({
        contextTypes: ['OFFSCREEN_DOCUMENT'],
      });

      if (existingContexts && existingContexts.length > 0) {
        console.log('OffscreenManager: Offscreen document already exists');
        this.isCreated = true;
        return;
      }

      await chrome.offscreen.createDocument({
        url: 'offscreen.html',
        reasons: ['WORKERS'],
        justification: 'Need to run semantic similarity engine with workers',
      });

      this.isCreated = true;
      console.log('OffscreenManager: Offscreen document created successfully');
    } catch (error) {
      console.error('OffscreenManager: Failed to create offscreen document:', error);
      this.isCreated = false;
      throw error;
    }
  }

  /**
   * Check if offscreen document is created
   */
  public isOffscreenDocumentCreated(): boolean {
    return this.isCreated;
  }

  /**
   * Close offscreen document
   */
  public async closeOffscreenDocument(): Promise<void> {
    try {
      if (chrome.offscreen && this.isCreated) {
        await chrome.offscreen.closeDocument();
        this.isCreated = false;
        console.log('OffscreenManager: Offscreen document closed');
      }
    } catch (error) {
      console.error('OffscreenManager: Failed to close offscreen document:', error);
    }
  }

  /**
   * Reset state (for testing)
   */
  public reset(): void {
    this.isCreated = false;
    this.isCreating = false;
    this.createPromise = null;
  }
}

export const offscreenManager = OffscreenManager.getInstance();
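Because ensureOffscreenDocument() coalesces concurrent callers onto a single createPromise, it is safe to call unconditionally before every message aimed at the offscreen page. A minimal sketch of that pattern from a background service worker; the message type and payload shape are assumptions for illustration, not part of this file:

// Sketch: ensure the offscreen document exists, then delegate work to it.
async function computeSimilarityInOffscreen(textA: string, textB: string): Promise<number> {
  await offscreenManager.ensureOffscreenDocument();
  const response = await chrome.runtime.sendMessage({
    type: 'SIMILARITY_ENGINE_COMPUTE', // assumed message type
    payload: { textA, textB },
  });
  return response?.similarity ?? 0;
}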
2388
app/chrome-extension/utils/semantic-similarity-engine.ts
Normal file
File diff suppressed because it is too large
496
app/chrome-extension/utils/simd-math-engine.ts
Normal file
@@ -0,0 +1,496 @@
/**
 * SIMD-optimized mathematical computation engine
 * Uses WebAssembly + SIMD instructions to accelerate vector calculations
 */

interface SIMDMathWasm {
  free(): void;
  cosine_similarity(vec_a: Float32Array, vec_b: Float32Array): number;
  batch_similarity(vectors: Float32Array, query: Float32Array, vector_dim: number): Float32Array;
  similarity_matrix(
    vectors_a: Float32Array,
    vectors_b: Float32Array,
    vector_dim: number,
  ): Float32Array;
}

interface WasmModule {
  SIMDMath: new () => SIMDMathWasm;
  memory: WebAssembly.Memory;
  default: (module_or_path?: any) => Promise<any>;
}

export class SIMDMathEngine {
  private wasmModule: WasmModule | null = null;
  private simdMath: SIMDMathWasm | null = null;
  private isInitialized = false;
  private isInitializing = false;
  private initPromise: Promise<void> | null = null;

  private alignedBufferPool: Map<number, Float32Array[]> = new Map();
  private maxPoolSize = 5;

  async initialize(): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;

    this.isInitializing = true;
    this.initPromise = this._doInitialize().finally(() => {
      this.isInitializing = false;
    });

    return this.initPromise;
  }

  private async _doInitialize(): Promise<void> {
    try {
      console.log('SIMDMathEngine: Initializing WebAssembly module...');

      const wasmUrl = chrome.runtime.getURL('workers/simd_math.js');
      const wasmModule = await import(wasmUrl);

      const wasmInstance = await wasmModule.default();

      this.wasmModule = {
        SIMDMath: wasmModule.SIMDMath,
        memory: wasmInstance.memory,
        default: wasmModule.default,
      };

      this.simdMath = new this.wasmModule.SIMDMath();

      this.isInitialized = true;
      console.log('SIMDMathEngine: WebAssembly module initialized successfully');
    } catch (error) {
      console.error('SIMDMathEngine: Failed to initialize WebAssembly module:', error);
      this.isInitialized = false;
      throw error;
    }
  }
  /**
   * Get a pooled buffer suitable for SIMD use
   */
  private getAlignedBuffer(size: number): Float32Array {
    if (!this.alignedBufferPool.has(size)) {
      this.alignedBufferPool.set(size, []);
    }

    const pool = this.alignedBufferPool.get(size)!;
    if (pool.length > 0) {
      return pool.pop()!;
    }

    // Allocate a fresh zero-offset buffer. A Float32Array's byteOffset must be
    // a multiple of 4, and a zero offset also satisfies the 16-byte isAligned()
    // check below; the JS engine aligns the ArrayBuffer backing store itself.
    return new Float32Array(new ArrayBuffer(size * 4));
  }
  /**
   * Release aligned buffer back to pool
   */
  private releaseAlignedBuffer(buffer: Float32Array): void {
    const size = buffer.length;
    const pool = this.alignedBufferPool.get(size);
    if (pool && pool.length < this.maxPoolSize) {
      buffer.fill(0); // Clear to zero
      pool.push(buffer);
    }
  }

  /**
   * Check if vector is already aligned
   */
  private isAligned(array: Float32Array): boolean {
    return array.byteOffset % 16 === 0;
  }

  /**
   * Ensure vector alignment, creating an aligned copy if needed
   */
  private ensureAligned(array: Float32Array): { aligned: Float32Array; needsRelease: boolean } {
    if (this.isAligned(array)) {
      return { aligned: array, needsRelease: false };
    }

    const aligned = this.getAlignedBuffer(array.length);
    aligned.set(array);
    return { aligned, needsRelease: true };
  }

  /**
   * SIMD-optimized cosine similarity calculation
   */
  async cosineSimilarity(vecA: Float32Array, vecB: Float32Array): Promise<number> {
    if (!this.isInitialized) {
      await this.initialize();
    }

    if (!this.simdMath) {
      throw new Error('SIMD math engine not initialized');
    }

    // Ensure vector alignment
    const { aligned: alignedA, needsRelease: releaseA } = this.ensureAligned(vecA);
    const { aligned: alignedB, needsRelease: releaseB } = this.ensureAligned(vecB);

    try {
      return this.simdMath.cosine_similarity(alignedA, alignedB);
    } finally {
      // Release temporary buffers
      if (releaseA) this.releaseAlignedBuffer(alignedA);
      if (releaseB) this.releaseAlignedBuffer(alignedB);
    }
  }

  /**
   * Batch similarity calculation
   */
  async batchSimilarity(vectors: Float32Array[], query: Float32Array): Promise<number[]> {
    if (!this.isInitialized) {
      await this.initialize();
    }

    if (!this.simdMath) {
      throw new Error('SIMD math engine not initialized');
    }

    const vectorDim = query.length;
    const numVectors = vectors.length;

    // Pack all vectors into a contiguous memory layout
    const packedVectors = this.getAlignedBuffer(numVectors * vectorDim);
    const { aligned: alignedQuery, needsRelease: releaseQuery } = this.ensureAligned(query);

    try {
      // Copy vector data
      let offset = 0;
      for (const vector of vectors) {
        packedVectors.set(vector, offset);
        offset += vectorDim;
      }

      // Batch calculation
      const results = this.simdMath.batch_similarity(packedVectors, alignedQuery, vectorDim);
      return Array.from(results);
    } finally {
      this.releaseAlignedBuffer(packedVectors);
      if (releaseQuery) this.releaseAlignedBuffer(alignedQuery);
    }
  }

  /**
   * Similarity matrix calculation
   */
  async similarityMatrix(vectorsA: Float32Array[], vectorsB: Float32Array[]): Promise<number[][]> {
    if (!this.isInitialized) {
      await this.initialize();
    }

    if (!this.simdMath || vectorsA.length === 0 || vectorsB.length === 0) {
      return [];
    }

    const vectorDim = vectorsA[0].length;
    const numA = vectorsA.length;
    const numB = vectorsB.length;

    // Pack vectors
    const packedA = this.getAlignedBuffer(numA * vectorDim);
    const packedB = this.getAlignedBuffer(numB * vectorDim);

    try {
      // Copy data
      let offsetA = 0;
      for (const vector of vectorsA) {
        packedA.set(vector, offsetA);
        offsetA += vectorDim;
      }

      let offsetB = 0;
      for (const vector of vectorsB) {
        packedB.set(vector, offsetB);
        offsetB += vectorDim;
      }

      // Calculate matrix
      const flatResults = this.simdMath.similarity_matrix(packedA, packedB, vectorDim);

      // Convert flat results to a 2D array
      const matrix: number[][] = [];
      for (let i = 0; i < numA; i++) {
        const row: number[] = [];
        for (let j = 0; j < numB; j++) {
          row.push(flatResults[i * numB + j]);
        }
        matrix.push(row);
      }

      return matrix;
    } finally {
      this.releaseAlignedBuffer(packedA);
      this.releaseAlignedBuffer(packedB);
    }
  }

  /**
   * Check SIMD support
   */
  static async checkSIMDSupport(): Promise<boolean> {
    try {
      console.log('SIMDMathEngine: Checking SIMD support...');

      // Get browser information
      const userAgent = navigator.userAgent;
      const browserInfo = SIMDMathEngine.getBrowserInfo();
      console.log('Browser info:', browserInfo);
      console.log('User Agent:', userAgent);

      // Check basic WebAssembly support
      if (typeof WebAssembly !== 'object') {
        console.log('WebAssembly not supported');
        return false;
      }
      console.log('✅ WebAssembly basic support: OK');

      // Check the WebAssembly.validate method
      if (typeof WebAssembly.validate !== 'function') {
        console.log('❌ WebAssembly.validate not available');
        return false;
      }
      console.log('✅ WebAssembly.validate: OK');

      // Validate a minimal WebAssembly module
      const basicWasm = new Uint8Array([0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00]);
      const basicValid = WebAssembly.validate(basicWasm);
      console.log('✅ Basic WASM validation:', basicValid);

      // Check WebAssembly SIMD support using dedicated SIMD test modules
      console.log('Testing SIMD WASM module...');

      // Method 1: standard SIMD detection bytecode
      let wasmSIMDSupported = false;
      try {
        // A minimal SIMD module containing a v128.const instruction
        const simdWasm = new Uint8Array([
          0x00, 0x61, 0x73, 0x6d, // WASM magic
          0x01, 0x00, 0x00, 0x00, // version
          0x01, 0x05, 0x01, // type section
          0x60, 0x00, 0x01, 0x7b, // function type: () -> v128
          0x03, 0x02, 0x01, 0x00, // function section
          0x0a, 0x0a, 0x01, // code section
          0x08, 0x00, // function body
          0xfd, 0x0c, // v128.const
          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 16-byte immediate
          0x0b, // end
        ]);
        wasmSIMDSupported = WebAssembly.validate(simdWasm);
        console.log('Method 1 - Standard SIMD test result:', wasmSIMDSupported);
      } catch (error) {
        console.log('Method 1 failed:', error);
      }

      // Method 2: if method 1 fails, try a simpler SIMD instruction
      if (!wasmSIMDSupported) {
        try {
          // Test using the i32x4.splat instruction
          const simpleSimdWasm = new Uint8Array([
            0x00, 0x61, 0x73, 0x6d, // WASM magic
            0x01, 0x00, 0x00, 0x00, // version
            0x01, 0x06, 0x01, // type section
            0x60, 0x01, 0x7f, 0x01, 0x7b, // function type: (i32) -> v128
            0x03, 0x02, 0x01, 0x00, // function section
            0x0a, 0x07, 0x01, // code section
            0x05, 0x00, // function body
            0x20, 0x00, // local.get 0
            0xfd, 0x0d, // i32x4.splat
            0x0b, // end
          ]);
          wasmSIMDSupported = WebAssembly.validate(simpleSimdWasm);
          console.log('Method 2 - Simple SIMD test result:', wasmSIMDSupported);
        } catch (error) {
          console.log('Method 2 failed:', error);
        }
      }

      // Method 3: if bytecode validation fails, fall back to a browser version check
      if (!wasmSIMDSupported) {
        try {
          // Confirm basic module validation still works
          const featureTest = WebAssembly.validate(
            new Uint8Array([0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00]),
          );

          if (featureTest) {
            // In Chrome, if basic WebAssembly works and the version is >= 91, SIMD is usually available
            const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
            if (chromeMatch && parseInt(chromeMatch[1]) >= 91) {
              console.log('Method 3 - Chrome version check: SIMD should be available');
              wasmSIMDSupported = true;
            }
          }
        } catch (error) {
          console.log('Method 3 failed:', error);
        }
      }

      // Report the final result
      if (!wasmSIMDSupported) {
        console.log('❌ SIMD not supported. Browser requirements:');
        console.log('- Chrome 91+, Firefox 89+, Safari 16.4+, Edge 91+');
        console.log('Your browser should support SIMD. Possible issues:');
        console.log('1. Extension context limitations');
        console.log('2. Security policies');
        console.log('3. Feature flags disabled');
      } else {
        console.log('✅ SIMD supported!');
      }

      return wasmSIMDSupported;
    } catch (error) {
      console.error('SIMD support check failed:', error);
      if (error instanceof Error) {
        console.error('Error details:', {
          name: error.name,
          message: error.message,
          stack: error.stack,
        });
      }
      return false;
    }
  }

  /**
   * Get browser information
   */
  static getBrowserInfo(): { name: string; version: string; supported: boolean } {
    const userAgent = navigator.userAgent;
    let browserName = 'Unknown';
    let version = 'Unknown';
    let supported = false;

    // Chrome
    if (userAgent.includes('Chrome/')) {
      browserName = 'Chrome';
      const match = userAgent.match(/Chrome\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 91;
      }
    }
    // Firefox
    else if (userAgent.includes('Firefox/')) {
      browserName = 'Firefox';
      const match = userAgent.match(/Firefox\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 89;
      }
    }
    // Safari
    else if (userAgent.includes('Safari/') && !userAgent.includes('Chrome/')) {
      browserName = 'Safari';
      const match = userAgent.match(/Version\/(\d+\.\d+)/);
      if (match) {
        version = match[1];
        supported = parseFloat(version) >= 16.4;
      }
    }
    // Edge
    else if (userAgent.includes('Edg/')) {
      browserName = 'Edge';
      const match = userAgent.match(/Edg\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 91;
      }
    }

    return { name: browserName, version, supported };
  }

  getStats() {
    return {
      isInitialized: this.isInitialized,
      isInitializing: this.isInitializing,
      bufferPoolStats: Array.from(this.alignedBufferPool.entries()).map(([size, buffers]) => ({
        size,
        pooled: buffers.length,
        maxPoolSize: this.maxPoolSize,
      })),
    };
  }

  dispose(): void {
    if (this.simdMath) {
      try {
        this.simdMath.free();
      } catch (error) {
        console.warn('Failed to free SIMD math instance:', error);
      }
      this.simdMath = null;
    }

    this.alignedBufferPool.clear();
    this.wasmModule = null;
    this.isInitialized = false;
    this.isInitializing = false;
    this.initPromise = null;
  }
}
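In practice the engine is gated on checkSIMDSupport() so browsers without WASM SIMD can fall back to scalar JS. A minimal sketch of that gating; the fallback implements plain cosine similarity, cos(a, b) = (a · b) / (|a| * |b|), and is not part of this file:

// Sketch: prefer the WASM+SIMD path, fall back to scalar JS otherwise.
const simdEngine = new SIMDMathEngine();

async function similarity(a: Float32Array, b: Float32Array): Promise<number> {
  if (await SIMDMathEngine.checkSIMDSupport()) {
    return simdEngine.cosineSimilarity(a, b);
  }
  // Scalar fallback: accumulate dot product and squared norms in one pass.
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA * normB) || 1);
}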
264
app/chrome-extension/utils/text-chunker.ts
Normal file
@@ -0,0 +1,264 @@
/**
 * Text chunking utility
 * Based on a semantic chunking strategy: splits long text into small chunks suitable for vectorization
 */

export interface TextChunk {
  text: string;
  source: string;
  index: number;
  wordCount: number;
}

export interface ChunkingOptions {
  maxWordsPerChunk?: number;
  overlapSentences?: number;
  minChunkLength?: number;
  includeTitle?: boolean;
}

export class TextChunker {
  private readonly defaultOptions: Required<ChunkingOptions> = {
    maxWordsPerChunk: 80,
    overlapSentences: 1,
    minChunkLength: 20,
    includeTitle: true,
  };

  public chunkText(content: string, title?: string, options?: ChunkingOptions): TextChunk[] {
    const opts = { ...this.defaultOptions, ...options };
    const chunks: TextChunk[] = [];

    if (opts.includeTitle && title?.trim() && title.trim().length > 5) {
      chunks.push({
        text: title.trim(),
        source: 'title',
        index: 0,
        wordCount: title.trim().split(/\s+/).length,
      });
    }

    const cleanContent = content.trim();
    if (!cleanContent) {
      return chunks;
    }

    const sentences = this.splitIntoSentences(cleanContent);

    if (sentences.length === 0) {
      return this.fallbackChunking(cleanContent, chunks, opts);
    }

    const hasLongSentences = sentences.some(
      (s: string) => s.split(/\s+/).length > opts.maxWordsPerChunk,
    );

    if (hasLongSentences) {
      return this.mixedChunking(sentences, chunks, opts);
    }

    return this.groupSentencesIntoChunks(sentences, chunks, opts);
  }

  private splitIntoSentences(content: string): string[] {
    const processedContent = content
      .replace(/([。!?])\s*/g, '$1\n')
      .replace(/([.!?])\s+(?=[A-Z])/g, '$1\n')
      .replace(/([.!?]["'])\s+(?=[A-Z])/g, '$1\n')
      .replace(/([.!?])\s*$/gm, '$1\n')
      .replace(/([。!?][""])\s*/g, '$1\n')
      .replace(/\n\s*\n/g, '\n');

    const sentences = processedContent
      .split('\n')
      .map((s) => s.trim())
      .filter((s) => s.length > 15);

    if (sentences.length < 3 && content.length > 500) {
      return this.aggressiveSentenceSplitting(content);
    }

    return sentences;
  }

  private aggressiveSentenceSplitting(content: string): string[] {
    const sentences = content
      .replace(/([.!?。!?])/g, '$1\n')
      .replace(/([;;::])/g, '$1\n')
      .replace(/([))])\s*(?=[\u4e00-\u9fa5A-Z])/g, '$1\n')
      .split('\n')
      .map((s) => s.trim())
      .filter((s) => s.length > 15);

    const maxWordsPerChunk = 80;
    const finalSentences: string[] = [];

    for (const sentence of sentences) {
      const words = sentence.split(/\s+/);
      if (words.length <= maxWordsPerChunk) {
        finalSentences.push(sentence);
      } else {
        const overlapWords = 5;
        for (let i = 0; i < words.length; i += maxWordsPerChunk - overlapWords) {
          const chunkWords = words.slice(i, i + maxWordsPerChunk);
          const chunkText = chunkWords.join(' ');
          if (chunkText.length > 15) {
            finalSentences.push(chunkText);
          }
        }
      }
    }

    return finalSentences;
  }

  /**
   * Group sentences into chunks
   */
  private groupSentencesIntoChunks(
    sentences: string[],
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;

    let i = 0;
    while (i < sentences.length) {
      let currentChunkText = '';
      let currentWordCount = 0;
      let sentencesUsed = 0;

      while (i + sentencesUsed < sentences.length && currentWordCount < options.maxWordsPerChunk) {
        const sentence = sentences[i + sentencesUsed];
        const sentenceWords = sentence.split(/\s+/).length;

        if (currentWordCount + sentenceWords > options.maxWordsPerChunk && currentWordCount > 0) {
          break;
        }

        currentChunkText += (currentChunkText ? ' ' : '') + sentence;
        currentWordCount += sentenceWords;
        sentencesUsed++;
      }

      if (currentChunkText.trim().length > options.minChunkLength) {
        chunks.push({
          text: currentChunkText.trim(),
          source: `content_chunk_${chunkIndex}`,
          index: chunkIndex,
          wordCount: currentWordCount,
        });
        chunkIndex++;
      }

      i += Math.max(1, sentencesUsed - options.overlapSentences);
    }
    return chunks;
  }

  /**
   * Mixed chunking method (handles long sentences)
   */
  private mixedChunking(
    sentences: string[],
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;

    for (const sentence of sentences) {
      const sentenceWords = sentence.split(/\s+/).length;

      if (sentenceWords <= options.maxWordsPerChunk) {
        chunks.push({
          text: sentence.trim(),
          source: `sentence_chunk_${chunkIndex}`,
          index: chunkIndex,
          wordCount: sentenceWords,
        });
        chunkIndex++;
      } else {
        const words = sentence.split(/\s+/);
        for (let i = 0; i < words.length; i += options.maxWordsPerChunk) {
          const chunkWords = words.slice(i, i + options.maxWordsPerChunk);
          const chunkText = chunkWords.join(' ');

          if (chunkText.length > options.minChunkLength) {
            chunks.push({
              text: chunkText,
              source: `long_sentence_chunk_${chunkIndex}_part_${Math.floor(i / options.maxWordsPerChunk)}`,
              index: chunkIndex,
              wordCount: chunkWords.length,
            });
          }
        }
        chunkIndex++;
      }
    }

    return chunks;
  }

  /**
   * Fallback chunking (when sentence splitting fails)
   */
  private fallbackChunking(
    content: string,
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;

    const paragraphs = content
      .split(/\n\s*\n/)
      .filter((p) => p.trim().length > options.minChunkLength);

    if (paragraphs.length > 1) {
      paragraphs.forEach((paragraph, index) => {
        const cleanParagraph = paragraph.trim();
        if (cleanParagraph.length > 0) {
          const words = cleanParagraph.split(/\s+/);
          const maxWordsPerChunk = 150;

          for (let i = 0; i < words.length; i += maxWordsPerChunk) {
            const chunkWords = words.slice(i, i + maxWordsPerChunk);
            const chunkText = chunkWords.join(' ');

            if (chunkText.length > options.minChunkLength) {
              chunks.push({
                text: chunkText,
                source: `paragraph_${index}_chunk_${Math.floor(i / maxWordsPerChunk)}`,
                index: chunkIndex,
                wordCount: chunkWords.length,
              });
              chunkIndex++;
            }
          }
        }
      });
    } else {
      const words = content.trim().split(/\s+/);
      const maxWordsPerChunk = 150;

      for (let i = 0; i < words.length; i += maxWordsPerChunk) {
        const chunkWords = words.slice(i, i + maxWordsPerChunk);
        const chunkText = chunkWords.join(' ');

        if (chunkText.length > options.minChunkLength) {
          chunks.push({
            text: chunkText,
            source: `content_chunk_${Math.floor(i / maxWordsPerChunk)}`,
            index: chunkIndex,
            wordCount: chunkWords.length,
          });
          chunkIndex++;
        }
      }
    }

    return chunks;
  }
}
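To see the chunker's knobs in action, a short usage sketch; the 60-word limit is an illustrative override of the 80-word default, and the page variables assume a content-script context:

// Sketch: chunk an extracted page before handing it to the vector pipeline.
const pageText = document.body.innerText; // assumes a content-script context
const pageTitle = document.title;

const chunker = new TextChunker();
const chunks = chunker.chunkText(pageText, pageTitle, {
  maxWordsPerChunk: 60, // illustrative override of the 80-word default
  overlapSentences: 1, // one sentence of overlap preserves cross-chunk context
});

for (const chunk of chunks) {
  console.log(`[${chunk.source}] ${chunk.wordCount} words`);
}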
1563
app/chrome-extension/utils/vector-database.ts
Normal file
File diff suppressed because it is too large