first commit

This commit is contained in:
nasir@endelospay.com
2025-08-12 02:54:17 +05:00
commit d97cad1736
225 changed files with 137626 additions and 0 deletions

View File

@@ -0,0 +1,586 @@
/**
* Content index manager
* Responsible for automatically extracting, chunking and indexing tab content
*/
import { TextChunker } from './text-chunker';
import { VectorDatabase, getGlobalVectorDatabase } from './vector-database';
import {
SemanticSimilarityEngine,
SemanticSimilarityEngineProxy,
PREDEFINED_MODELS,
type ModelPreset,
} from './semantic-similarity-engine';
import { TOOL_MESSAGE_TYPES } from '@/common/message-types';
/**
 * Options accepted by the ContentIndexer constructor.
 * Every field is optional; omitted fields fall back to the defaults set in the constructor.
 */
export interface IndexingOptions {
/** Index a tab automatically once it reports status "complete" (default: true). */
autoIndex?: boolean;
/** Maximum number of text chunks indexed per page; extra chunks are dropped (default: 50). */
maxChunksPerPage?: number;
/** Skip a page whose URL + title combination has already been indexed (default: true). */
skipDuplicates?: boolean;
}
/**
 * Extracts the text of browser tabs, splits it into chunks, embeds each chunk with the
 * semantic engine and stores the embeddings in the vector database so they can be searched.
 *
 * The indexer is lazy: nothing is created until initialize() runs, and indexing/searching
 * refuse to work while the semantic engine is neither ready nor initializing.
 */
export class ContentIndexer {
  /** URL schemes that are never indexed (browser-internal pages, extensions, local files). */
  private static readonly EXCLUDED_URL_PATTERNS: readonly RegExp[] = [
    /^chrome:\/\//,
    /^chrome-extension:\/\//,
    /^edge:\/\//,
    /^about:/,
    /^moz-extension:\/\//,
    /^file:\/\//,
  ];

  private textChunker: TextChunker;
  private vectorDatabase!: VectorDatabase;
  private semanticEngine!: SemanticSimilarityEngine | SemanticSimilarityEngineProxy;
  private isInitialized = false;
  private isInitializing = false;
  private initPromise: Promise<void> | null = null;
  /** Keys (`${url}_${title}`) of pages that are currently considered indexed. */
  private indexedPages = new Set<string>();
  /** Page keys grouped by the tab that indexed them, so they can be forgotten with the tab. */
  private tabPageKeys = new Map<number, Set<string>>();
  /** True once the chrome.* listeners are registered; initialization may run more than once. */
  private tabListenersRegistered = false;
  private readonly options: Required<IndexingOptions>;

  /**
   * @param options Optional overrides for the default indexing behavior.
   */
  constructor(options?: IndexingOptions) {
    this.options = {
      autoIndex: true,
      maxChunksPerPage: 50,
      skipDuplicates: true,
      ...options,
    };
    this.textChunker = new TextChunker();
  }

  /**
   * Build the engine configuration for the model currently selected in chrome.storage.local.
   * Falls back to the quantized multilingual-e5-small configuration when the stored
   * selection cannot be read or does not name a predefined model.
   */
  private async getCurrentModelConfig() {
    try {
      const result = await chrome.storage.local.get(['selectedModel', 'selectedVersion']);
      const selectedModel = (result.selectedModel as ModelPreset) || 'multilingual-e5-small';
      const selectedVersion =
        (result.selectedVersion as 'full' | 'quantized' | 'compressed') || 'quantized';
      // An unknown preset makes modelInfo undefined; the property access below then throws
      // and the catch branch supplies the default configuration.
      const modelInfo = PREDEFINED_MODELS[selectedModel];
      return {
        modelPreset: selectedModel,
        modelIdentifier: modelInfo.modelIdentifier,
        dimension: modelInfo.dimension,
        modelVersion: selectedVersion,
        useLocalFiles: false,
        maxLength: 256,
        cacheSize: 1000,
        forceOffscreen: true,
      };
    } catch (error) {
      console.error('ContentIndexer: Failed to get current model config, using default:', error);
      return {
        modelPreset: 'multilingual-e5-small' as const,
        modelIdentifier: 'Xenova/multilingual-e5-small',
        dimension: 384,
        modelVersion: 'quantized' as const,
        useLocalFiles: false,
        maxLength: 256,
        cacheSize: 1000,
        forceOffscreen: true,
      };
    }
  }

  /**
   * Initialize the indexer. Concurrent callers share the same in-flight promise and a
   * completed initialization is not repeated.
   * @throws Whatever the engine or database initialization throws.
   */
  public async initialize(): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;
    this.isInitializing = true;
    this.initPromise = this._doInitialize().finally(() => {
      this.isInitializing = false;
    });
    return this.initPromise;
  }

  /** Create the engine proxy and vector database, then hook up the tab listeners. */
  private async _doInitialize(): Promise<void> {
    try {
      const engineConfig = await this.getCurrentModelConfig();
      // Use the proxy class to reuse the engine instance living in the offscreen document.
      this.semanticEngine = new SemanticSimilarityEngineProxy(engineConfig);
      await this.semanticEngine.initialize();
      this.vectorDatabase = await getGlobalVectorDatabase({
        dimension: engineConfig.dimension,
        efSearch: 50,
      });
      await this.vectorDatabase.initialize();
      this.setupTabEventListeners();
      this.isInitialized = true;
    } catch (error) {
      console.error('ContentIndexer: Initialization failed:', error);
      this.isInitialized = false;
      throw error;
    }
  }

  /**
   * Extract, chunk, embed and store the content of one tab.
   * Failures are logged, never thrown. A page is only remembered as indexed when at least
   * one chunk was stored (or when it produced no chunks at all), so a page whose chunks all
   * failed is attempted again the next time it is seen.
   * @param tabId Id of the tab to index.
   */
  public async indexTabContent(tabId: number): Promise<void> {
    if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
      console.log(
        `ContentIndexer: Skipping tab ${tabId} - semantic engine not ready and not initializing`,
      );
      return;
    }
    if (!this.isInitialized) {
      // Only initialize if the semantic engine is already ready.
      if (!this.isSemanticEngineReady()) {
        console.log(
          `ContentIndexer: Skipping tab ${tabId} - ContentIndexer not initialized and semantic engine not ready`,
        );
        return;
      }
      await this.initialize();
    }
    try {
      const tab = await chrome.tabs.get(tabId);
      const url = tab.url;
      if (!url || !this.shouldIndexUrl(url)) {
        console.log(`ContentIndexer: Skipping tab ${tabId} - URL not indexable`);
        return;
      }
      const pageKey = `${url}_${tab.title}`;
      if (this.options.skipDuplicates && this.indexedPages.has(pageKey)) {
        console.log(`ContentIndexer: Skipping tab ${tabId} - already indexed`);
        return;
      }
      console.log(`ContentIndexer: Starting to index tab ${tabId}: ${tab.title}`);
      const content = await this.extractTabContent(tabId);
      if (!content) {
        console.log(`ContentIndexer: No content extracted from tab ${tabId}`);
        return;
      }
      const chunks = this.textChunker.chunkText(content.textContent, content.title);
      console.log(`ContentIndexer: Generated ${chunks.length} chunks for tab ${tabId}`);
      const chunksToIndex = chunks.slice(0, this.options.maxChunksPerPage);
      if (chunks.length > this.options.maxChunksPerPage) {
        console.log(
          `ContentIndexer: Limited chunks from ${chunks.length} to ${this.options.maxChunksPerPage}`,
        );
      }
      let indexedCount = 0;
      for (const chunk of chunksToIndex) {
        // One failing chunk must not abort the rest of the page.
        try {
          const embedding = await this.semanticEngine.getEmbedding(chunk.text);
          const label = await this.vectorDatabase.addDocument(
            tabId,
            url,
            tab.title || '',
            chunk,
            embedding,
          );
          indexedCount++;
          console.log(`ContentIndexer: Indexed chunk ${chunk.index} with label ${label}`);
        } catch (error) {
          console.error(`ContentIndexer: Failed to index chunk ${chunk.index}:`, error);
        }
      }
      if (chunksToIndex.length > 0 && indexedCount === 0) {
        // Nothing was stored, so do not mark the page as indexed.
        console.warn(`ContentIndexer: No chunks could be indexed for tab ${tabId}`);
        return;
      }
      this.rememberIndexedPage(tabId, pageKey);
      console.log(`ContentIndexer: Successfully indexed ${indexedCount} chunks for tab ${tabId}`);
    } catch (error) {
      console.error(`ContentIndexer: Failed to index tab ${tabId}:`, error);
    }
  }

  /** Record that `pageKey` is indexed and that its documents belong to `tabId`. */
  private rememberIndexedPage(tabId: number, pageKey: string): void {
    this.indexedPages.add(pageKey);
    let keysForTab = this.tabPageKeys.get(tabId);
    if (!keysForTab) {
      keysForTab = new Set<string>();
      this.tabPageKeys.set(tabId, keysForTab);
    }
    keysForTab.add(pageKey);
  }

  /**
   * Semantic search over the indexed content.
   * If the first attempt fails with a "not initialized" error, the engine is initialized
   * again and the search is retried once.
   * @param query Free-text query.
   * @param topK Maximum number of results to return.
   * @returns The results produced by the vector database search.
   * @throws Error when the semantic engine is not ready, or when the search (and retry) fails.
   */
  public async searchContent(query: string, topK: number = 10) {
    if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
      throw new Error(
        'Semantic engine is not ready yet. Please initialize the semantic engine first.',
      );
    }
    if (!this.isInitialized) {
      // Only initialize if the semantic engine is already ready.
      if (!this.isSemanticEngineReady()) {
        throw new Error(
          'ContentIndexer not initialized and semantic engine not ready. Please initialize the semantic engine first.',
        );
      }
      await this.initialize();
    }
    try {
      const results = await this.embedAndSearch(query, topK);
      console.log(`ContentIndexer: Found ${results.length} results for query: "${query}"`);
      return results;
    } catch (error) {
      console.error('ContentIndexer: Search failed:', error);
      if (error instanceof Error && error.message.includes('not initialized')) {
        console.log(
          'ContentIndexer: Attempting to reinitialize semantic engine and retry search...',
        );
        try {
          await this.semanticEngine.initialize();
          const results = await this.embedAndSearch(query, topK);
          console.log(
            `ContentIndexer: Retry successful, found ${results.length} results for query: "${query}"`,
          );
          return results;
        } catch (retryError) {
          console.error('ContentIndexer: Retry after reinitialization also failed:', retryError);
          throw retryError;
        }
      }
      throw error;
    }
  }

  /** Embed the query and run it against the vector database. */
  private async embedAndSearch(query: string, topK: number) {
    const queryEmbedding = await this.semanticEngine.getEmbedding(query);
    return this.vectorDatabase.search(queryEmbedding, topK);
  }

  /**
   * Remove every document stored for a tab and forget the page keys that tab indexed, so
   * the same page can be indexed again later. Does nothing before initialization.
   * Failures are logged, never thrown.
   * @param tabId Id of the tab whose index entries are removed.
   */
  public async removeTabIndex(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      return;
    }
    try {
      await this.vectorDatabase.removeTabDocuments(tabId);
      // Page keys have the form `${url}_${title}` and carry no tab id, so the keys that
      // belong to this tab are looked up through the per-tab map.
      const keysForTab = this.tabPageKeys.get(tabId);
      if (keysForTab) {
        for (const pageKey of keysForTab) {
          this.indexedPages.delete(pageKey);
        }
        this.tabPageKeys.delete(tabId);
      }
      console.log(`ContentIndexer: Removed index for tab ${tabId}`);
    } catch (error) {
      console.error(`ContentIndexer: Failed to remove index for tab ${tabId}:`, error);
    }
  }

  /**
   * Whether this indexer holds a semantic engine that reports itself initialized.
   * Always a real boolean, including before any engine has been created.
   */
  public isSemanticEngineReady(): boolean {
    return Boolean(this.semanticEngine && this.semanticEngine.isInitialized);
  }

  /**
   * Ask the background semantic-similarity module whether the global engine status is
   * 'ready'. Resolves to false when the status cannot be obtained.
   */
  public async isGlobalSemanticEngineReady(): Promise<boolean> {
    try {
      // ContentIndexer runs in the background script, so call the handler directly
      // instead of sending a message.
      const { handleGetModelStatus } = await import('@/entrypoints/background/semantic-similarity');
      const response = await handleGetModelStatus();
      return Boolean(
        response &&
          response.success &&
          response.status &&
          response.status.initializationStatus === 'ready',
      );
    } catch (error) {
      console.error('ContentIndexer: Failed to check global semantic engine status:', error);
      return false;
    }
  }

  /** Whether this indexer, or the engine it holds, is currently initializing. */
  public isSemanticEngineInitializing(): boolean {
    return Boolean(
      this.isInitializing || (this.semanticEngine && (this.semanticEngine as any).isInitializing),
    );
  }

  /**
   * Reset the indexer after a model switch: wipe all stored vector data, create a new
   * engine proxy for the newly selected model and run initialization again.
   * @throws When the new engine proxy or the initialization fails.
   */
  public async reinitialize(): Promise<void> {
    console.log('ContentIndexer: Reinitializing for model switch...');
    this.isInitialized = false;
    this.isInitializing = false;
    this.initPromise = null;
    await this.performCompleteDataCleanupForModelSwitch();
    this.indexedPages.clear();
    this.tabPageKeys.clear();
    console.log('ContentIndexer: Cleared indexed pages cache');
    try {
      console.log('ContentIndexer: Creating new semantic engine proxy...');
      const newEngineConfig = await this.getCurrentModelConfig();
      console.log('ContentIndexer: New engine config:', newEngineConfig);
      this.semanticEngine = new SemanticSimilarityEngineProxy(newEngineConfig);
      console.log('ContentIndexer: New semantic engine proxy created');
      await this.semanticEngine.initialize();
      console.log('ContentIndexer: Semantic engine proxy initialization completed');
    } catch (error) {
      console.error('ContentIndexer: Failed to create new semantic engine proxy:', error);
      throw error;
    }
    console.log(
      'ContentIndexer: New semantic engine proxy is ready, proceeding with initialization',
    );
    await this.initialize();
    console.log('ContentIndexer: Reinitialization completed successfully');
  }

  /**
   * Best-effort removal of everything persisted for the previous model: the live vector
   * database, the shared vector data, the related chrome.storage keys and both IndexedDB
   * databases. Each step logs its own failure and the remaining steps still run.
   */
  private async performCompleteDataCleanupForModelSwitch(): Promise<void> {
    console.log('ContentIndexer: Starting complete data cleanup for model switch...');
    try {
      if (this.vectorDatabase) {
        try {
          console.log('ContentIndexer: Clearing existing vector database instance...');
          await this.vectorDatabase.clear();
          console.log('ContentIndexer: Vector database instance cleared successfully');
        } catch (error) {
          console.warn('ContentIndexer: Failed to clear vector database instance:', error);
        }
      }
      try {
        const { clearAllVectorData } = await import('./vector-database');
        await clearAllVectorData();
        console.log('ContentIndexer: Cleared all vector data for model switch');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear vector data:', error);
      }
      try {
        const keysToRemove = [
          'hnswlib_document_mappings_tab_content_index.dat',
          'hnswlib_document_mappings_content_index.dat',
          'hnswlib_document_mappings_vector_index.dat',
          'vectorDatabaseStats',
          'lastCleanupTime',
        ];
        await chrome.storage.local.remove(keysToRemove);
        console.log('ContentIndexer: Cleared chrome.storage model-related data');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear chrome.storage data:', error);
      }
      try {
        await this.deleteIndexedDb('VectorDatabaseStorage');
        await this.deleteIndexedDb('/hnswlib-index');
        console.log('ContentIndexer: All IndexedDB databases cleared for model switch');
      } catch (error) {
        console.warn('ContentIndexer: Failed to clear IndexedDB databases:', error);
      }
      console.log('ContentIndexer: Complete data cleanup for model switch finished successfully');
    } catch (error) {
      console.error('ContentIndexer: Complete data cleanup for model switch failed:', error);
      throw error;
    }
  }

  /**
   * Request deletion of one IndexedDB database. The returned promise resolves on success,
   * on error and when the deletion is blocked, so cleanup is never held up by it.
   */
  private deleteIndexedDb(name: string): Promise<void> {
    return new Promise<void>((resolve) => {
      const request = indexedDB.deleteDatabase(name);
      request.onsuccess = () => {
        console.log(`ContentIndexer: ${name} database deleted`);
        resolve();
      };
      request.onerror = () => {
        console.warn(`ContentIndexer: Failed to delete ${name} database`);
        resolve(); // Don't block the process
      };
      request.onblocked = () => {
        console.warn(`ContentIndexer: ${name} database deletion blocked`);
        resolve(); // Don't block the process
      };
    });
  }

  /**
   * Fire-and-forget: initialize this indexer in the background, but only when the global
   * semantic engine already reports 'ready'. Errors are logged, never thrown.
   */
  public startSemanticEngineInitialization(): void {
    if (!this.isInitialized && !this.isInitializing) {
      console.log('ContentIndexer: Checking if semantic engine is ready...');
      this.isGlobalSemanticEngineReady()
        .then((isReady) => {
          if (isReady) {
            console.log('ContentIndexer: Starting initialization (semantic engine ready)...');
            this.initialize().catch((error) => {
              console.error('ContentIndexer: Background initialization failed:', error);
            });
          } else {
            console.log('ContentIndexer: Semantic engine not ready, skipping initialization');
          }
        })
        .catch((error) => {
          console.error('ContentIndexer: Failed to check semantic engine status:', error);
        });
    }
  }

  /**
   * Snapshot of the indexing state: the vector database statistics (zeros while no
   * database exists) plus the indexed page count and the readiness flags.
   */
  public getStats() {
    const vectorStats = this.vectorDatabase
      ? this.vectorDatabase.getStats()
      : {
          totalDocuments: 0,
          totalTabs: 0,
          indexSize: 0,
        };
    return {
      ...vectorStats,
      indexedPages: this.indexedPages.size,
      isInitialized: this.isInitialized,
      semanticEngineReady: this.isSemanticEngineReady(),
      semanticEngineInitializing: this.isSemanticEngineInitializing(),
    };
  }

  /**
   * Clear the vector database and forget every indexed page.
   * Does nothing before initialization; failures are logged, never thrown.
   */
  public async clearAllIndexes(): Promise<void> {
    if (!this.isInitialized) {
      return;
    }
    try {
      await this.vectorDatabase.clear();
      this.indexedPages.clear();
      this.tabPageKeys.clear();
      console.log('ContentIndexer: All indexes cleared');
    } catch (error) {
      console.error('ContentIndexer: Failed to clear indexes:', error);
    }
  }

  /**
   * Register the chrome.tabs / chrome.webNavigation listeners that drive auto-indexing and
   * index removal. Registration happens once per instance: initialization runs again on
   * reinitialize(), and registering a second set of listeners would process every tab
   * event twice.
   */
  private setupTabEventListeners(): void {
    if (this.tabListenersRegistered) {
      return;
    }
    this.tabListenersRegistered = true;
    chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
      if (this.options.autoIndex && changeInfo.status === 'complete' && tab.url) {
        // Indexing is deferred by two seconds after the tab reports "complete".
        setTimeout(() => {
          if (!this.isSemanticEngineReady() && !this.isSemanticEngineInitializing()) {
            console.log(
              `ContentIndexer: Skipping auto-index for tab ${tabId} - semantic engine not ready`,
            );
            return;
          }
          this.indexTabContent(tabId).catch((error) => {
            console.error(`ContentIndexer: Auto-indexing failed for tab ${tabId}:`, error);
          });
        }, 2000);
      }
    });
    chrome.tabs.onRemoved.addListener(async (tabId) => {
      await this.removeTabIndex(tabId);
    });
    if (chrome.webNavigation) {
      chrome.webNavigation.onCommitted.addListener(async (details) => {
        // frameId 0 is the top-level frame: the tab navigated, so its old content is stale.
        if (details.frameId === 0) {
          await this.removeTabIndex(details.tabId);
        }
      });
    }
  }

  /** True unless the URL uses one of the excluded browser-internal, extension or file schemes. */
  private shouldIndexUrl(url: string): boolean {
    return !ContentIndexer.EXCLUDED_URL_PATTERNS.some((pattern) => pattern.test(url));
  }

  /**
   * Inject the web-fetcher helper script into the tab and ask it for the page text.
   * @returns The text content and title, or null when extraction fails for any reason.
   */
  private async extractTabContent(
    tabId: number,
  ): Promise<{ textContent: string; title: string } | null> {
    try {
      await chrome.scripting.executeScript({
        target: { tabId },
        files: ['inject-scripts/web-fetcher-helper.js'],
      });
      const response = await chrome.tabs.sendMessage(tabId, {
        action: TOOL_MESSAGE_TYPES.WEB_FETCHER_GET_TEXT_CONTENT,
      });
      if (response.success && response.textContent) {
        return {
          textContent: response.textContent,
          title: response.title || '',
        };
      } else {
        console.error(
          `ContentIndexer: Failed to extract content from tab ${tabId}:`,
          response.error,
        );
        return null;
      }
    } catch (error) {
      console.error(`ContentIndexer: Error extracting content from tab ${tabId}:`, error);
      return null;
    }
  }
}
/** Module-wide ContentIndexer instance, created on first request. */
let globalContentIndexer: ContentIndexer | null = null;

/**
 * Return the shared ContentIndexer, constructing it with default options the first time
 * this function is called.
 */
export function getGlobalContentIndexer(): ContentIndexer {
  if (globalContentIndexer === null) {
    const created = new ContentIndexer();
    globalContentIndexer = created;
    return created;
  }
  return globalContentIndexer;
}

View File

@@ -0,0 +1,273 @@
/**
* Chrome Extension i18n utility
* Provides safe access to chrome.i18n.getMessage with fallbacks
*/
// English fallback messages, keyed by i18n message name.
// getMessage() reads this table when chrome.i18n is unavailable or returns an empty message.
// Values may contain positional placeholders such as {0}, which getMessage() fills in from
// its substitutions argument.
const fallbackMessages: Record<string, string> = {
// Extension metadata
extensionName: 'chrome-mcp-server',
extensionDescription: 'Exposes browser capabilities with your own chrome',
// Section headers
nativeServerConfigLabel: 'Native Server Configuration',
semanticEngineLabel: 'Semantic Engine',
embeddingModelLabel: 'Embedding Model',
indexDataManagementLabel: 'Index Data Management',
modelCacheManagementLabel: 'Model Cache Management',
// Status labels
statusLabel: 'Status',
runningStatusLabel: 'Running Status',
connectionStatusLabel: 'Connection Status',
lastUpdatedLabel: 'Last Updated:',
// Connection states
connectButton: 'Connect',
disconnectButton: 'Disconnect',
connectingStatus: 'Connecting...',
connectedStatus: 'Connected',
disconnectedStatus: 'Disconnected',
detectingStatus: 'Detecting...',
// Server states
serviceRunningStatus: 'Service Running (Port: {0})',
serviceNotConnectedStatus: 'Service Not Connected',
connectedServiceNotStartedStatus: 'Connected, Service Not Started',
// Configuration labels
mcpServerConfigLabel: 'MCP Server Configuration',
connectionPortLabel: 'Connection Port',
refreshStatusButton: 'Refresh Status',
copyConfigButton: 'Copy Configuration',
// Action buttons
retryButton: 'Retry',
cancelButton: 'Cancel',
confirmButton: 'Confirm',
saveButton: 'Save',
closeButton: 'Close',
resetButton: 'Reset',
// Progress states
initializingStatus: 'Initializing...',
processingStatus: 'Processing...',
loadingStatus: 'Loading...',
clearingStatus: 'Clearing...',
cleaningStatus: 'Cleaning...',
downloadingStatus: 'Downloading...',
// Semantic engine states
semanticEngineReadyStatus: 'Semantic Engine Ready',
semanticEngineInitializingStatus: 'Semantic Engine Initializing...',
semanticEngineInitFailedStatus: 'Semantic Engine Initialization Failed',
semanticEngineNotInitStatus: 'Semantic Engine Not Initialized',
initSemanticEngineButton: 'Initialize Semantic Engine',
reinitializeButton: 'Reinitialize',
// Model states
downloadingModelStatus: 'Downloading Model... {0}%',
switchingModelStatus: 'Switching Model...',
modelLoadedStatus: 'Model Loaded',
modelFailedStatus: 'Model Failed to Load',
// Model descriptions
lightweightModelDescription: 'Lightweight Multilingual Model',
betterThanSmallDescription: 'Slightly larger than e5-small, but better performance',
multilingualModelDescription: 'Multilingual Semantic Model',
// Performance levels
fastPerformance: 'Fast',
balancedPerformance: 'Balanced',
accuratePerformance: 'Accurate',
// Error messages
networkErrorMessage: 'Network connection error, please check network and retry',
modelCorruptedErrorMessage: 'Model file corrupted or incomplete, please retry download',
unknownErrorMessage: 'Unknown error, please check if your network can access HuggingFace',
permissionDeniedErrorMessage: 'Permission denied',
timeoutErrorMessage: 'Operation timed out',
// Data statistics
indexedPagesLabel: 'Indexed Pages',
indexSizeLabel: 'Index Size',
activeTabsLabel: 'Active Tabs',
vectorDocumentsLabel: 'Vector Documents',
cacheSizeLabel: 'Cache Size',
cacheEntriesLabel: 'Cache Entries',
// Data management
clearAllDataButton: 'Clear All Data',
clearAllCacheButton: 'Clear All Cache',
cleanExpiredCacheButton: 'Clean Expired Cache',
exportDataButton: 'Export Data',
importDataButton: 'Import Data',
// Dialog titles
confirmClearDataTitle: 'Confirm Clear Data',
settingsTitle: 'Settings',
aboutTitle: 'About',
helpTitle: 'Help',
// Dialog messages
clearDataWarningMessage:
'This operation will clear all indexed webpage content and vector data, including:',
clearDataList1: 'All webpage text content index',
clearDataList2: 'Vector embedding data',
clearDataList3: 'Search history and cache',
clearDataIrreversibleWarning:
'This operation is irreversible! After clearing, you need to browse webpages again to rebuild the index.',
confirmClearButton: 'Confirm Clear',
// Cache states
cacheDetailsLabel: 'Cache Details',
noCacheDataMessage: 'No cache data',
loadingCacheInfoStatus: 'Loading cache information...',
processingCacheStatus: 'Processing cache...',
expiredLabel: 'Expired',
// Browser integration
bookmarksBarLabel: 'Bookmarks Bar',
newTabLabel: 'New Tab',
currentPageLabel: 'Current Page',
// Accessibility
menuLabel: 'Menu',
navigationLabel: 'Navigation',
mainContentLabel: 'Main Content',
// Future features
languageSelectorLabel: 'Language',
themeLabel: 'Theme',
lightTheme: 'Light',
darkTheme: 'Dark',
autoTheme: 'Auto',
advancedSettingsLabel: 'Advanced Settings',
debugModeLabel: 'Debug Mode',
verboseLoggingLabel: 'Verbose Logging',
// Notifications
successNotification: 'Operation completed successfully',
warningNotification: 'Warning: Please review before proceeding',
infoNotification: 'Information',
configCopiedNotification: 'Configuration copied to clipboard',
dataClearedNotification: 'Data cleared successfully',
// Units
bytesUnit: 'bytes',
kilobytesUnit: 'KB',
megabytesUnit: 'MB',
gigabytesUnit: 'GB',
itemsUnit: 'items',
pagesUnit: 'pages',
// Legacy keys for backwards compatibility
nativeServerConfig: 'Native Server Configuration',
runningStatus: 'Running Status',
refreshStatus: 'Refresh Status',
lastUpdated: 'Last Updated:',
mcpServerConfig: 'MCP Server Configuration',
connectionPort: 'Connection Port',
connecting: 'Connecting...',
disconnect: 'Disconnect',
connect: 'Connect',
semanticEngine: 'Semantic Engine',
embeddingModel: 'Embedding Model',
retry: 'Retry',
indexDataManagement: 'Index Data Management',
clearing: 'Clearing...',
clearAllData: 'Clear All Data',
copyConfig: 'Copy Configuration',
serviceRunning: 'Service Running (Port: {0})',
connectedServiceNotStarted: 'Connected, Service Not Started',
serviceNotConnected: 'Service Not Connected',
detecting: 'Detecting...',
lightweightModel: 'Lightweight Multilingual Model',
betterThanSmall: 'Slightly larger than e5-small, but better performance',
multilingualModel: 'Multilingual Semantic Model',
fast: 'Fast',
balanced: 'Balanced',
accurate: 'Accurate',
semanticEngineReady: 'Semantic Engine Ready',
semanticEngineInitializing: 'Semantic Engine Initializing...',
semanticEngineInitFailed: 'Semantic Engine Initialization Failed',
semanticEngineNotInit: 'Semantic Engine Not Initialized',
downloadingModel: 'Downloading Model... {0}%',
switchingModel: 'Switching Model...',
networkError: 'Network connection error, please check network and retry',
modelCorrupted: 'Model file corrupted or incomplete, please retry download',
unknownError: 'Unknown error, please check if your network can access HuggingFace',
reinitialize: 'Reinitialize',
initializing: 'Initializing...',
initSemanticEngine: 'Initialize Semantic Engine',
indexedPages: 'Indexed Pages',
indexSize: 'Index Size',
activeTabs: 'Active Tabs',
vectorDocuments: 'Vector Documents',
confirmClearData: 'Confirm Clear Data',
clearDataWarning:
'This operation will clear all indexed webpage content and vector data, including:',
clearDataIrreversible:
'This operation is irreversible! After clearing, you need to browse webpages again to rebuild the index.',
confirmClear: 'Confirm Clear',
cancel: 'Cancel',
confirm: 'Confirm',
processing: 'Processing...',
modelCacheManagement: 'Model Cache Management',
cacheSize: 'Cache Size',
cacheEntries: 'Cache Entries',
cacheDetails: 'Cache Details',
noCacheData: 'No cache data',
loadingCacheInfo: 'Loading cache information...',
processingCache: 'Processing cache...',
cleaning: 'Cleaning...',
cleanExpiredCache: 'Clean Expired Cache',
clearAllCache: 'Clear All Cache',
expired: 'Expired',
bookmarksBar: 'Bookmarks Bar',
};
/**
 * Look up a localized message, falling back to the built-in English table.
 *
 * chrome.i18n.getMessage is tried first when it exists. If it is unavailable, throws, or
 * returns an empty string, the English fallback for `key` is used; an unknown key falls
 * back to the key itself.
 *
 * @param key Message key
 * @param substitutions Optional values for the positional placeholders `{0}`, `{1}`, ...
 * @returns Localized message, or the English fallback with its placeholders filled in
 */
export function getMessage(key: string, substitutions?: string[]): string {
  try {
    // Check if Chrome extension APIs are available
    if (typeof chrome !== 'undefined' && chrome.i18n && chrome.i18n.getMessage) {
      const message = chrome.i18n.getMessage(key, substitutions);
      if (message) {
        return message;
      }
    }
  } catch (error) {
    console.warn(`Failed to get i18n message for key "${key}":`, error);
  }
  // Own-property lookup only: a key such as "toString" must not resolve to an inherited
  // Object.prototype member.
  const hasFallback = Object.prototype.hasOwnProperty.call(fallbackMessages, key);
  const template = (hasFallback ? fallbackMessages[key] : '') || key;
  if (!substitutions || substitutions.length === 0) {
    return template;
  }
  // Single pass with a replacer function: every occurrence of a placeholder is filled,
  // and "$" sequences inside a substitution value are inserted literally.
  return template.replace(/\{(\d+)\}/g, (placeholder: string, position: string) => {
    const value = substitutions[Number(position)];
    return value === undefined ? placeholder : value;
  });
}
/**
 * Check if the Chrome extension i18n API is available.
 * @returns true only when `chrome.i18n.getMessage` exists and is a function; always a real
 * boolean, including when `chrome` exists but `chrome.i18n` does not.
 */
export function isI18nAvailable(): boolean {
  try {
    if (typeof chrome === 'undefined' || !chrome.i18n) {
      return false;
    }
    return typeof chrome.i18n.getMessage === 'function';
  } catch {
    return false;
  }
}

View File

@@ -0,0 +1,194 @@
/**
* Image processing utility functions
*/
/**
 * Fetch an image URL (typically a data URL) and decode it into an ImageBitmap that can be
 * drawn onto an OffscreenCanvas.
 * @param dataUrl Image data URL
 * @returns The decoded ImageBitmap
 */
export async function createImageBitmapFromUrl(dataUrl: string): Promise<ImageBitmap> {
  const fetched = await fetch(dataUrl);
  const imageBlob = await fetched.blob();
  const bitmap = await createImageBitmap(imageBlob);
  return bitmap;
}
/**
 * Stitch multiple image parts (data URLs) onto a single white-backed canvas.
 *
 * Each part is drawn at x = 0 and its own `y`. A part that would extend past the bottom of
 * the canvas is clipped, and a part lying entirely below it is skipped. A part that fails
 * to decode or draw is logged and skipped so the remaining parts are still stitched.
 *
 * @param parts Array of image parts, each containing dataUrl and y coordinate
 * @param totalWidthPx Total width (pixels)
 * @param totalHeightPx Total height (pixels)
 * @returns Stitched canvas
 * @throws Error when no 2D context can be obtained
 */
export async function stitchImages(
  parts: { dataUrl: string; y: number }[],
  totalWidthPx: number,
  totalHeightPx: number,
): Promise<OffscreenCanvas> {
  const canvas = new OffscreenCanvas(totalWidthPx, totalHeightPx);
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    throw new Error('Unable to get canvas context');
  }
  ctx.fillStyle = '#FFFFFF';
  ctx.fillRect(0, 0, canvas.width, canvas.height);
  for (const part of parts) {
    let img: ImageBitmap | null = null;
    try {
      img = await createImageBitmapFromUrl(part.dataUrl);
      const dy = part.y;
      // Clip the part so it never extends past the bottom edge of the canvas.
      const sHeight = dy + img.height > totalHeightPx ? totalHeightPx - dy : img.height;
      if (sHeight <= 0) continue;
      ctx.drawImage(img, 0, 0, img.width, sHeight, 0, dy, img.width, sHeight);
    } catch (error) {
      // Log only the offset: the data URL itself can be several megabytes long.
      console.error('Error stitching image part:', error, { y: part.y });
    } finally {
      // Release the decoded bitmap as soon as it has been drawn.
      if (img) {
        img.close();
      }
    }
  }
  return canvas;
}
/**
 * Crop an image (from a data URL) to a rectangle and optionally resize the result.
 *
 * The crop rectangle is clamped to the image bounds first. When a target size is given,
 * the output canvas measures target * dpr physical pixels in that dimension; otherwise it
 * keeps the clamped crop size.
 *
 * @param originalDataUrl Original image data URL
 * @param cropRectPx Crop rectangle (physical pixels)
 * @param dpr Device pixel ratio
 * @param targetWidthOpt Optional target output width (CSS pixels)
 * @param targetHeightOpt Optional target output height (CSS pixels)
 * @returns Cropped canvas
 * @throws Error when the clamped crop area is empty or no 2D context can be obtained
 */
export async function cropAndResizeImage(
  originalDataUrl: string,
  cropRectPx: { x: number; y: number; width: number; height: number },
  dpr: number = 1,
  targetWidthOpt?: number,
  targetHeightOpt?: number,
): Promise<OffscreenCanvas> {
  const img = await createImageBitmapFromUrl(originalDataUrl);
  try {
    let sx = cropRectPx.x;
    let sy = cropRectPx.y;
    let sWidth = cropRectPx.width;
    let sHeight = cropRectPx.height;
    // Ensure crop area is within image boundaries
    if (sx < 0) {
      sWidth += sx;
      sx = 0;
    }
    if (sy < 0) {
      sHeight += sy;
      sy = 0;
    }
    if (sx + sWidth > img.width) {
      sWidth = img.width - sx;
    }
    if (sy + sHeight > img.height) {
      sHeight = img.height - sy;
    }
    if (sWidth <= 0 || sHeight <= 0) {
      throw new Error(
        'Invalid calculated crop size (<=0). Element may not be visible or fully captured.',
      );
    }
    const finalCanvasWidthPx = targetWidthOpt ? targetWidthOpt * dpr : sWidth;
    const finalCanvasHeightPx = targetHeightOpt ? targetHeightOpt * dpr : sHeight;
    const canvas = new OffscreenCanvas(finalCanvasWidthPx, finalCanvasHeightPx);
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Unable to get canvas context');
    }
    ctx.drawImage(img, sx, sy, sWidth, sHeight, 0, 0, finalCanvasWidthPx, finalCanvasHeightPx);
    return canvas;
  } finally {
    // Release the decoded bitmap on every path, including the error paths above.
    img.close();
  }
}
/**
 * Encode an OffscreenCanvas as a data URL.
 * @param canvas Canvas to encode
 * @param format Image MIME type of the output
 * @param quality Encoder quality (0-1); only forwarded when the format is 'image/jpeg'
 * @returns Data URL holding the encoded image
 */
export async function canvasToDataURL(
  canvas: OffscreenCanvas,
  format: string = 'image/png',
  quality?: number,
): Promise<string> {
  let effectiveQuality: number | undefined = undefined;
  if (format === 'image/jpeg') {
    effectiveQuality = quality;
  }
  const encoded = await canvas.convertToBlob({ type: format, quality: effectiveQuality });
  const fileReader = new FileReader();
  const asDataUrl = new Promise<string>((resolve, reject) => {
    fileReader.onloadend = () => {
      resolve(fileReader.result as string);
    };
    fileReader.onerror = reject;
  });
  fileReader.readAsDataURL(encoded);
  return asDataUrl;
}
/**
 * Compress an image by scaling it and re-encoding it in a lossy format.
 *
 * @param imageDataUrl The original image data URL (e.g., from captureVisibleTab).
 * @param options Compression options; may be omitted entirely.
 * @param options.scale Scaling factor for both dimensions (e.g., 0.7 for 70%). Default 1.0.
 * @param options.quality Encoder quality for the lossy format (0.0 to 1.0). Default 0.8.
 * @param options.format Target image format. Default 'image/jpeg'.
 * @returns The compressed image as a data URL together with its MIME type.
 * @throws Error when no 2D context can be obtained, or when encoding or reading fails.
 */
export async function compressImage(
  imageDataUrl: string,
  options: { scale?: number; quality?: number; format?: 'image/jpeg' | 'image/webp' } = {},
): Promise<{ dataUrl: string; mimeType: string }> {
  const { scale = 1.0, quality = 0.8, format = 'image/jpeg' } = options;
  // 1. Decode the original data URL into an ImageBitmap for efficient drawing.
  const imageBitmap = await createImageBitmapFromUrl(imageDataUrl);
  let compressedBlob: Blob;
  try {
    // 2. Scale the dimensions, keeping at least one pixel so a very small scale cannot
    //    produce an empty canvas that fails to encode.
    const newWidth = Math.max(1, Math.round(imageBitmap.width * scale));
    const newHeight = Math.max(1, Math.round(imageBitmap.height * scale));
    // 3. OffscreenCanvas does not need to be attached to the DOM.
    const canvas = new OffscreenCanvas(newWidth, newHeight);
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Failed to get 2D context from OffscreenCanvas');
    }
    // 4. Drawing onto the differently sized canvas performs the resize.
    ctx.drawImage(imageBitmap, 0, 0, newWidth, newHeight);
    // 5. Encoding to the target format with the given quality performs the compression.
    compressedBlob = await canvas.convertToBlob({ type: format, quality });
  } finally {
    // Release the decoded bitmap whether or not drawing and encoding succeeded.
    imageBitmap.close();
  }
  // Convert the blob to a data URL with FileReader, since OffscreenCanvas has no toDataURL.
  // Rejecting on error keeps a failed read from leaving the promise pending forever.
  const dataUrl = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result as string);
    reader.onerror = () => reject(reader.error || new Error('Failed to read compressed image'));
    reader.readAsDataURL(compressedBlob);
  });
  return { dataUrl, mimeType: format };
}

View File

@@ -0,0 +1,132 @@
/**
 * Doubly linked list node holding one cache entry together with the usage statistics
 * (access count and last access time) that drive eviction.
 */
class LRUNode<K, V> {
  constructor(
    public key: K,
    public value: V,
    public prev: LRUNode<K, V> | null = null,
    public next: LRUNode<K, V> | null = null,
    public frequency: number = 1,
    public lastAccessed: number = Date.now(),
  ) {}
}

/**
 * Fixed-capacity cache combining recency and frequency.
 *
 * Entries live in a doubly linked list ordered from most recently touched (head side) to
 * least recently touched (tail side). When the cache is full, up to five entries at the
 * tail side are scored and the one with the lowest score (rarely used and not used for a
 * long time) is evicted.
 */
class LRUCache<K = string, V = any> {
  private capacity: number;
  private cache: Map<K, LRUNode<K, V>>;
  // Sentinel nodes: real entries always sit between head and tail, so linking and
  // unlinking never has to special-case the ends of the list.
  private head: LRUNode<K, V>;
  private tail: LRUNode<K, V>;

  /**
   * @param capacity Maximum number of entries; any value that is not greater than 0
   * falls back to 100.
   */
  constructor(capacity: number) {
    this.capacity = capacity > 0 ? capacity : 100;
    this.cache = new Map<K, LRUNode<K, V>>();
    this.head = new LRUNode<K, V>(null as any, null as any);
    this.tail = new LRUNode<K, V>(null as any, null as any);
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }

  /** Link `node` directly after the head sentinel. */
  private addToHead(node: LRUNode<K, V>): void {
    node.prev = this.head;
    node.next = this.head.next;
    this.head.next!.prev = node;
    this.head.next = node;
  }

  /** Unlink `node` from the list; its neighbours are joined to each other. */
  private removeNode(node: LRUNode<K, V>): void {
    node.prev!.next = node.next;
    node.next!.prev = node.prev;
  }

  private moveToHead(node: LRUNode<K, V>): void {
    this.removeNode(node);
    this.addToHead(node);
  }

  /**
   * Pick the entry to evict: the lowest-scoring of at most five entries at the tail side.
   * On a tie the entry closest to the tail wins. Must only be called on a non-empty cache.
   */
  private findVictimNode(): LRUNode<K, V> {
    // One timestamp for the whole scan so every candidate is scored against the same "now".
    const now = Date.now();
    let victim = this.tail.prev!;
    let minScore = this.calculateEvictionScore(victim, now);
    let current = victim.prev;
    let checked = 1;
    const maxCheck = Math.min(5, this.cache.size);
    while (current && current !== this.head && checked < maxCheck) {
      const score = this.calculateEvictionScore(current, now);
      if (score < minScore) {
        minScore = score;
        victim = current;
      }
      current = current.prev;
      checked++;
    }
    return victim;
  }

  /**
   * Score an entry; lower means "better eviction candidate". The score grows with
   * log(frequency + 1) and shrinks as the time since the last access grows (measured in
   * minutes).
   */
  private calculateEvictionScore(node: LRUNode<K, V>, now: number = Date.now()): number {
    const timeSinceAccess = now - node.lastAccessed;
    const timeWeight = 1 / (1 + timeSinceAccess / (1000 * 60));
    const frequencyWeight = Math.log(node.frequency + 1);
    return frequencyWeight * timeWeight;
  }

  /**
   * Read an entry and record the access.
   * @returns The stored value, or null when the key is not cached.
   */
  get(key: K): V | null {
    const node = this.cache.get(key);
    if (node) {
      node.frequency++;
      node.lastAccessed = Date.now();
      this.moveToHead(node);
      return node.value;
    }
    return null;
  }

  /**
   * Insert or overwrite an entry, evicting one entry first when a new key would exceed
   * the capacity.
   */
  set(key: K, value: V): void {
    const existingNode = this.cache.get(key);
    if (existingNode) {
      existingNode.value = value;
      // A write counts as a touch. Without refreshing the timestamp, an entry that was
      // just overwritten still looks cold to the eviction score and can be the next victim.
      existingNode.lastAccessed = Date.now();
      this.moveToHead(existingNode);
      return;
    }
    if (this.cache.size >= this.capacity) {
      const victimNode = this.findVictimNode();
      this.removeNode(victimNode);
      this.cache.delete(victimNode.key);
    }
    const newNode = new LRUNode(key, value);
    this.cache.set(key, newNode);
    this.addToHead(newNode);
  }

  /** Whether the key is cached. Does not count as an access. */
  has(key: K): boolean {
    return this.cache.has(key);
  }

  /** Drop every entry. */
  clear(): void {
    this.cache.clear();
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }

  /** Number of entries currently cached. */
  get size(): number {
    return this.cache.size;
  }

  /**
   * Get cache statistics.
   * @returns Current size, capacity, and their ratio (`usage`).
   */
  getStats(): { size: number; capacity: number; usage: number } {
    return {
      size: this.cache.size,
      capacity: this.capacity,
      usage: this.cache.size / this.capacity,
    };
  }
}
export default LRUCache;

View File

@@ -0,0 +1,369 @@
/**
* Model Cache Manager
*/
/** Name of the Cache Storage bucket that holds the model files and their metadata records. */
const CACHE_NAME = 'onnx-model-cache-v1';
/** Age in days after which a cached model is treated as expired. */
const CACHE_EXPIRY_DAYS = 30;
/** Limit in megabytes on the total size of cached model files, enforced during cleanup. */
const MAX_CACHE_SIZE_MB = 500;
/**
 * Metadata record stored as JSON next to each cached model, under a synthetic
 * metadata URL derived from the model URL.
 */
export interface CacheMetadata {
/** Time the record was written, in milliseconds since the epoch (`Date.now()`). */
timestamp: number;
/** URL of the model this record describes. */
modelUrl: string;
/** Size of the cached model data in bytes. */
size: number;
/** Cache version tag; written as the cache name when the record is stored. */
version: string;
}
/** One cached model as reported by the cache statistics. */
export interface CacheEntry {
/** URL of the cached model. */
url: string;
/** Size of the cached response body in bytes. */
size: number;
/** `size` converted to megabytes, rounded to two decimals. */
sizeMB: number;
/** Metadata timestamp in milliseconds, or 0 when no readable metadata exists. */
timestamp: number;
/** Human-readable age such as "3 days", or 'unknown' when the timestamp is 0. */
age: string;
/** True when the metadata is missing, unreadable, or older than the expiry window. */
expired: boolean;
}
/** Aggregate view of the model cache. */
export interface CacheStats {
/** Sum of all cached model sizes in bytes (metadata records excluded). */
totalSize: number;
/** `totalSize` converted to megabytes, rounded to two decimals. */
totalSizeMB: number;
/** Number of cached models (metadata records excluded). */
entryCount: number;
/** Per-model details, sorted newest first by timestamp. */
entries: CacheEntry[];
}
/** Internal per-model record gathered while scanning the cache. */
interface CacheEntryDetails {
/** URL of the cached model. */
url: string;
/** Metadata timestamp in milliseconds, or 0 when no readable metadata exists. */
timestamp: number;
/** Size of the cached response body in bytes. */
size: number;
}
/**
 * Singleton that persists downloaded model files in Cache Storage.
 *
 * Every model is stored under its own URL; a companion JSON metadata record
 * (timestamp, size, version) is stored under a synthetic
 * `https://cache-metadata.local/...` URL. A model without readable metadata is
 * treated as expired everywhere in this class.
 */
export class ModelCacheManager {
  private static instance: ModelCacheManager | null = null;

  private constructor() {}

  /** Returns the shared manager, creating it on first use. */
  public static getInstance(): ModelCacheManager {
    if (ModelCacheManager.instance === null) {
      ModelCacheManager.instance = new ModelCacheManager();
    }
    return ModelCacheManager.instance;
  }

  /** Builds the synthetic cache key under which the metadata for `modelUrl` is stored. */
  private getCacheMetadataKey(modelUrl: string): string {
    const encodedUrl = encodeURIComponent(modelUrl);
    return `https://cache-metadata.local/${encodedUrl}`;
  }

  /** True when the metadata timestamp is older than the configured expiry window. */
  private isCacheExpired(metadata: CacheMetadata): boolean {
    const expiryTime = metadata.timestamp + CACHE_EXPIRY_DAYS * 24 * 60 * 60 * 1000;
    return Date.now() > expiryTime;
  }

  /** True when `url` is a metadata record key rather than a model URL. */
  private isMetadataUrl(url: string): boolean {
    return url.startsWith('https://cache-metadata.local/');
  }

  /**
   * Decides expiry from an already fetched metadata response.
   * A missing response or an unreadable body counts as expired.
   */
  private async isMetadataResponseExpired(
    metadataResponse: Response | undefined,
  ): Promise<boolean> {
    if (!metadataResponse) {
      return true;
    }
    try {
      const metadata: CacheMetadata = await metadataResponse.json();
      return this.isCacheExpired(metadata);
    } catch {
      return true;
    }
  }

  /**
   * Scans the cache and returns one record per stored model (metadata records
   * are skipped), together with the summed size and the number of models.
   * The size is measured from the stored response body.
   */
  private async collectCacheEntries(): Promise<{
    entries: CacheEntryDetails[];
    totalSize: number;
    entryCount: number;
  }> {
    const cache = await caches.open(CACHE_NAME);
    const requests = await cache.keys();
    const entries: CacheEntryDetails[] = [];
    let totalSize = 0;

    for (const request of requests) {
      if (this.isMetadataUrl(request.url)) continue;

      const response = await cache.match(request);
      if (!response) continue;

      const size = (await response.blob()).size;
      totalSize += size;

      // The timestamp stays 0 when the metadata is missing or unreadable.
      let timestamp = 0;
      const metadataResponse = await cache.match(this.getCacheMetadataKey(request.url));
      if (metadataResponse) {
        try {
          const metadata: CacheMetadata = await metadataResponse.json();
          timestamp = metadata.timestamp;
        } catch (error) {
          console.warn('Failed to parse cache metadata:', error);
        }
      }

      entries.push({ url: request.url, timestamp, size });
    }

    return { entries, totalSize, entryCount: entries.length };
  }

  /**
   * Frees space when the current cache plus `newDataSize` bytes would exceed
   * the size limit. Expired entries are removed first; if that is not enough,
   * valid entries are removed oldest first until the projected size fits.
   * Does nothing when the projected size is within the limit.
   *
   * @param newDataSize Bytes about to be added to the cache (default 0).
   */
  public async cleanupCacheOnDemand(newDataSize: number = 0): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    const { entries, totalSize } = await this.collectCacheEntries();
    const maxSizeBytes = MAX_CACHE_SIZE_MB * 1024 * 1024;

    if (totalSize + newDataSize <= maxSizeBytes) {
      return;
    }

    console.log(
      `Cache size (${(totalSize / 1024 / 1024).toFixed(2)}MB) + new data (${(newDataSize / 1024 / 1024).toFixed(2)}MB) exceeds limit (${MAX_CACHE_SIZE_MB}MB), cleaning up...`,
    );

    // Split the entries into expired and still valid ones.
    const expiredEntries: CacheEntryDetails[] = [];
    const validEntries: CacheEntryDetails[] = [];
    for (const entry of entries) {
      const metadataResponse = await cache.match(this.getCacheMetadataKey(entry.url));
      const bucket = (await this.isMetadataResponseExpired(metadataResponse))
        ? expiredEntries
        : validEntries;
      bucket.push(entry);
    }

    // Pass 1: expired entries always go.
    let currentSize = totalSize;
    for (const entry of expiredEntries) {
      await cache.delete(entry.url);
      await cache.delete(this.getCacheMetadataKey(entry.url));
      currentSize -= entry.size;
      console.log(
        `Cleaned up expired cache entry: ${entry.url} (${(entry.size / 1024 / 1024).toFixed(2)}MB)`,
      );
    }

    // Pass 2: still over budget, so drop valid entries starting with the oldest.
    if (currentSize + newDataSize > maxSizeBytes) {
      validEntries.sort((a, b) => a.timestamp - b.timestamp);
      for (const entry of validEntries) {
        if (currentSize + newDataSize <= maxSizeBytes) break;
        await cache.delete(entry.url);
        await cache.delete(this.getCacheMetadataKey(entry.url));
        currentSize -= entry.size;
        console.log(
          `Cleaned up old cache entry: ${entry.url} (${(entry.size / 1024 / 1024).toFixed(2)}MB)`,
        );
      }
    }

    console.log(`Cache cleanup complete. New size: ${(currentSize / 1024 / 1024).toFixed(2)}MB`);
  }

  /**
   * Writes (or overwrites) the metadata record for `modelUrl`, stamped with
   * the current time.
   *
   * @param modelUrl URL the model is cached under.
   * @param size Size of the model data in bytes.
   */
  public async storeCacheMetadata(modelUrl: string, size: number): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    const metadata: CacheMetadata = {
      timestamp: Date.now(),
      modelUrl,
      size,
      version: CACHE_NAME,
    };
    const body = JSON.stringify(metadata);
    const metadataResponse = new Response(body, {
      headers: { 'Content-Type': 'application/json' },
    });
    await cache.put(this.getCacheMetadataKey(modelUrl), metadataResponse);
  }

  /**
   * Loads a cached model.
   *
   * @returns The model bytes when the model is cached and not expired;
   *   otherwise `null`. An expired entry, or one with missing or unreadable
   *   metadata, is deleted before `null` is returned.
   */
  public async getCachedModelData(modelUrl: string): Promise<ArrayBuffer | null> {
    const cache = await caches.open(CACHE_NAME);
    const cachedResponse = await cache.match(modelUrl);
    if (!cachedResponse) {
      return null;
    }

    const metadataResponse = await cache.match(this.getCacheMetadataKey(modelUrl));
    if (!metadataResponse) {
      console.log('Cached model has no metadata, treating as expired...');
      await this.deleteCacheEntry(modelUrl);
      return null;
    }

    try {
      const metadata: CacheMetadata = await metadataResponse.json();
      if (this.isCacheExpired(metadata)) {
        console.log('Cached model is expired, removing...');
        await this.deleteCacheEntry(modelUrl);
        return null;
      }
      console.log('Model found in cache and not expired. Loading from cache.');
      return cachedResponse.arrayBuffer();
    } catch (error) {
      console.warn('Failed to parse cache metadata, treating as expired:', error);
      await this.deleteCacheEntry(modelUrl);
      return null;
    }
  }

  /**
   * Stores model bytes under `modelUrl`, making room first if needed, and then
   * writes the matching metadata record.
   */
  public async storeModelData(modelUrl: string, data: ArrayBuffer): Promise<void> {
    const byteLength = data.byteLength;
    await this.cleanupCacheOnDemand(byteLength);

    const cache = await caches.open(CACHE_NAME);
    await cache.put(modelUrl, new Response(data));
    await this.storeCacheMetadata(modelUrl, byteLength);

    console.log(
      `Model cached successfully (${(data.byteLength / 1024 / 1024).toFixed(2)}MB): ${modelUrl}`,
    );
  }

  /** Deletes a model and its metadata record from the cache. */
  public async deleteCacheEntry(modelUrl: string): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    await cache.delete(modelUrl);
    await cache.delete(this.getCacheMetadataKey(modelUrl));
  }

  /** Deletes every entry (models and metadata records) from the cache. */
  public async clearAllCache(): Promise<void> {
    const cache = await caches.open(CACHE_NAME);
    for (const request of await cache.keys()) {
      await cache.delete(request);
    }
    console.log('All model cache entries cleared');
  }

  /**
   * Builds a report of the cached models: totals plus per-model size, age and
   * expiry state, sorted newest first.
   */
  public async getCacheStats(): Promise<CacheStats> {
    const { entries, totalSize, entryCount } = await this.collectCacheEntries();
    const cache = await caches.open(CACHE_NAME);
    const cacheEntries: CacheEntry[] = [];

    for (const entry of entries) {
      const metadataResponse = await cache.match(this.getCacheMetadataKey(entry.url));
      const expired = await this.isMetadataResponseExpired(metadataResponse);

      let age = 'unknown';
      if (entry.timestamp > 0) {
        age = `${Math.round((Date.now() - entry.timestamp) / (1000 * 60 * 60 * 24))} days`;
      }

      cacheEntries.push({
        url: entry.url,
        size: entry.size,
        sizeMB: Number((entry.size / 1024 / 1024).toFixed(2)),
        timestamp: entry.timestamp,
        age,
        expired,
      });
    }

    cacheEntries.sort((a, b) => b.timestamp - a.timestamp);
    return {
      totalSize,
      totalSizeMB: Number((totalSize / 1024 / 1024).toFixed(2)),
      entryCount,
      entries: cacheEntries,
    };
  }

  /** Runs the size-based cleanup with no pending data and logs completion. */
  public async manualCleanup(): Promise<void> {
    await this.cleanupCacheOnDemand(0);
    console.log('Manual cache cleanup completed');
  }

  /**
   * Check if a specific model is cached and not expired
   * @param modelUrl The model URL to check
   * @returns Promise<boolean> True if model is cached and valid
   */
  public async isModelCached(modelUrl: string): Promise<boolean> {
    try {
      const cache = await caches.open(CACHE_NAME);
      const cachedResponse = await cache.match(modelUrl);
      if (!cachedResponse) {
        return false;
      }

      const metadataResponse = await cache.match(this.getCacheMetadataKey(modelUrl));
      if (!metadataResponse) {
        // No metadata means expired
        return false;
      }

      try {
        const metadata: CacheMetadata = await metadataResponse.json();
        return !this.isCacheExpired(metadata);
      } catch (error) {
        console.warn('Failed to parse cache metadata for cache check:', error);
        return false;
      }
    } catch (error) {
      console.error('Error checking model cache:', error);
      return false;
    }
  }

  /**
   * Check if any valid (non-expired) model cache exists
   * @returns Promise<boolean> True if at least one valid model cache exists
   */
  public async hasAnyValidCache(): Promise<boolean> {
    try {
      const cache = await caches.open(CACHE_NAME);
      const requests = await cache.keys();

      for (const request of requests) {
        if (this.isMetadataUrl(request.url)) continue;

        const metadataResponse = await cache.match(this.getCacheMetadataKey(request.url));
        // Missing or unreadable metadata never counts as valid.
        const expired = await this.isMetadataResponseExpired(metadataResponse);
        if (!expired) {
          return true; // Found at least one valid cache
        }
      }
      return false;
    } catch (error) {
      console.error('Error checking for valid cache:', error);
      return false;
    }
  }
}

View File

@@ -0,0 +1,108 @@
/**
* Offscreen Document manager
* Ensures only one offscreen document is created across the entire extension to avoid conflicts
*/
/**
 * Singleton guarding the creation of the extension's offscreen document.
 *
 * Concurrent callers of {@link ensureOffscreenDocument} share one in-flight
 * creation attempt, and an already existing document is reused instead of
 * being created a second time.
 */
export class OffscreenManager {
  private static instance: OffscreenManager | null = null;
  private isCreated = false;
  private isCreating = false;
  private createPromise: Promise<void> | null = null;

  private constructor() {}

  /**
   * Get singleton instance
   */
  public static getInstance(): OffscreenManager {
    if (!OffscreenManager.instance) {
      OffscreenManager.instance = new OffscreenManager();
    }
    return OffscreenManager.instance;
  }

  /**
   * Ensure offscreen document exists
   *
   * Resolves immediately when this manager already recorded a document. While
   * a creation is in flight, every caller receives the same promise.
   *
   * @throws Whatever the underlying creation attempt throws (for example when
   *   the offscreen API is unavailable).
   */
  public async ensureOffscreenDocument(): Promise<void> {
    if (this.isCreated) {
      return;
    }
    if (this.isCreating && this.createPromise) {
      return this.createPromise;
    }

    this.isCreating = true;
    this.createPromise = this._doCreateOffscreenDocument().finally(() => {
      this.isCreating = false;
      // Do not keep a settled (possibly rejected) promise around.
      this.createPromise = null;
    });
    return this.createPromise;
  }

  /**
   * Reports whether an offscreen document context already exists.
   *
   * `chrome.runtime.getContexts` only exists from Chrome 116, while the
   * offscreen API itself is available from Chrome 109. When the lookup API is
   * missing the answer is "unknown", reported as `false` so that the caller
   * goes on to attempt creation instead of failing with a TypeError.
   */
  private async hasExistingOffscreenDocument(): Promise<boolean> {
    const runtime = chrome.runtime as any;
    if (typeof runtime.getContexts !== 'function') {
      return false;
    }
    const existingContexts = await runtime.getContexts({
      contextTypes: ['OFFSCREEN_DOCUMENT'],
    });
    return Boolean(existingContexts && existingContexts.length > 0);
  }

  /**
   * Performs one creation attempt: reuses an existing offscreen document when
   * one is found, otherwise creates `offscreen.html`. Failures are logged and
   * rethrown with `isCreated` left false.
   */
  private async _doCreateOffscreenDocument(): Promise<void> {
    try {
      if (!chrome.offscreen) {
        throw new Error('Offscreen API not available. Chrome 109+ required.');
      }

      if (await this.hasExistingOffscreenDocument()) {
        console.log('OffscreenManager: Offscreen document already exists');
        this.isCreated = true;
        return;
      }

      await chrome.offscreen.createDocument({
        url: 'offscreen.html',
        reasons: ['WORKERS'],
        justification: 'Need to run semantic similarity engine with workers',
      });
      this.isCreated = true;
      console.log('OffscreenManager: Offscreen document created successfully');
    } catch (error) {
      console.error('OffscreenManager: Failed to create offscreen document:', error);
      this.isCreated = false;
      throw error;
    }
  }

  /**
   * Check if offscreen document is created
   */
  public isOffscreenDocumentCreated(): boolean {
    return this.isCreated;
  }

  /**
   * Close offscreen document
   *
   * Only acts when this manager recorded a document as created. Errors are
   * logged and swallowed (best effort).
   */
  public async closeOffscreenDocument(): Promise<void> {
    try {
      if (chrome.offscreen && this.isCreated) {
        await chrome.offscreen.closeDocument();
        this.isCreated = false;
        console.log('OffscreenManager: Offscreen document closed');
      }
    } catch (error) {
      console.error('OffscreenManager: Failed to close offscreen document:', error);
    }
  }

  /**
   * Reset state (for testing)
   */
  public reset(): void {
    this.isCreated = false;
    this.isCreating = false;
    this.createPromise = null;
  }
}
export const offscreenManager = OffscreenManager.getInstance();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,496 @@
/**
* SIMD-optimized mathematical computation engine
* Uses WebAssembly + SIMD instructions to accelerate vector calculations
*/
/** Methods exposed by the `SIMDMath` class of the WebAssembly module. */
interface SIMDMathWasm {
  free(): void;
  cosine_similarity(vec_a: Float32Array, vec_b: Float32Array): number;
  batch_similarity(vectors: Float32Array, query: Float32Array, vector_dim: number): Float32Array;
  similarity_matrix(
    vectors_a: Float32Array,
    vectors_b: Float32Array,
    vector_dim: number,
  ): Float32Array;
}

/** The parts of the loaded WebAssembly glue module that this engine keeps. */
interface WasmModule {
  SIMDMath: new () => SIMDMathWasm;
  memory: WebAssembly.Memory;
  default: (module_or_path?: any) => Promise<any>;
}

/**
 * Wrapper around the `simd_math` WebAssembly module providing cosine
 * similarity, batch similarity and similarity matrix computations.
 *
 * The module is loaded lazily on first use. Scratch buffers used to pack
 * vectors are pooled per size (at most `maxPoolSize` buffers per size).
 */
export class SIMDMathEngine {
  private wasmModule: WasmModule | null = null;
  private simdMath: SIMDMathWasm | null = null;
  private isInitialized = false;
  private isInitializing = false;
  private initPromise: Promise<void> | null = null;
  private alignedBufferPool: Map<number, Float32Array[]> = new Map();
  private maxPoolSize = 5;

  /**
   * Loads the WebAssembly module once. Concurrent callers share the same
   * in-flight initialization.
   */
  async initialize(): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;

    this.isInitializing = true;
    this.initPromise = this._doInitialize().finally(() => {
      this.isInitializing = false;
    });
    return this.initPromise;
  }

  /**
   * Imports `workers/simd_math.js` from the extension bundle, runs its default
   * initializer and instantiates `SIMDMath`. Failures are logged and rethrown.
   */
  private async _doInitialize(): Promise<void> {
    try {
      console.log('SIMDMathEngine: Initializing WebAssembly module...');
      const wasmUrl = chrome.runtime.getURL('workers/simd_math.js');
      const wasmModule = await import(wasmUrl);
      const wasmInstance = await wasmModule.default();
      this.wasmModule = {
        SIMDMath: wasmModule.SIMDMath,
        memory: wasmInstance.memory,
        default: wasmModule.default,
      };
      this.simdMath = new this.wasmModule.SIMDMath();
      this.isInitialized = true;
      console.log('SIMDMathEngine: WebAssembly module initialized successfully');
    } catch (error) {
      console.error('SIMDMathEngine: Failed to initialize WebAssembly module:', error);
      this.isInitialized = false;
      throw error;
    }
  }

  /**
   * Get aligned buffer (16-byte aligned, suitable for SIMD)
   *
   * Returns a pooled buffer of exactly `size` elements when one is available,
   * otherwise allocates a new one. A freshly allocated typed array starts at
   * `byteOffset` 0, which satisfies the `byteOffset % 16 === 0` criterion used
   * by {@link isAligned}.
   *
   * The earlier implementation derived the offset from the byte length of an
   * over-allocated buffer. That always produced an odd offset (1, 5, 9 or 13),
   * and since a `Float32Array` view must start at a multiple of 4 bytes, the
   * constructor threw a RangeError on every call.
   */
  private getAlignedBuffer(size: number): Float32Array {
    let pool = this.alignedBufferPool.get(size);
    if (!pool) {
      pool = [];
      this.alignedBufferPool.set(size, pool);
    }
    return pool.pop() ?? new Float32Array(size);
  }

  /**
   * Release aligned buffer back to pool
   *
   * The buffer is zeroed before being pooled. It is dropped when the pool for
   * its size is full or was never created.
   */
  private releaseAlignedBuffer(buffer: Float32Array): void {
    const pool = this.alignedBufferPool.get(buffer.length);
    if (pool && pool.length < this.maxPoolSize) {
      buffer.fill(0); // Clear to zero
      pool.push(buffer);
    }
  }

  /**
   * Check if vector is already aligned
   */
  private isAligned(array: Float32Array): boolean {
    return array.byteOffset % 16 === 0;
  }

  /**
   * Ensure vector alignment, create aligned copy if not aligned
   *
   * @returns The array to use and whether it is a pooled copy that the caller
   *   must hand back via {@link releaseAlignedBuffer}.
   */
  private ensureAligned(array: Float32Array): { aligned: Float32Array; needsRelease: boolean } {
    if (this.isAligned(array)) {
      return { aligned: array, needsRelease: false };
    }
    const aligned = this.getAlignedBuffer(array.length);
    aligned.set(array);
    return { aligned, needsRelease: true };
  }

  /**
   * Copies `vectors` back to back into one pooled buffer (row-major layout).
   *
   * @throws Error when a vector does not have exactly `vectorDim` elements.
   *   Without this check a short vector would silently misalign all rows.
   */
  private packVectors(vectors: Float32Array[], vectorDim: number): Float32Array {
    for (const vector of vectors) {
      if (vector.length !== vectorDim) {
        throw new Error(
          `SIMD math engine: expected vectors of dimension ${vectorDim}, got ${vector.length}`,
        );
      }
    }
    const packed = this.getAlignedBuffer(vectors.length * vectorDim);
    vectors.forEach((vector, row) => packed.set(vector, row * vectorDim));
    return packed;
  }

  /**
   * SIMD-optimized cosine similarity calculation
   *
   * @throws Error when the WebAssembly engine is unavailable after initialization.
   */
  async cosineSimilarity(vecA: Float32Array, vecB: Float32Array): Promise<number> {
    if (!this.isInitialized) {
      await this.initialize();
    }
    if (!this.simdMath) {
      throw new Error('SIMD math engine not initialized');
    }

    // Ensure vector alignment
    const { aligned: alignedA, needsRelease: releaseA } = this.ensureAligned(vecA);
    const { aligned: alignedB, needsRelease: releaseB } = this.ensureAligned(vecB);
    try {
      return this.simdMath.cosine_similarity(alignedA, alignedB);
    } finally {
      // Release temporary buffers
      if (releaseA) this.releaseAlignedBuffer(alignedA);
      if (releaseB) this.releaseAlignedBuffer(alignedB);
    }
  }

  /**
   * Batch similarity calculation
   *
   * @param vectors Candidate vectors; each must have the same length as `query`.
   * @param query Query vector.
   * @returns One score per candidate, in input order; `[]` for no candidates.
   * @throws Error when the engine is unavailable or a vector has the wrong dimension.
   */
  async batchSimilarity(vectors: Float32Array[], query: Float32Array): Promise<number[]> {
    if (!this.isInitialized) {
      await this.initialize();
    }
    if (!this.simdMath) {
      throw new Error('SIMD math engine not initialized');
    }
    if (vectors.length === 0) {
      return [];
    }

    const vectorDim = query.length;
    // Pack all vectors into contiguous memory layout
    const packedVectors = this.packVectors(vectors, vectorDim);
    const { aligned: alignedQuery, needsRelease: releaseQuery } = this.ensureAligned(query);
    try {
      // Batch calculation
      const results = this.simdMath.batch_similarity(packedVectors, alignedQuery, vectorDim);
      return Array.from(results);
    } finally {
      this.releaseAlignedBuffer(packedVectors);
      if (releaseQuery) this.releaseAlignedBuffer(alignedQuery);
    }
  }

  /**
   * Similarity matrix calculation
   *
   * @returns `matrix[i][j]` is the score of `vectorsA[i]` against
   *   `vectorsB[j]`. Returns `[]` when either input is empty or the engine is
   *   unavailable.
   * @throws Error when a vector's length differs from that of `vectorsA[0]`.
   */
  async similarityMatrix(vectorsA: Float32Array[], vectorsB: Float32Array[]): Promise<number[][]> {
    if (!this.isInitialized) {
      await this.initialize();
    }
    if (!this.simdMath || vectorsA.length === 0 || vectorsB.length === 0) {
      return [];
    }

    const vectorDim = vectorsA[0].length;
    const numA = vectorsA.length;
    const numB = vectorsB.length;

    // Pack vectors
    const packedA = this.packVectors(vectorsA, vectorDim);
    let packedB: Float32Array;
    try {
      packedB = this.packVectors(vectorsB, vectorDim);
    } catch (error) {
      this.releaseAlignedBuffer(packedA);
      throw error;
    }

    try {
      // Calculate matrix
      const flatResults = this.simdMath.similarity_matrix(packedA, packedB, vectorDim);
      // Convert to 2D array
      const matrix: number[][] = [];
      for (let i = 0; i < numA; i++) {
        matrix.push(Array.from(flatResults.subarray(i * numB, (i + 1) * numB)));
      }
      return matrix;
    } finally {
      this.releaseAlignedBuffer(packedA);
      this.releaseAlignedBuffer(packedB);
    }
  }

  /**
   * Check SIMD support
   *
   * Validates two tiny WebAssembly modules that use SIMD instructions. If both
   * are rejected, falls back to a Chrome version check on the user agent.
   * Never throws; any failure is logged and reported as `false`.
   */
  static async checkSIMDSupport(): Promise<boolean> {
    try {
      console.log('SIMDMathEngine: Checking SIMD support...');
      // Get browser information
      const userAgent = navigator.userAgent;
      const browserInfo = SIMDMathEngine.getBrowserInfo();
      console.log('Browser info:', browserInfo);
      console.log('User Agent:', userAgent);

      // Check WebAssembly basic support
      if (typeof WebAssembly !== 'object') {
        console.log('WebAssembly not supported');
        return false;
      }
      console.log('✅ WebAssembly basic support: OK');

      // Check WebAssembly.validate method
      if (typeof WebAssembly.validate !== 'function') {
        console.log('❌ WebAssembly.validate not available');
        return false;
      }
      console.log('✅ WebAssembly.validate: OK');

      // WASM magic number followed by the binary format version.
      const wasmHeader = [0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00];

      // Test basic WebAssembly module validation
      const basicValid = WebAssembly.validate(new Uint8Array(wasmHeader));
      console.log('✅ Basic WASM validation:', basicValid);

      // Check WebAssembly SIMD support - using correct SIMD test module
      console.log('Testing SIMD WASM module...');

      // Method 1: Use standard SIMD detection bytecode
      let wasmSIMDSupported = false;
      try {
        // This is a minimal SIMD module containing v128.const instruction.
        // The section and body sizes must match the bytes exactly, otherwise
        // validation fails regardless of SIMD support.
        const simdWasm = new Uint8Array([
          ...wasmHeader,
          // type section (5 bytes): one function type () -> v128
          0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b,
          // function section (2 bytes): one function of type 0
          0x03, 0x02, 0x01, 0x00,
          // code section (22 bytes): one body of 20 bytes, no locals
          0x0a, 0x16, 0x01, 0x14, 0x00,
          // v128.const followed by its 16 immediate bytes
          0xfd, 0x0c,
          ...new Array<number>(16).fill(0x00),
          0x0b, // end
        ]);
        wasmSIMDSupported = WebAssembly.validate(simdWasm);
        console.log('Method 1 - Standard SIMD test result:', wasmSIMDSupported);
      } catch (error) {
        console.log('Method 1 failed:', error);
      }

      // Method 2: If method 1 fails, try simpler SIMD instruction
      if (!wasmSIMDSupported) {
        try {
          // Test using i32x4.splat instruction
          const simpleSimdWasm = new Uint8Array([
            ...wasmHeader,
            // type section (6 bytes): one function type (i32) -> v128
            0x01, 0x06, 0x01, 0x60, 0x01, 0x7f, 0x01, 0x7b,
            // function section (2 bytes): one function of type 0
            0x03, 0x02, 0x01, 0x00,
            // code section (8 bytes): one body of 6 bytes, no locals
            0x0a, 0x08, 0x01, 0x06, 0x00,
            0x20, 0x00, // local.get 0
            0xfd, 0x11, // i32x4.splat
            0x0b, // end
          ]);
          wasmSIMDSupported = WebAssembly.validate(simpleSimdWasm);
          console.log('Method 2 - Simple SIMD test result:', wasmSIMDSupported);
        } catch (error) {
          console.log('Method 2 failed:', error);
        }
      }

      // Method 3: If previous methods fail, try detecting specific SIMD features
      if (!wasmSIMDSupported) {
        try {
          // Check if SIMD feature flags are supported
          const featureTest = WebAssembly.validate(new Uint8Array(wasmHeader));
          if (featureTest) {
            // In Chrome, if basic WebAssembly works and version >= 91, SIMD is usually available
            const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
            if (chromeMatch && parseInt(chromeMatch[1]) >= 91) {
              console.log('Method 3 - Chrome version check: SIMD should be available');
              wasmSIMDSupported = true;
            }
          }
        } catch (error) {
          console.log('Method 3 failed:', error);
        }
      }

      // Output final result
      if (!wasmSIMDSupported) {
        console.log('❌ SIMD not supported. Browser requirements:');
        console.log('- Chrome 91+, Firefox 89+, Safari 16.4+, Edge 91+');
        console.log('Your browser should support SIMD. Possible issues:');
        console.log('1. Extension context limitations');
        console.log('2. Security policies');
        console.log('3. Feature flags disabled');
      } else {
        console.log('✅ SIMD supported!');
      }
      return wasmSIMDSupported;
    } catch (error: unknown) {
      console.error('SIMD support check failed:', error);
      if (error instanceof Error) {
        console.error('Error details:', {
          name: error.name,
          message: error.message,
          stack: error.stack,
        });
      }
      return false;
    }
  }

  /**
   * Get browser information
   *
   * Derives the browser name and version from `navigator.userAgent`, and
   * whether that version meets the SIMD minimum listed for it.
   */
  static getBrowserInfo(): { name: string; version: string; supported: boolean } {
    const userAgent = navigator.userAgent;
    let browserName = 'Unknown';
    let version = 'Unknown';
    let supported = false;

    // Edge user agents also contain "Chrome/", so Edge has to be tested before
    // Chrome; otherwise this branch can never be reached.
    if (userAgent.includes('Edg/')) {
      browserName = 'Edge';
      const match = userAgent.match(/Edg\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 91;
      }
    }
    // Chrome
    else if (userAgent.includes('Chrome/')) {
      browserName = 'Chrome';
      const match = userAgent.match(/Chrome\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 91;
      }
    }
    // Firefox
    else if (userAgent.includes('Firefox/')) {
      browserName = 'Firefox';
      const match = userAgent.match(/Firefox\/(\d+)/);
      if (match) {
        version = match[1];
        supported = parseInt(version) >= 89;
      }
    }
    // Safari
    else if (userAgent.includes('Safari/')) {
      browserName = 'Safari';
      const match = userAgent.match(/Version\/(\d+\.\d+)/);
      if (match) {
        version = match[1];
        supported = parseFloat(version) >= 16.4;
      }
    }
    return { name: browserName, version, supported };
  }

  /** Returns the initialization state and, per buffer size, how many buffers are pooled. */
  getStats() {
    return {
      isInitialized: this.isInitialized,
      isInitializing: this.isInitializing,
      bufferPoolStats: Array.from(this.alignedBufferPool.entries()).map(([size, buffers]) => ({
        size,
        pooled: buffers.length,
        maxPoolSize: this.maxPoolSize,
      })),
    };
  }

  /**
   * Frees the WebAssembly instance (best effort), empties the buffer pool and
   * resets the engine so that it can be initialized again.
   */
  dispose(): void {
    if (this.simdMath) {
      try {
        this.simdMath.free();
      } catch (error) {
        console.warn('Failed to free SIMD math instance:', error);
      }
      this.simdMath = null;
    }
    this.alignedBufferPool.clear();
    this.wasmModule = null;
    this.isInitialized = false;
    this.isInitializing = false;
    this.initPromise = null;
  }
}

View File

@@ -0,0 +1,264 @@
/**
* Text chunking utility
* Based on semantic chunking strategy, splits long text into small chunks suitable for vectorization
*/
/** One piece of text produced by {@link TextChunker}. */
export interface TextChunk {
  /** Chunk text. */
  text: string;
  /** Label describing where the chunk came from (e.g. 'title', 'content_chunk_0'). */
  source: string;
  /** Position assigned while chunking; parts of one over-long sentence share an index. */
  index: number;
  /** Number of whitespace-separated words in `text`. */
  wordCount: number;
}

/** Tuning knobs for {@link TextChunker.chunkText}. */
export interface ChunkingOptions {
  /** Word budget per chunk (default 80). */
  maxWordsPerChunk?: number;
  /** Number of trailing sentences repeated at the start of the next chunk (default 1). */
  overlapSentences?: number;
  /** Chunks whose text is not longer than this many characters are dropped (default 20). */
  minChunkLength?: number;
  /** Whether a title longer than 5 characters becomes its own chunk (default true). */
  includeTitle?: boolean;
}

/**
 * Splits page text into chunks sized for vectorization.
 *
 * The text is first split into sentences. Short sentences are grouped into
 * chunks with sentence overlap, sentences over the word budget are cut into
 * word windows, and text that yields no sentences falls back to paragraph or
 * plain word-window chunking.
 */
export class TextChunker {
  private readonly defaultOptions: Required<ChunkingOptions> = {
    maxWordsPerChunk: 80,
    overlapSentences: 1,
    minChunkLength: 20,
    includeTitle: true,
  };

  /**
   * Chunks `content`, optionally preceded by a chunk for `title`.
   *
   * @param content Text to chunk; whitespace-only content yields at most the title chunk.
   * @param title Optional title; used only when `includeTitle` is set and the
   *   trimmed title is longer than 5 characters.
   * @param options Overrides for the default chunking options.
   * @returns The chunks in document order.
   */
  public chunkText(content: string, title?: string, options?: ChunkingOptions): TextChunk[] {
    const opts = { ...this.defaultOptions, ...options };
    const chunks: TextChunk[] = [];

    const trimmedTitle = title?.trim() ?? '';
    if (opts.includeTitle && trimmedTitle.length > 5) {
      chunks.push({
        text: trimmedTitle,
        source: 'title',
        index: 0,
        wordCount: trimmedTitle.split(/\s+/).length,
      });
    }

    const cleanContent = content.trim();
    if (!cleanContent) {
      return chunks;
    }

    const sentences = this.splitIntoSentences(cleanContent);
    if (sentences.length === 0) {
      return this.fallbackChunking(cleanContent, chunks, opts);
    }

    const hasLongSentences = sentences.some(
      (s: string) => s.split(/\s+/).length > opts.maxWordsPerChunk,
    );
    if (hasLongSentences) {
      return this.mixedChunking(sentences, chunks, opts);
    }
    return this.groupSentencesIntoChunks(sentences, chunks, opts);
  }

  /**
   * Splits text into sentences by inserting line breaks after sentence-ending
   * punctuation, then keeps fragments longer than 15 characters. When a long
   * text (over 500 characters) yields fewer than 3 sentences, it is split
   * again with the more aggressive strategy.
   */
  private splitIntoSentences(content: string): string[] {
    const processedContent = content
      .replace(/([。!?])\s*/g, '$1\n')
      .replace(/([.!?])\s+(?=[A-Z])/g, '$1\n')
      .replace(/([.!?]["'])\s+(?=[A-Z])/g, '$1\n')
      .replace(/([.!?])\s*$/gm, '$1\n')
      .replace(/([。!?][""])\s*/g, '$1\n')
      .replace(/\n\s*\n/g, '\n');

    const sentences = processedContent
      .split('\n')
      .map((s) => s.trim())
      .filter((s) => s.length > 15);

    if (sentences.length < 3 && content.length > 500) {
      return this.aggressiveSentenceSplitting(content);
    }
    return sentences;
  }

  /**
   * Splits on every sentence mark, semicolon, colon and closing parenthesis.
   * Fragments that still exceed 80 words are cut into 80-word windows that
   * overlap by 5 words. Fragments of 15 characters or fewer are dropped.
   */
  private aggressiveSentenceSplitting(content: string): string[] {
    const sentences = content
      .replace(/([.!?。!?])/g, '$1\n')
      .replace(/([;:])/g, '$1\n')
      .replace(/([)])\s*(?=[\u4e00-\u9fa5A-Z])/g, '$1\n')
      .split('\n')
      .map((s) => s.trim())
      .filter((s) => s.length > 15);

    const maxWordsPerChunk = 80;
    const overlapWords = 5;
    const finalSentences: string[] = [];
    for (const sentence of sentences) {
      const words = sentence.split(/\s+/);
      if (words.length <= maxWordsPerChunk) {
        finalSentences.push(sentence);
        continue;
      }
      for (let i = 0; i < words.length; i += maxWordsPerChunk - overlapWords) {
        const chunkText = words.slice(i, i + maxWordsPerChunk).join(' ');
        if (chunkText.length > 15) {
          finalSentences.push(chunkText);
        }
      }
    }
    return finalSentences;
  }

  /**
   * Group sentences into chunks
   *
   * Packs consecutive sentences into a chunk until the word budget would be
   * exceeded. The next chunk starts `overlapSentences` sentences before the
   * end of the current one, always advancing by at least one sentence.
   */
  private groupSentencesIntoChunks(
    sentences: string[],
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;
    let start = 0;

    while (start < sentences.length) {
      const parts: string[] = [];
      let wordCount = 0;

      while (start + parts.length < sentences.length && wordCount < options.maxWordsPerChunk) {
        const sentence = sentences[start + parts.length];
        const sentenceWords = sentence.split(/\s+/).length;
        // A chunk always takes at least one sentence, even an over-budget one.
        if (wordCount + sentenceWords > options.maxWordsPerChunk && wordCount > 0) {
          break;
        }
        parts.push(sentence);
        wordCount += sentenceWords;
      }

      const text = parts.join(' ').trim();
      if (text.length > options.minChunkLength) {
        chunks.push({
          text,
          source: `content_chunk_${chunkIndex}`,
          index: chunkIndex,
          wordCount,
        });
        chunkIndex++;
      }

      // Every sentence has been consumed. Stepping back for overlap here would
      // only emit a trailing chunk that duplicates the end of this one.
      if (start + parts.length >= sentences.length) {
        break;
      }
      start += Math.max(1, parts.length - options.overlapSentences);
    }
    return chunks;
  }

  /**
   * Mixed chunking method (handles long sentences)
   *
   * Each sentence within the word budget becomes its own chunk. A longer
   * sentence is cut into consecutive word windows that share one chunk index
   * and are told apart by the `_part_N` suffix of `source`.
   */
  private mixedChunking(
    sentences: string[],
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;

    for (const sentence of sentences) {
      const words = sentence.split(/\s+/);
      if (words.length <= options.maxWordsPerChunk) {
        chunks.push({
          text: sentence.trim(),
          source: `sentence_chunk_${chunkIndex}`,
          index: chunkIndex,
          wordCount: words.length,
        });
        chunkIndex++;
        continue;
      }

      for (let i = 0; i < words.length; i += options.maxWordsPerChunk) {
        const chunkWords = words.slice(i, i + options.maxWordsPerChunk);
        const chunkText = chunkWords.join(' ');
        if (chunkText.length > options.minChunkLength) {
          chunks.push({
            text: chunkText,
            source: `long_sentence_chunk_${chunkIndex}_part_${Math.floor(i / options.maxWordsPerChunk)}`,
            index: chunkIndex,
            wordCount: chunkWords.length,
          });
        }
      }
      chunkIndex++;
    }
    return chunks;
  }

  /**
   * Fallback chunking (when sentence splitting fails)
   *
   * When the content has more than one paragraph longer than `minChunkLength`,
   * each such paragraph is cut into 150-word windows. Otherwise the whole
   * content is cut into 150-word windows.
   */
  private fallbackChunking(
    content: string,
    existingChunks: TextChunk[],
    options: Required<ChunkingOptions>,
  ): TextChunk[] {
    const chunks = [...existingChunks];
    let chunkIndex = chunks.length;
    const maxWordsPerChunk = 150;

    // Cuts `text` into word windows and appends every window that is long enough.
    const appendWordWindows = (text: string, sourceFor: (part: number) => string): void => {
      const words = text.split(/\s+/);
      for (let i = 0; i < words.length; i += maxWordsPerChunk) {
        const chunkWords = words.slice(i, i + maxWordsPerChunk);
        const chunkText = chunkWords.join(' ');
        if (chunkText.length > options.minChunkLength) {
          chunks.push({
            text: chunkText,
            source: sourceFor(Math.floor(i / maxWordsPerChunk)),
            index: chunkIndex,
            wordCount: chunkWords.length,
          });
          chunkIndex++;
        }
      }
    };

    const paragraphs = content
      .split(/\n\s*\n/)
      .filter((p) => p.trim().length > options.minChunkLength);

    if (paragraphs.length > 1) {
      paragraphs.forEach((paragraph, index) => {
        const cleanParagraph = paragraph.trim();
        if (cleanParagraph.length > 0) {
          appendWordWindows(cleanParagraph, (part) => `paragraph_${index}_chunk_${part}`);
        }
      });
    } else {
      appendWordWindows(content.trim(), (part) => `content_chunk_${part}`);
    }
    return chunks;
  }
}

File diff suppressed because it is too large Load Diff