Files
broswer-automation/app/chrome-extension/entrypoints/background/tools/browser/vector-search.ts
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

309 lines
9.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Vectorized tab content search tool
* Uses vector database for efficient semantic search
*/
import { createErrorResponse, ToolResult } from '@/common/tool-handler';
import { BaseBrowserToolExecutor } from '../base-browser';
import { TOOL_NAMES } from 'chrome-mcp-shared';
import { ContentIndexer } from '@/utils/content-indexer';
import { LIMITS, ERROR_MESSAGES } from '@/common/constants';
import type { SearchResult } from '@/utils/vector-database';
/**
 * Flattened view of a single vector-database hit.
 *
 * Produced by `convertSearchResults` from a `SearchResult` and later
 * serialized into the JSON payload returned by the tool.
 */
interface VectorSearchResult {
  // --- Identity of the tab the matched chunk came from ---

  /** Tab id copied from the matched document; used as the de-duplication key. */
  tabId: number;
  /** Page title copied from the matched document. */
  title: string;
  /** Page URL copied from the matched document. */
  url: string;

  // --- Details of the match itself ---

  /** Similarity reported by the vector search; results are ranked by it, highest first. */
  semanticScore: number;
  /** Matched chunk text, shortened for display by `extractSnippet`. */
  matchedSnippet: string;
  /** Copied from the chunk's `source` field (presumably where on the page the chunk came from; confirm in the indexer). */
  chunkSource: string;
  /** Copied from the document's `timestamp` field (units are not visible in this file). */
  timestamp: number;
}
/**
 * Tool for vectorized search of tab content using semantic similarity.
 *
 * Wraps a `ContentIndexer`: `execute` runs a semantic query against the indexed
 * tab content and reports the best-matching chunk for each tab, while the other
 * public methods expose index maintenance (stats, rebuild, per-tab add/remove).
 */
class VectorSearchTabsContentTool extends BaseBrowserToolExecutor {
  /** Raw chunk matches requested per query; more than are returned so de-duplication has material to work with. */
  private static readonly CANDIDATE_LIMIT = 50;

  /** Maximum number of tabs reported in a single response. */
  private static readonly MAX_RETURNED_TABS = 10;

  /**
   * Characters treated as the end of a sentence when shortening a snippet:
   * ASCII `.`, `!`, `?` plus the ideographic full stop (U+3002) and the
   * full-width `!` (U+FF01) and `?` (U+FF1F). Written as escapes so the
   * look-alike characters cannot be silently lost or confused.
   */
  private static readonly SENTENCE_TERMINATORS: readonly string[] = [
    '.',
    '!',
    '?',
    '\u3002',
    '\uFF01',
    '\uFF1F',
  ];

  name = TOOL_NAMES.BROWSER.SEARCH_TABS_CONTENT;
  private contentIndexer: ContentIndexer;
  private isInitialized = false;

  constructor() {
    super();
    // Only constructs the indexer; initialize() is deferred until a method needs it.
    this.contentIndexer = new ContentIndexer({
      autoIndex: true,
      maxChunksPerPage: LIMITS.MAX_SEARCH_RESULTS,
      skipDuplicates: true,
    });
  }

  /**
   * Initialize the content indexer and record the outcome in `isInitialized`.
   *
   * Never throws: a failure is logged and leaves `isInitialized` false, so
   * callers must re-check readiness themselves.
   */
  private async initializeIndexer(): Promise<void> {
    try {
      await this.contentIndexer.initialize();
      this.isInitialized = true;
      console.log('VectorSearchTabsContentTool: Content indexer initialized successfully');
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Failed to initialize content indexer:', error);
      this.isInitialized = false;
    }
  }

  /**
   * Run a semantic search over indexed tab content.
   *
   * @param args - Tool arguments; `query` must be a non-blank string.
   * @returns A `ToolResult` whose text content is a JSON document with index
   *   statistics and up to `MAX_RETURNED_TABS` matched tabs (one snippet per
   *   tab, highest similarity first). Invalid input, an engine that is not
   *   ready, and any thrown error are all reported through
   *   `createErrorResponse` rather than by throwing.
   */
  async execute(args: { query: string }): Promise<ToolResult> {
    try {
      const { query } = args;
      if (!query || query.trim().length === 0) {
        return createErrorResponse(
          ERROR_MESSAGES.INVALID_PARAMETERS + ': Query parameter is required and cannot be empty',
        );
      }
      console.log(`VectorSearchTabsContentTool: Starting vector search with query: "${query}"`);

      // Check semantic engine status
      if (!this.contentIndexer.isSemanticEngineReady()) {
        if (this.contentIndexer.isSemanticEngineInitializing()) {
          return createErrorResponse(
            'Vector search engine is still initializing (model downloading). Please wait a moment and try again.',
          );
        }
        // Not ready and not already starting up: try to initialize now.
        console.log('VectorSearchTabsContentTool: Initializing content indexer...');
        await this.initializeIndexer();
        // initializeIndexer swallows failures, so readiness has to be re-checked.
        if (!this.contentIndexer.isSemanticEngineReady()) {
          return createErrorResponse('Failed to initialize vector search engine');
        }
      }

      // Execute vector search, asking for more results than are returned so
      // that several chunks from the same tab can be collapsed afterwards.
      const searchResults = await this.contentIndexer.searchContent(
        query,
        VectorSearchTabsContentTool.CANDIDATE_LIMIT,
      );
      const vectorSearchResults = this.convertSearchResults(searchResults);

      // Deduplicate by tab, keep only the highest similarity fragment per tab
      const deduplicatedResults = this.deduplicateByTab(vectorSearchResults);

      // Sort by similarity (descending) and keep the top results.
      // deduplicateByTab returns a fresh array, so sorting in place is safe.
      const topResults = deduplicatedResults
        .sort((a, b) => b.semanticScore - a.semanticScore)
        .slice(0, VectorSearchTabsContentTool.MAX_RETURNED_TABS);

      // Get index statistics
      const stats = this.contentIndexer.getStats();
      const result = {
        success: true,
        totalTabsSearched: stats.totalTabs,
        matchedTabsCount: topResults.length,
        vectorSearchEnabled: true,
        indexStats: {
          totalDocuments: stats.totalDocuments,
          totalTabs: stats.totalTabs,
          indexedPages: stats.indexedPages,
          semanticEngineReady: stats.semanticEngineReady,
          semanticEngineInitializing: stats.semanticEngineInitializing,
        },
        matchedTabs: topResults.map((match) => ({
          tabId: match.tabId,
          url: match.url,
          title: match.title,
          semanticScore: match.semanticScore,
          // Array-valued in the response even though only one snippet per tab survives.
          matchedSnippets: [match.matchedSnippet],
          chunkSource: match.chunkSource,
          timestamp: match.timestamp,
        })),
      };
      console.log(
        `VectorSearchTabsContentTool: Found ${topResults.length} results with vector search`,
      );
      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(result, null, 2),
          },
        ],
        isError: false,
      };
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Search failed:', error);
      return createErrorResponse(
        `Vector search failed: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
  }

  /**
   * Ensure all tabs are indexed.
   *
   * Tabs without a (truthy) id are skipped. Tabs are indexed concurrently and a
   * failure for one tab is logged as a warning without affecting the others.
   */
  private async ensureTabsIndexed(tabs: chrome.tabs.Tab[]): Promise<void> {
    const tabIds = tabs.flatMap((tab) => (tab.id ? [tab.id] : []));
    const indexPromises = tabIds.map(async (tabId) => {
      try {
        await this.contentIndexer.indexTabContent(tabId);
      } catch (error) {
        console.warn(`VectorSearchTabsContentTool: Failed to index tab ${tabId}:`, error);
      }
    });
    await Promise.allSettled(indexPromises);
  }

  /**
   * Convert search results format: flatten each vector-database hit into a
   * `VectorSearchResult`, shortening the chunk text to a display snippet.
   */
  private convertSearchResults(searchResults: SearchResult[]): VectorSearchResult[] {
    return searchResults.map((result) => ({
      tabId: result.document.tabId,
      url: result.document.url,
      title: result.document.title,
      semanticScore: result.similarity,
      matchedSnippet: this.extractSnippet(result.document.chunk.text),
      chunkSource: result.document.chunk.source,
      timestamp: result.document.timestamp,
    }));
  }

  /**
   * Deduplicate by tab, keep only the highest similarity fragment per tab.
   *
   * On equal scores the fragment seen first wins. The returned array is new
   * and follows the order in which each tab was first encountered.
   */
  private deduplicateByTab(results: VectorSearchResult[]): VectorSearchResult[] {
    const tabMap = new Map<number, VectorSearchResult>();
    for (const result of results) {
      const existingResult = tabMap.get(result.tabId);
      // If this tab has no result yet, or current result has higher similarity, update it
      if (!existingResult || result.semanticScore > existingResult.semanticScore) {
        tabMap.set(result.tabId, result);
      }
    }
    return Array.from(tabMap.values());
  }

  /**
   * Extract text snippet for display.
   *
   * Text no longer than `maxLength` is returned unchanged. Longer text is cut
   * to `maxLength` characters and then, in order of preference:
   *  1. ended at the last sentence terminator, if that lies in the final 30%
   *     of the cut (terminator kept, no ellipsis);
   *  2. ended at the last space, if that lies in the final 20% of the cut,
   *     followed by `...`;
   *  3. returned as the hard cut followed by `...`.
   *
   * @param text - Full chunk text.
   * @param maxLength - Maximum number of characters taken from `text`.
   */
  private extractSnippet(text: string, maxLength: number = 200): string {
    if (text.length <= maxLength) {
      return text;
    }

    const truncated = text.substring(0, maxLength);

    // Try to truncate at sentence boundary. Every terminator must be a
    // non-empty string: lastIndexOf('') returns the string length, which would
    // make this check always succeed and disable both fallbacks below.
    const lastSentenceEnd = Math.max(
      ...VectorSearchTabsContentTool.SENTENCE_TERMINATORS.map((terminator) =>
        truncated.lastIndexOf(terminator),
      ),
    );
    if (lastSentenceEnd > maxLength * 0.7) {
      return truncated.substring(0, lastSentenceEnd + 1);
    }

    // If no suitable sentence boundary found, truncate at word boundary
    const lastSpaceIndex = truncated.lastIndexOf(' ');
    if (lastSpaceIndex > maxLength * 0.8) {
      return truncated.substring(0, lastSpaceIndex) + '...';
    }

    return truncated + '...';
  }

  /**
   * Get index statistics.
   *
   * Does not trigger initialization: until the indexer has been initialized
   * through this tool, a zeroed placeholder with `isInitialized: false` is
   * returned instead of the indexer's own stats.
   */
  public async getIndexStats() {
    if (!this.isInitialized) {
      // Don't automatically initialize - just return basic stats
      return {
        totalDocuments: 0,
        totalTabs: 0,
        indexSize: 0,
        indexedPages: 0,
        isInitialized: false,
        semanticEngineReady: false,
        semanticEngineInitializing: false,
      };
    }
    return this.contentIndexer.getStats();
  }

  /**
   * Manually rebuild index: clear everything, then re-index every open tab
   * whose URL is not a browser-internal page.
   *
   * @throws Re-throws any error raised while clearing the index or listing
   *   windows (after logging it). Failures for individual tabs are only
   *   logged by `ensureTabsIndexed`.
   */
  public async rebuildIndex(): Promise<void> {
    if (!this.isInitialized) {
      await this.initializeIndexer();
    }
    try {
      // Clear existing indexes
      await this.contentIndexer.clearAllIndexes();

      // Get all tabs and reindex
      const windows = await chrome.windows.getAll({ populate: true });
      const allTabs: chrome.tabs.Tab[] = [];
      for (const window of windows) {
        if (window.tabs) {
          allTabs.push(...window.tabs);
        }
      }

      // Skip tabs without an id/URL and browser-internal pages.
      const validTabs = allTabs.filter(
        (tab) =>
          tab.id &&
          tab.url &&
          !tab.url.startsWith('chrome://') &&
          !tab.url.startsWith('chrome-extension://') &&
          !tab.url.startsWith('edge://') &&
          !tab.url.startsWith('about:'),
      );
      await this.ensureTabsIndexed(validTabs);
      console.log(`VectorSearchTabsContentTool: Rebuilt index for ${validTabs.length} tabs`);
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Failed to rebuild index:', error);
      throw error;
    }
  }

  /**
   * Manually index specified tab, initializing the indexer first if needed.
   * Errors from the indexer propagate to the caller.
   */
  public async indexTab(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      await this.initializeIndexer();
    }
    await this.contentIndexer.indexTabContent(tabId);
  }

  /**
   * Remove index for specified tab. A no-op while the indexer has not been
   * initialized through this tool.
   */
  public async removeTabIndex(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      return;
    }
    await this.contentIndexer.removeTabIndex(tabId);
  }
}
// Export tool instance: a single module-level instance, created when this
// module is first imported. The constructor in this file only builds the
// ContentIndexer and does not call its initialize(); that is deferred to
// execute(), rebuildIndex() or indexTab().
export const vectorSearchTabsContentTool = new VectorSearchTabsContentTool();