first commit

This commit is contained in:
nasir@endelospay.com
2025-08-12 02:54:17 +05:00
commit d97cad1736
225 changed files with 137626 additions and 0 deletions

View File

@@ -0,0 +1,308 @@
/**
* Vectorized tab content search tool
* Uses vector database for efficient semantic search
*/
import { createErrorResponse, ToolResult } from '@/common/tool-handler';
import { BaseBrowserToolExecutor } from '../base-browser';
import { TOOL_NAMES } from 'chrome-mcp-shared';
import { ContentIndexer } from '@/utils/content-indexer';
import { LIMITS, ERROR_MESSAGES } from '@/common/constants';
import type { SearchResult } from '@/utils/vector-database';
/**
 * Flattened view of a single vector-database hit, as produced by
 * `convertSearchResults` and reported back to the tool caller.
 */
interface VectorSearchResult {
  // --- Which tab the hit belongs to ---

  /** Id of the tab the matched document was indexed from. */
  tabId: number;
  /** URL recorded on the indexed document. */
  url: string;
  /** Page title recorded on the indexed document. */
  title: string;

  // --- What matched and how well ---

  /** Similarity value reported by the vector search for this chunk. */
  semanticScore: number;
  /** Display snippet derived from the matched chunk's text. */
  matchedSnippet: string;
  /** The `source` field of the matched chunk. */
  chunkSource: string;
  /**
   * Timestamp copied from the indexed document.
   * NOTE(review): unit is not visible in this file (presumably epoch ms) — confirm.
   */
  timestamp: number;
}
/**
 * Tool for vectorized search of tab content using semantic similarity.
 *
 * Wraps a `ContentIndexer`: `execute` runs a query against the index, keeps the
 * best-scoring chunk per tab and returns the top matches as a JSON text payload.
 * The remaining public methods expose index maintenance (stats, rebuild,
 * per-tab index / remove).
 */
class VectorSearchTabsContentTool extends BaseBrowserToolExecutor {
  /** Number of raw chunk hits requested from the indexer before per-tab deduplication. */
  private static readonly RAW_RESULT_LIMIT = 50;
  /** Maximum number of tabs reported back to the caller. */
  private static readonly MAX_REPORTED_TABS = 10;
  /**
   * Characters treated as sentence terminators when shortening a snippet:
   * ASCII `.`, `!`, `?` plus the full-width `。`, `！`, `？`.
   */
  private static readonly SENTENCE_TERMINATORS = ['.', '!', '?', '。', '！', '？'] as const;

  name = TOOL_NAMES.BROWSER.SEARCH_TABS_CONTENT;
  private readonly contentIndexer: ContentIndexer;
  private isInitialized = false;

  constructor() {
    super();
    this.contentIndexer = new ContentIndexer({
      autoIndex: true,
      maxChunksPerPage: LIMITS.MAX_SEARCH_RESULTS,
      skipDuplicates: true,
    });
  }

  /**
   * Initialize the content indexer and record the outcome in `isInitialized`.
   * Failures are logged and swallowed; callers must re-check readiness afterwards.
   */
  private async initializeIndexer(): Promise<void> {
    try {
      await this.contentIndexer.initialize();
      this.isInitialized = true;
      console.log('VectorSearchTabsContentTool: Content indexer initialized successfully');
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Failed to initialize content indexer:', error);
      this.isInitialized = false;
    }
  }

  /**
   * Run a semantic search over indexed tab content.
   *
   * @param args.query Free-text query; must be a non-blank string.
   * @returns A text `ToolResult` whose payload is a JSON document with index
   *   statistics and up to `MAX_REPORTED_TABS` matched tabs (one snippet per tab,
   *   sorted by descending score), or an error response when the query is
   *   invalid, the engine is not ready, or the search throws.
   */
  async execute(args: { query: string }): Promise<ToolResult> {
    try {
      const { query } = args;
      // Args arrive from an external caller, so guard the runtime type as well:
      // a non-string value would otherwise fail inside `.trim()` with a misleading error.
      if (typeof query !== 'string' || query.trim().length === 0) {
        return createErrorResponse(
          ERROR_MESSAGES.INVALID_PARAMETERS + ': Query parameter is required and cannot be empty',
        );
      }
      console.log(`VectorSearchTabsContentTool: Starting vector search with query: "${query}"`);

      // Check semantic engine status
      if (!this.contentIndexer.isSemanticEngineReady()) {
        if (this.contentIndexer.isSemanticEngineInitializing()) {
          return createErrorResponse(
            'Vector search engine is still initializing (model downloading). Please wait a moment and try again.',
          );
        }
        // Not ready and not initializing: try to initialize now.
        console.log('VectorSearchTabsContentTool: Initializing content indexer...');
        await this.initializeIndexer();
        // initializeIndexer swallows errors, so readiness has to be re-checked.
        if (!this.contentIndexer.isSemanticEngineReady()) {
          return createErrorResponse('Failed to initialize vector search engine');
        }
      }

      // Over-fetch chunk hits so that enough distinct tabs remain after deduplication.
      const searchResults = await this.contentIndexer.searchContent(
        query,
        VectorSearchTabsContentTool.RAW_RESULT_LIMIT,
      );
      const vectorSearchResults = this.convertSearchResults(searchResults);
      // Keep only the highest-similarity fragment per tab.
      const deduplicatedResults = this.deduplicateByTab(vectorSearchResults);
      // `deduplicatedResults` is a fresh array, so the in-place sort is safe.
      const topResults = deduplicatedResults
        .sort((a, b) => b.semanticScore - a.semanticScore)
        .slice(0, VectorSearchTabsContentTool.MAX_REPORTED_TABS);

      const stats = this.contentIndexer.getStats();
      const result = {
        success: true,
        totalTabsSearched: stats.totalTabs,
        matchedTabsCount: topResults.length,
        vectorSearchEnabled: true,
        indexStats: {
          totalDocuments: stats.totalDocuments,
          totalTabs: stats.totalTabs,
          indexedPages: stats.indexedPages,
          semanticEngineReady: stats.semanticEngineReady,
          semanticEngineInitializing: stats.semanticEngineInitializing,
        },
        matchedTabs: topResults.map((match) => ({
          tabId: match.tabId,
          url: match.url,
          title: match.title,
          semanticScore: match.semanticScore,
          matchedSnippets: [match.matchedSnippet],
          chunkSource: match.chunkSource,
          timestamp: match.timestamp,
        })),
      };
      console.log(
        `VectorSearchTabsContentTool: Found ${topResults.length} results with vector search`,
      );
      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify(result, null, 2),
          },
        ],
        isError: false,
      };
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Search failed:', error);
      return createErrorResponse(
        `Vector search failed: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
  }

  /**
   * Index every given tab that has a (truthy) id, concurrently.
   * Per-tab failures are logged as warnings and do not abort the other tabs.
   */
  private async ensureTabsIndexed(tabs: chrome.tabs.Tab[]): Promise<void> {
    // flatMap narrows `id` to `number`, so no non-null assertion is needed below.
    const tabIds = tabs.flatMap((tab) => (tab.id ? [tab.id] : []));
    const indexPromises = tabIds.map(async (tabId) => {
      try {
        await this.contentIndexer.indexTabContent(tabId);
      } catch (error) {
        console.warn(`VectorSearchTabsContentTool: Failed to index tab ${tabId}:`, error);
      }
    });
    await Promise.allSettled(indexPromises);
  }

  /**
   * Convert raw vector-database hits into the flat `VectorSearchResult` shape,
   * shortening each chunk's text to a display snippet.
   */
  private convertSearchResults(searchResults: SearchResult[]): VectorSearchResult[] {
    return searchResults.map((result) => ({
      tabId: result.document.tabId,
      url: result.document.url,
      title: result.document.title,
      semanticScore: result.similarity,
      matchedSnippet: this.extractSnippet(result.document.chunk.text),
      chunkSource: result.document.chunk.source,
      timestamp: result.document.timestamp,
    }));
  }

  /**
   * Deduplicate by tab, keeping only the highest-similarity fragment per tab.
   * On equal scores the first fragment encountered wins.
   */
  private deduplicateByTab(results: VectorSearchResult[]): VectorSearchResult[] {
    const tabMap = new Map<number, VectorSearchResult>();
    for (const result of results) {
      const existingResult = tabMap.get(result.tabId);
      if (!existingResult || result.semanticScore > existingResult.semanticScore) {
        tabMap.set(result.tabId, result);
      }
    }
    return Array.from(tabMap.values());
  }

  /**
   * Shorten `text` to at most roughly `maxLength` characters for display.
   *
   * Preference order when the text is too long:
   * 1. cut just after the last sentence terminator, if it lies in the final 30% of the window;
   * 2. otherwise cut at the last space, if it lies in the final 20%, and append `...`;
   * 3. otherwise hard-cut at `maxLength` and append `...`.
   *
   * @param text Full chunk text.
   * @param maxLength Size of the truncation window (default 200).
   */
  private extractSnippet(text: string, maxLength: number = 200): string {
    if (text.length <= maxLength) {
      return text;
    }
    const truncated = text.substring(0, maxLength);

    // Try to truncate at a sentence boundary. Every terminator must be a non-empty
    // string: `lastIndexOf('')` returns the string length, which would make this
    // branch always win and disable the fallbacks below.
    const lastSentenceEnd = Math.max(
      ...VectorSearchTabsContentTool.SENTENCE_TERMINATORS.map((terminator) =>
        truncated.lastIndexOf(terminator),
      ),
    );
    if (lastSentenceEnd > maxLength * 0.7) {
      return truncated.substring(0, lastSentenceEnd + 1);
    }

    // If no suitable sentence boundary was found, truncate at a word boundary.
    const lastSpaceIndex = truncated.lastIndexOf(' ');
    if (lastSpaceIndex > maxLength * 0.8) {
      return truncated.substring(0, lastSpaceIndex) + '...';
    }
    return truncated + '...';
  }

  /**
   * Get index statistics.
   * When this tool has not initialized the indexer yet, a zeroed placeholder is
   * returned instead of triggering initialization.
   */
  public async getIndexStats() {
    if (!this.isInitialized) {
      return {
        totalDocuments: 0,
        totalTabs: 0,
        indexSize: 0,
        indexedPages: 0,
        isInitialized: false,
        semanticEngineReady: false,
        semanticEngineInitializing: false,
      };
    }
    return this.contentIndexer.getStats();
  }

  /**
   * Manually rebuild the index: clear everything, then re-index all tabs of all
   * windows except browser-internal pages (`chrome://`, `chrome-extension://`,
   * `edge://`, `about:`).
   *
   * @throws Re-throws any error raised while clearing or enumerating windows
   *   (after logging it). Per-tab indexing failures are only logged.
   */
  public async rebuildIndex(): Promise<void> {
    if (!this.isInitialized) {
      await this.initializeIndexer();
    }
    try {
      await this.contentIndexer.clearAllIndexes();

      const windows = await chrome.windows.getAll({ populate: true });
      const allTabs: chrome.tabs.Tab[] = [];
      for (const window of windows) {
        if (window.tabs) {
          allTabs.push(...window.tabs);
        }
      }
      const validTabs = allTabs.filter(
        (tab) =>
          tab.id &&
          tab.url &&
          !tab.url.startsWith('chrome://') &&
          !tab.url.startsWith('chrome-extension://') &&
          !tab.url.startsWith('edge://') &&
          !tab.url.startsWith('about:'),
      );
      await this.ensureTabsIndexed(validTabs);
      console.log(`VectorSearchTabsContentTool: Rebuilt index for ${validTabs.length} tabs`);
    } catch (error) {
      console.error('VectorSearchTabsContentTool: Failed to rebuild index:', error);
      throw error;
    }
  }

  /**
   * Manually index the specified tab, initializing the indexer first if needed.
   * Errors from the indexer propagate to the caller.
   */
  public async indexTab(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      await this.initializeIndexer();
    }
    await this.contentIndexer.indexTabContent(tabId);
  }

  /**
   * Remove the index for the specified tab.
   * Does nothing when this tool has not initialized the indexer.
   */
  public async removeTabIndex(tabId: number): Promise<void> {
    if (!this.isInitialized) {
      return;
    }
    await this.contentIndexer.removeTabIndex(tabId);
  }
}
// Export the shared tool instance. Constructing it at module load creates the
// ContentIndexer (see the constructor); this class only calls
// `contentIndexer.initialize()` later, on demand, via `initializeIndexer()`.
export const vectorSearchTabsContentTool = new VectorSearchTabsContentTool();