first commit
This commit is contained in:
@@ -0,0 +1,308 @@
|
||||
/**
|
||||
* Vectorized tab content search tool
|
||||
* Uses vector database for efficient semantic search
|
||||
*/
|
||||
|
||||
import { createErrorResponse, ToolResult } from '@/common/tool-handler';
|
||||
import { BaseBrowserToolExecutor } from '../base-browser';
|
||||
import { TOOL_NAMES } from 'chrome-mcp-shared';
|
||||
import { ContentIndexer } from '@/utils/content-indexer';
|
||||
import { LIMITS, ERROR_MESSAGES } from '@/common/constants';
|
||||
import type { SearchResult } from '@/utils/vector-database';
|
||||
|
||||
interface VectorSearchResult {
|
||||
tabId: number;
|
||||
url: string;
|
||||
title: string;
|
||||
semanticScore: number;
|
||||
matchedSnippet: string;
|
||||
chunkSource: string;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tool for vectorized search of tab content using semantic similarity
|
||||
*/
|
||||
class VectorSearchTabsContentTool extends BaseBrowserToolExecutor {
|
||||
name = TOOL_NAMES.BROWSER.SEARCH_TABS_CONTENT;
|
||||
private contentIndexer: ContentIndexer;
|
||||
private isInitialized = false;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.contentIndexer = new ContentIndexer({
|
||||
autoIndex: true,
|
||||
maxChunksPerPage: LIMITS.MAX_SEARCH_RESULTS,
|
||||
skipDuplicates: true,
|
||||
});
|
||||
}
|
||||
|
||||
private async initializeIndexer(): Promise<void> {
|
||||
try {
|
||||
await this.contentIndexer.initialize();
|
||||
this.isInitialized = true;
|
||||
console.log('VectorSearchTabsContentTool: Content indexer initialized successfully');
|
||||
} catch (error) {
|
||||
console.error('VectorSearchTabsContentTool: Failed to initialize content indexer:', error);
|
||||
this.isInitialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
async execute(args: { query: string }): Promise<ToolResult> {
|
||||
try {
|
||||
const { query } = args;
|
||||
|
||||
if (!query || query.trim().length === 0) {
|
||||
return createErrorResponse(
|
||||
ERROR_MESSAGES.INVALID_PARAMETERS + ': Query parameter is required and cannot be empty',
|
||||
);
|
||||
}
|
||||
|
||||
console.log(`VectorSearchTabsContentTool: Starting vector search with query: "${query}"`);
|
||||
|
||||
// Check semantic engine status
|
||||
if (!this.contentIndexer.isSemanticEngineReady()) {
|
||||
if (this.contentIndexer.isSemanticEngineInitializing()) {
|
||||
return createErrorResponse(
|
||||
'Vector search engine is still initializing (model downloading). Please wait a moment and try again.',
|
||||
);
|
||||
} else {
|
||||
// Try to initialize
|
||||
console.log('VectorSearchTabsContentTool: Initializing content indexer...');
|
||||
await this.initializeIndexer();
|
||||
|
||||
// Check semantic engine status again
|
||||
if (!this.contentIndexer.isSemanticEngineReady()) {
|
||||
return createErrorResponse('Failed to initialize vector search engine');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute vector search, get more results for deduplication
|
||||
const searchResults = await this.contentIndexer.searchContent(query, 50);
|
||||
|
||||
// Convert search results format
|
||||
const vectorSearchResults = this.convertSearchResults(searchResults);
|
||||
|
||||
// Deduplicate by tab, keep only the highest similarity fragment per tab
|
||||
const deduplicatedResults = this.deduplicateByTab(vectorSearchResults);
|
||||
|
||||
// Sort by similarity and get top 10 results
|
||||
const topResults = deduplicatedResults
|
||||
.sort((a, b) => b.semanticScore - a.semanticScore)
|
||||
.slice(0, 10);
|
||||
|
||||
// Get index statistics
|
||||
const stats = this.contentIndexer.getStats();
|
||||
|
||||
const result = {
|
||||
success: true,
|
||||
totalTabsSearched: stats.totalTabs,
|
||||
matchedTabsCount: topResults.length,
|
||||
vectorSearchEnabled: true,
|
||||
indexStats: {
|
||||
totalDocuments: stats.totalDocuments,
|
||||
totalTabs: stats.totalTabs,
|
||||
indexedPages: stats.indexedPages,
|
||||
semanticEngineReady: stats.semanticEngineReady,
|
||||
semanticEngineInitializing: stats.semanticEngineInitializing,
|
||||
},
|
||||
matchedTabs: topResults.map((result) => ({
|
||||
tabId: result.tabId,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
semanticScore: result.semanticScore,
|
||||
matchedSnippets: [result.matchedSnippet],
|
||||
chunkSource: result.chunkSource,
|
||||
timestamp: result.timestamp,
|
||||
})),
|
||||
};
|
||||
|
||||
console.log(
|
||||
`VectorSearchTabsContentTool: Found ${topResults.length} results with vector search`,
|
||||
);
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: JSON.stringify(result, null, 2),
|
||||
},
|
||||
],
|
||||
isError: false,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('VectorSearchTabsContentTool: Search failed:', error);
|
||||
return createErrorResponse(
|
||||
`Vector search failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure all tabs are indexed
|
||||
*/
|
||||
private async ensureTabsIndexed(tabs: chrome.tabs.Tab[]): Promise<void> {
|
||||
const indexPromises = tabs
|
||||
.filter((tab) => tab.id)
|
||||
.map(async (tab) => {
|
||||
try {
|
||||
await this.contentIndexer.indexTabContent(tab.id!);
|
||||
} catch (error) {
|
||||
console.warn(`VectorSearchTabsContentTool: Failed to index tab ${tab.id}:`, error);
|
||||
}
|
||||
});
|
||||
|
||||
await Promise.allSettled(indexPromises);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert search results format
|
||||
*/
|
||||
private convertSearchResults(searchResults: SearchResult[]): VectorSearchResult[] {
|
||||
return searchResults.map((result) => ({
|
||||
tabId: result.document.tabId,
|
||||
url: result.document.url,
|
||||
title: result.document.title,
|
||||
semanticScore: result.similarity,
|
||||
matchedSnippet: this.extractSnippet(result.document.chunk.text),
|
||||
chunkSource: result.document.chunk.source,
|
||||
timestamp: result.document.timestamp,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Deduplicate by tab, keep only the highest similarity fragment per tab
|
||||
*/
|
||||
private deduplicateByTab(results: VectorSearchResult[]): VectorSearchResult[] {
|
||||
const tabMap = new Map<number, VectorSearchResult>();
|
||||
|
||||
for (const result of results) {
|
||||
const existingResult = tabMap.get(result.tabId);
|
||||
|
||||
// If this tab has no result yet, or current result has higher similarity, update it
|
||||
if (!existingResult || result.semanticScore > existingResult.semanticScore) {
|
||||
tabMap.set(result.tabId, result);
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(tabMap.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract text snippet for display
|
||||
*/
|
||||
private extractSnippet(text: string, maxLength: number = 200): string {
|
||||
if (text.length <= maxLength) {
|
||||
return text;
|
||||
}
|
||||
|
||||
// Try to truncate at sentence boundary
|
||||
const truncated = text.substring(0, maxLength);
|
||||
const lastSentenceEnd = Math.max(
|
||||
truncated.lastIndexOf('.'),
|
||||
truncated.lastIndexOf('!'),
|
||||
truncated.lastIndexOf('?'),
|
||||
truncated.lastIndexOf('。'),
|
||||
truncated.lastIndexOf('!'),
|
||||
truncated.lastIndexOf('?'),
|
||||
);
|
||||
|
||||
if (lastSentenceEnd > maxLength * 0.7) {
|
||||
return truncated.substring(0, lastSentenceEnd + 1);
|
||||
}
|
||||
|
||||
// If no suitable sentence boundary found, truncate at word boundary
|
||||
const lastSpaceIndex = truncated.lastIndexOf(' ');
|
||||
if (lastSpaceIndex > maxLength * 0.8) {
|
||||
return truncated.substring(0, lastSpaceIndex) + '...';
|
||||
}
|
||||
|
||||
return truncated + '...';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index statistics
|
||||
*/
|
||||
public async getIndexStats() {
|
||||
if (!this.isInitialized) {
|
||||
// Don't automatically initialize - just return basic stats
|
||||
return {
|
||||
totalDocuments: 0,
|
||||
totalTabs: 0,
|
||||
indexSize: 0,
|
||||
indexedPages: 0,
|
||||
isInitialized: false,
|
||||
semanticEngineReady: false,
|
||||
semanticEngineInitializing: false,
|
||||
};
|
||||
}
|
||||
return this.contentIndexer.getStats();
|
||||
}
|
||||
|
||||
/**
|
||||
* Manually rebuild index
|
||||
*/
|
||||
public async rebuildIndex(): Promise<void> {
|
||||
if (!this.isInitialized) {
|
||||
await this.initializeIndexer();
|
||||
}
|
||||
|
||||
try {
|
||||
// Clear existing indexes
|
||||
await this.contentIndexer.clearAllIndexes();
|
||||
|
||||
// Get all tabs and reindex
|
||||
const windows = await chrome.windows.getAll({ populate: true });
|
||||
const allTabs: chrome.tabs.Tab[] = [];
|
||||
|
||||
for (const window of windows) {
|
||||
if (window.tabs) {
|
||||
allTabs.push(...window.tabs);
|
||||
}
|
||||
}
|
||||
|
||||
const validTabs = allTabs.filter(
|
||||
(tab) =>
|
||||
tab.id &&
|
||||
tab.url &&
|
||||
!tab.url.startsWith('chrome://') &&
|
||||
!tab.url.startsWith('chrome-extension://') &&
|
||||
!tab.url.startsWith('edge://') &&
|
||||
!tab.url.startsWith('about:'),
|
||||
);
|
||||
|
||||
await this.ensureTabsIndexed(validTabs);
|
||||
|
||||
console.log(`VectorSearchTabsContentTool: Rebuilt index for ${validTabs.length} tabs`);
|
||||
} catch (error) {
|
||||
console.error('VectorSearchTabsContentTool: Failed to rebuild index:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Manually index specified tab
|
||||
*/
|
||||
public async indexTab(tabId: number): Promise<void> {
|
||||
if (!this.isInitialized) {
|
||||
await this.initializeIndexer();
|
||||
}
|
||||
|
||||
await this.contentIndexer.indexTabContent(tabId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove index for specified tab
|
||||
*/
|
||||
public async removeTabIndex(tabId: number): Promise<void> {
|
||||
if (!this.isInitialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.contentIndexer.removeTabIndex(tabId);
|
||||
}
|
||||
}
|
||||
|
||||
// Export tool instance
|
||||
export const vectorSearchTabsContentTool = new VectorSearchTabsContentTool();
|
Reference in New Issue
Block a user