import { createErrorResponse, ToolResult } from '@/common/tool-handler'; import { BaseBrowserToolExecutor } from '../base-browser'; import { TOOL_NAMES } from 'chrome-mcp-shared'; import { TOOL_MESSAGE_TYPES } from '@/common/message-types'; import { TIMEOUTS, ERROR_MESSAGES } from '@/common/constants'; import { canvasToDataURL, createImageBitmapFromUrl, cropAndResizeImage, stitchImages, compressImage, } from '../../../../utils/image-utils'; // Screenshot-specific constants const SCREENSHOT_CONSTANTS = { SCROLL_DELAY_MS: 350, // Time to wait after scroll for rendering and lazy loading CAPTURE_STITCH_DELAY_MS: 50, // Small delay between captures in a scroll sequence MAX_CAPTURE_PARTS: 50, // Maximum number of parts to capture (for infinite scroll pages) MAX_CAPTURE_HEIGHT_PX: 50000, // Maximum height in pixels to capture PIXEL_TOLERANCE: 1, SCRIPT_INIT_DELAY: 100, // Delay for script initialization } as const; interface ScreenshotToolParams { name: string; selector?: string; width?: number; height?: number; storeBase64?: boolean; fullPage?: boolean; savePng?: boolean; maxHeight?: number; // Maximum height to capture in pixels (for infinite scroll pages) } /** * Tool for capturing screenshots of web pages */ class ScreenshotTool extends BaseBrowserToolExecutor { name = TOOL_NAMES.BROWSER.SCREENSHOT; /** * Execute screenshot operation */ async execute(args: ScreenshotToolParams): Promise { const { name = 'screenshot', selector, storeBase64 = false, fullPage = false, savePng = true, } = args; console.log(`Starting screenshot with options:`, args); // Get current tab const tabs = await chrome.tabs.query({ active: true, currentWindow: true }); if (!tabs[0]) { return createErrorResponse(ERROR_MESSAGES.TAB_NOT_FOUND); } const tab = tabs[0]; // Check URL restrictions if ( tab.url?.startsWith('chrome://') || tab.url?.startsWith('edge://') || tab.url?.startsWith('https://chrome.google.com/webstore') || tab.url?.startsWith('https://microsoftedge.microsoft.com/') ) { return createErrorResponse( 'Cannot capture special browser pages or web store pages due to security restrictions.', ); } let finalImageDataUrl: string | undefined; const results: any = { base64: null, fileSaved: false }; let originalScroll = { x: 0, y: 0 }; try { await this.injectContentScript(tab.id!, ['inject-scripts/screenshot-helper.js']); // Wait for script initialization await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY)); // 1. Prepare page (hide scrollbars, potentially fixed elements) await this.sendMessageToTab(tab.id!, { action: TOOL_MESSAGE_TYPES.SCREENSHOT_PREPARE_PAGE_FOR_CAPTURE, options: { fullPage }, }); // Get initial page details, including original scroll position const pageDetails = await this.sendMessageToTab(tab.id!, { action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_PAGE_DETAILS, }); originalScroll = { x: pageDetails.currentScrollX, y: pageDetails.currentScrollY }; if (fullPage) { this.logInfo('Capturing full page...'); finalImageDataUrl = await this._captureFullPage(tab.id!, args, pageDetails); } else if (selector) { this.logInfo(`Capturing element: ${selector}`); finalImageDataUrl = await this._captureElement(tab.id!, args, pageDetails.devicePixelRatio); } else { // Visible area only this.logInfo('Capturing visible area...'); finalImageDataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, { format: 'png' }); } if (!finalImageDataUrl) { throw new Error('Failed to capture image data'); } // 2. Process output if (storeBase64 === true) { // Compress image for base64 output to reduce size const compressed = await compressImage(finalImageDataUrl, { scale: 0.7, // Reduce dimensions by 30% quality: 0.8, // 80% quality for good balance format: 'image/jpeg', // JPEG for better compression }); // Include base64 data in response (without prefix) const base64Data = compressed.dataUrl.replace(/^data:image\/[^;]+;base64,/, ''); results.base64 = base64Data; return { content: [ { type: 'text', text: JSON.stringify({ base64Data, mimeType: compressed.mimeType }), }, ], isError: false, }; } if (savePng === true) { // Save PNG file to downloads this.logInfo('Saving PNG...'); try { // Generate filename const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); const filename = `${name.replace(/[^a-z0-9_-]/gi, '_') || 'screenshot'}_${timestamp}.png`; // Use Chrome's download API to save the file const downloadId = await chrome.downloads.download({ url: finalImageDataUrl, filename: filename, saveAs: false, }); results.downloadId = downloadId; results.filename = filename; results.fileSaved = true; // Try to get the full file path try { // Wait a moment to ensure download info is updated await new Promise((resolve) => setTimeout(resolve, 100)); // Search for download item to get full path const [downloadItem] = await chrome.downloads.search({ id: downloadId }); if (downloadItem && downloadItem.filename) { // Add full path to response results.fullPath = downloadItem.filename; } } catch (pathError) { console.warn('Could not get full file path:', pathError); } } catch (error) { console.error('Error saving PNG file:', error); results.saveError = String(error instanceof Error ? error.message : error); } } } catch (error) { console.error('Error during screenshot execution:', error); return createErrorResponse( `Screenshot error: ${error instanceof Error ? error.message : JSON.stringify(error)}`, ); } finally { // 3. Reset page try { await this.sendMessageToTab(tab.id!, { action: TOOL_MESSAGE_TYPES.SCREENSHOT_RESET_PAGE_AFTER_CAPTURE, scrollX: originalScroll.x, scrollY: originalScroll.y, }); } catch (err) { console.warn('Failed to reset page, tab might have closed:', err); } } this.logInfo('Screenshot completed!'); return { content: [ { type: 'text', text: JSON.stringify({ success: true, message: `Screenshot [${name}] captured successfully`, tabId: tab.id, url: tab.url, name: name, ...results, }), }, ], isError: false, }; } /** * Log information */ private logInfo(message: string) { console.log(`[Screenshot Tool] ${message}`); } /** * Capture specific element */ async _captureElement( tabId: number, options: ScreenshotToolParams, pageDpr: number, ): Promise { const elementDetails = await this.sendMessageToTab(tabId, { action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_ELEMENT_DETAILS, selector: options.selector, }); const dpr = elementDetails.devicePixelRatio || pageDpr || 1; // Element rect is viewport-relative, in CSS pixels // captureVisibleTab captures in physical pixels const cropRectPx = { x: elementDetails.rect.x * dpr, y: elementDetails.rect.y * dpr, width: elementDetails.rect.width * dpr, height: elementDetails.rect.height * dpr, }; // Small delay to ensure element is fully rendered after scrollIntoView await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY)); const visibleCaptureDataUrl = await chrome.tabs.captureVisibleTab({ format: 'png' }); if (!visibleCaptureDataUrl) { throw new Error('Failed to capture visible tab for element cropping'); } const croppedCanvas = await cropAndResizeImage( visibleCaptureDataUrl, cropRectPx, dpr, options.width, // Target output width in CSS pixels options.height, // Target output height in CSS pixels ); return canvasToDataURL(croppedCanvas); } /** * Capture full page */ async _captureFullPage( tabId: number, options: ScreenshotToolParams, initialPageDetails: any, ): Promise { const dpr = initialPageDetails.devicePixelRatio; const totalWidthCss = options.width || initialPageDetails.totalWidth; // Use option width if provided const totalHeightCss = initialPageDetails.totalHeight; // Full page always uses actual height // Apply maximum height limit for infinite scroll pages const maxHeightPx = options.maxHeight || SCREENSHOT_CONSTANTS.MAX_CAPTURE_HEIGHT_PX; const limitedHeightCss = Math.min(totalHeightCss, maxHeightPx / dpr); const totalWidthPx = totalWidthCss * dpr; const totalHeightPx = limitedHeightCss * dpr; // Viewport dimensions (CSS pixels) - logged for debugging this.logInfo( `Viewport size: ${initialPageDetails.viewportWidth}x${initialPageDetails.viewportHeight} CSS pixels`, ); this.logInfo( `Page dimensions: ${totalWidthCss}x${totalHeightCss} CSS pixels (limited to ${limitedHeightCss} height)`, ); const viewportHeightCss = initialPageDetails.viewportHeight; const capturedParts = []; let currentScrollYCss = 0; let capturedHeightPx = 0; let partIndex = 0; while (capturedHeightPx < totalHeightPx && partIndex < SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) { this.logInfo( `Capturing part ${partIndex + 1}... (${Math.round((capturedHeightPx / totalHeightPx) * 100)}%)`, ); if (currentScrollYCss > 0) { // Don't scroll for the first part if already at top const scrollResp = await this.sendMessageToTab(tabId, { action: TOOL_MESSAGE_TYPES.SCREENSHOT_SCROLL_PAGE, x: 0, y: currentScrollYCss, scrollDelay: SCREENSHOT_CONSTANTS.SCROLL_DELAY_MS, }); // Update currentScrollYCss based on actual scroll achieved currentScrollYCss = scrollResp.newScrollY; } // Ensure rendering after scroll await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.CAPTURE_STITCH_DELAY_MS), ); const dataUrl = await chrome.tabs.captureVisibleTab({ format: 'png' }); if (!dataUrl) throw new Error('captureVisibleTab returned empty during full page capture'); const yOffsetPx = currentScrollYCss * dpr; capturedParts.push({ dataUrl, y: yOffsetPx }); const imgForHeight = await createImageBitmapFromUrl(dataUrl); // To get actual captured height const lastPartEffectiveHeightPx = Math.min(imgForHeight.height, totalHeightPx - yOffsetPx); capturedHeightPx = yOffsetPx + lastPartEffectiveHeightPx; if (capturedHeightPx >= totalHeightPx - SCREENSHOT_CONSTANTS.PIXEL_TOLERANCE) break; currentScrollYCss += viewportHeightCss; // Prevent overscrolling past the document height for the next scroll command if ( currentScrollYCss > totalHeightCss - viewportHeightCss && currentScrollYCss < totalHeightCss ) { currentScrollYCss = totalHeightCss - viewportHeightCss; } partIndex++; } // Check if we hit any limits if (partIndex >= SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) { this.logInfo( `Reached maximum number of capture parts (${SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS}). This may be an infinite scroll page.`, ); } if (totalHeightCss > limitedHeightCss) { this.logInfo( `Page height (${totalHeightCss}px) exceeds maximum capture height (${maxHeightPx / dpr}px). Capturing limited portion.`, ); } this.logInfo('Stitching image...'); const finalCanvas = await stitchImages(capturedParts, totalWidthPx, totalHeightPx); // If user specified width but not height (or vice versa for full page), resize maintaining aspect ratio let outputCanvas = finalCanvas; if (options.width && !options.height) { const targetWidthPx = options.width * dpr; const aspectRatio = finalCanvas.height / finalCanvas.width; const targetHeightPx = targetWidthPx * aspectRatio; outputCanvas = new OffscreenCanvas(targetWidthPx, targetHeightPx); const ctx = outputCanvas.getContext('2d'); if (ctx) { ctx.drawImage(finalCanvas, 0, 0, targetWidthPx, targetHeightPx); } } else if (options.height && !options.width) { const targetHeightPx = options.height * dpr; const aspectRatio = finalCanvas.width / finalCanvas.height; const targetWidthPx = targetHeightPx * aspectRatio; outputCanvas = new OffscreenCanvas(targetWidthPx, targetHeightPx); const ctx = outputCanvas.getContext('2d'); if (ctx) { ctx.drawImage(finalCanvas, 0, 0, targetWidthPx, targetHeightPx); } } else if (options.width && options.height) { // Both specified, direct resize const targetWidthPx = options.width * dpr; const targetHeightPx = options.height * dpr; outputCanvas = new OffscreenCanvas(targetWidthPx, targetHeightPx); const ctx = outputCanvas.getContext('2d'); if (ctx) { ctx.drawImage(finalCanvas, 0, 0, targetWidthPx, targetHeightPx); } } return canvasToDataURL(outputCanvas); } } export const screenshotTool = new ScreenshotTool();