Files
broswer-automation/app/chrome-extension/entrypoints/background/tools/browser/screenshot.ts
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

389 lines
14 KiB
TypeScript

import { createErrorResponse, ToolResult } from '@/common/tool-handler';
import { BaseBrowserToolExecutor } from '../base-browser';
import { TOOL_NAMES } from 'chrome-mcp-shared';
import { TOOL_MESSAGE_TYPES } from '@/common/message-types';
import { TIMEOUTS, ERROR_MESSAGES } from '@/common/constants';
import {
canvasToDataURL,
createImageBitmapFromUrl,
cropAndResizeImage,
stitchImages,
compressImage,
} from '../../../../utils/image-utils';
/**
 * Tuning values used only by the screenshot tool.
 * Delays are in milliseconds; sizes are in pixels.
 */
const SCREENSHOT_CONSTANTS = {
  /** Time to wait after a scroll so rendering and lazy loading can settle. */
  SCROLL_DELAY_MS: 350,
  /** Small pause before each capture in a scroll sequence. */
  CAPTURE_STITCH_DELAY_MS: 50,
  /** Upper bound on the number of captured parts (guards infinite-scroll pages). */
  MAX_CAPTURE_PARTS: 50,
  /** Default upper bound on the captured height, in pixels. */
  MAX_CAPTURE_HEIGHT_PX: 50_000,
  /** Slack, in pixels, when deciding that the bottom of the page was reached. */
  PIXEL_TOLERANCE: 1,
  /** Pause after injecting the helper script (also reused before element capture). */
  SCRIPT_INIT_DELAY: 100,
} as const;
/**
 * Arguments accepted by the screenshot tool.
 *
 * Every field is optional; the tool's `execute` method supplies defaults for
 * `name`, `storeBase64`, `fullPage` and `savePng`.
 */
interface ScreenshotToolParams {
  /** Base name used for the saved file and in the result message (defaults to 'screenshot'). */
  name?: string;
  /** Selector of a single element to capture; only used when `fullPage` is not set. */
  selector?: string;
  /** Target output width in CSS pixels. */
  width?: number;
  /** Target output height in CSS pixels. */
  height?: number;
  /** When true, return the image as compressed base64 data instead of saving a file. */
  storeBase64?: boolean;
  /** When true, scroll through the page and stitch the captures into one image. */
  fullPage?: boolean;
  /** When true (the default), save the capture as a PNG through the downloads API. */
  savePng?: boolean;
  /** Maximum height to capture in pixels (for infinite scroll pages). */
  maxHeight?: number;
}
/**
 * Tool for capturing screenshots of web pages.
 *
 * Three capture modes are supported, chosen in this order:
 *  1. `fullPage` - scroll through the page and stitch the captures together,
 *  2. `selector` - capture the visible tab and crop it to one element,
 *  3. otherwise  - capture the visible viewport as-is.
 */
class ScreenshotTool extends BaseBrowserToolExecutor {
  name = TOOL_NAMES.BROWSER.SCREENSHOT;

  /**
   * Execute screenshot operation.
   *
   * @param args Capture options; see `ScreenshotToolParams`.
   * @returns When `storeBase64` is true, a result whose text is a JSON object with
   *   `base64Data` and `mimeType` (no file is saved in that case). Otherwise a JSON
   *   summary of the capture, including download details when `savePng` is true.
   *   Capture failures are returned as an error response rather than thrown.
   */
  async execute(args: ScreenshotToolParams): Promise<ToolResult> {
    const {
      name = 'screenshot',
      selector,
      storeBase64 = false,
      fullPage = false,
      savePng = true,
    } = args;

    console.log(`Starting screenshot with options:`, args);

    // Get current tab
    const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
    const tab = tabs[0];
    // A tab without an id cannot be scripted or messaged, so treat it as missing
    if (!tab || tab.id === undefined) {
      return createErrorResponse(ERROR_MESSAGES.TAB_NOT_FOUND);
    }
    const tabId = tab.id;

    // Check URL restrictions
    if (
      tab.url?.startsWith('chrome://') ||
      tab.url?.startsWith('edge://') ||
      tab.url?.startsWith('https://chrome.google.com/webstore') ||
      tab.url?.startsWith('https://microsoftedge.microsoft.com/')
    ) {
      return createErrorResponse(
        'Cannot capture special browser pages or web store pages due to security restrictions.',
      );
    }

    let finalImageDataUrl: string | undefined;
    const results: {
      base64: string | null;
      fileSaved: boolean;
      downloadId?: number;
      filename?: string;
      fullPath?: string;
      saveError?: string;
    } = { base64: null, fileSaved: false };
    let originalScroll = { x: 0, y: 0 };

    try {
      await this.injectContentScript(tabId, ['inject-scripts/screenshot-helper.js']);

      // Wait for script initialization
      await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY));

      // 1. Prepare page (hide scrollbars, potentially fixed elements)
      await this.sendMessageToTab(tabId, {
        action: TOOL_MESSAGE_TYPES.SCREENSHOT_PREPARE_PAGE_FOR_CAPTURE,
        options: { fullPage },
      });

      // Get initial page details, including original scroll position
      const pageDetails = await this.sendMessageToTab(tabId, {
        action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_PAGE_DETAILS,
      });
      originalScroll = { x: pageDetails.currentScrollX, y: pageDetails.currentScrollY };

      if (fullPage) {
        this.logInfo('Capturing full page...');
        finalImageDataUrl = await this._captureFullPage(tabId, args, pageDetails);
      } else if (selector) {
        this.logInfo(`Capturing element: ${selector}`);
        finalImageDataUrl = await this._captureElement(tabId, args, pageDetails.devicePixelRatio);
      } else {
        // Visible area only
        this.logInfo('Capturing visible area...');
        finalImageDataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, { format: 'png' });
      }

      if (!finalImageDataUrl) {
        throw new Error('Failed to capture image data');
      }

      // 2. Process output
      if (storeBase64 === true) {
        // Compress image for base64 output to reduce size
        const compressed = await compressImage(finalImageDataUrl, {
          scale: 0.7, // Reduce dimensions by 30%
          quality: 0.8, // 80% quality for good balance
          format: 'image/jpeg', // JPEG for better compression
        });

        // Include base64 data in response (without prefix)
        const base64Data = compressed.dataUrl.replace(/^data:image\/[^;]+;base64,/, '');

        // Early return: the `finally` block below still resets the page
        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({ base64Data, mimeType: compressed.mimeType }),
            },
          ],
          isError: false,
        };
      }

      if (savePng === true) {
        // Save PNG file to downloads
        this.logInfo('Saving PNG...');
        try {
          // Generate filename
          const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
          const filename = `${name.replace(/[^a-z0-9_-]/gi, '_') || 'screenshot'}_${timestamp}.png`;

          // Use Chrome's download API to save the file
          const downloadId = await chrome.downloads.download({
            url: finalImageDataUrl,
            filename: filename,
            saveAs: false,
          });

          results.downloadId = downloadId;
          results.filename = filename;
          results.fileSaved = true;

          // Try to get the full file path
          try {
            // Wait a moment to ensure download info is updated
            await new Promise((resolve) => setTimeout(resolve, 100));

            // Search for download item to get full path
            const [downloadItem] = await chrome.downloads.search({ id: downloadId });
            if (downloadItem && downloadItem.filename) {
              // Add full path to response
              results.fullPath = downloadItem.filename;
            }
          } catch (pathError) {
            console.warn('Could not get full file path:', pathError);
          }
        } catch (error) {
          // A failed save does not fail the capture; it is reported via `saveError`
          console.error('Error saving PNG file:', error);
          results.saveError = String(error instanceof Error ? error.message : error);
        }
      }
    } catch (error) {
      console.error('Error during screenshot execution:', error);
      return createErrorResponse(
        `Screenshot error: ${error instanceof Error ? error.message : JSON.stringify(error)}`,
      );
    } finally {
      // 3. Reset page
      try {
        await this.sendMessageToTab(tabId, {
          action: TOOL_MESSAGE_TYPES.SCREENSHOT_RESET_PAGE_AFTER_CAPTURE,
          scrollX: originalScroll.x,
          scrollY: originalScroll.y,
        });
      } catch (err) {
        console.warn('Failed to reset page, tab might have closed:', err);
      }
    }

    this.logInfo('Screenshot completed!');

    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify({
            success: true,
            message: `Screenshot [${name}] captured successfully`,
            tabId: tabId,
            url: tab.url,
            name: name,
            ...results,
          }),
        },
      ],
      isError: false,
    };
  }

  /**
   * Log information with the tool's prefix.
   */
  private logInfo(message: string) {
    console.log(`[Screenshot Tool] ${message}`);
  }

  /**
   * Capture specific element by cropping a capture of the visible tab.
   *
   * @param tabId Tab that hosts the element.
   * @param options Capture options; `selector`, `width` and `height` are used.
   * @param pageDpr Page device pixel ratio, used when the element details carry none.
   * @returns Data URL of the cropped (and optionally resized) image.
   * @throws Error when the element details are missing, the element has no
   *   visible area, or the visible tab cannot be captured.
   */
  async _captureElement(
    tabId: number,
    options: ScreenshotToolParams,
    pageDpr: number,
  ): Promise<string> {
    const elementDetails = await this.sendMessageToTab(tabId, {
      action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_ELEMENT_DETAILS,
      selector: options.selector,
    });

    // Fail with a readable message instead of a TypeError on `rect` access.
    // NOTE(review): assumes the helper reports failures via an `error` field - confirm.
    if (!elementDetails || !elementDetails.rect) {
      throw new Error(
        elementDetails?.error || `Failed to get details for element: ${options.selector}`,
      );
    }
    if (!(elementDetails.rect.width > 0) || !(elementDetails.rect.height > 0)) {
      throw new Error(`Element has no visible area to capture: ${options.selector}`);
    }

    const dpr = elementDetails.devicePixelRatio || pageDpr || 1;

    // Element rect is viewport-relative, in CSS pixels
    // captureVisibleTab captures in physical pixels
    const cropRectPx = {
      x: elementDetails.rect.x * dpr,
      y: elementDetails.rect.y * dpr,
      width: elementDetails.rect.width * dpr,
      height: elementDetails.rect.height * dpr,
    };

    // Small delay to ensure element is fully rendered after scrollIntoView
    await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY));

    const visibleCaptureDataUrl = await chrome.tabs.captureVisibleTab({ format: 'png' });
    if (!visibleCaptureDataUrl) {
      throw new Error('Failed to capture visible tab for element cropping');
    }

    const croppedCanvas = await cropAndResizeImage(
      visibleCaptureDataUrl,
      cropRectPx,
      dpr,
      options.width, // Target output width in CSS pixels
      options.height, // Target output height in CSS pixels
    );
    return canvasToDataURL(croppedCanvas);
  }

  /**
   * Capture full page by scrolling one viewport at a time and stitching the parts.
   *
   * The capture is bounded both by `options.maxHeight` (or the default maximum
   * height) and by the maximum number of parts, so infinite-scroll pages terminate.
   *
   * @param tabId Tab to capture.
   * @param options Capture options; `width`, `height` and `maxHeight` are used.
   * @param initialPageDetails Page metrics reported by the helper script
   *   (device pixel ratio, total and viewport sizes, current scroll position).
   * @returns Data URL of the stitched (and optionally resized) image.
   * @throws Error when a capture returns no data or the output cannot be drawn.
   */
  async _captureFullPage(
    tabId: number,
    options: ScreenshotToolParams,
    initialPageDetails: any,
  ): Promise<string> {
    // Fall back to 1 so a missing ratio cannot turn every dimension into NaN
    const dpr = initialPageDetails.devicePixelRatio || 1;
    const totalWidthCss = options.width || initialPageDetails.totalWidth; // Use option width if provided
    const totalHeightCss = initialPageDetails.totalHeight; // Full page always uses actual height

    // Apply maximum height limit for infinite scroll pages; ignore non-positive values
    const maxHeightPx =
      options.maxHeight && options.maxHeight > 0
        ? options.maxHeight
        : SCREENSHOT_CONSTANTS.MAX_CAPTURE_HEIGHT_PX;
    const limitedHeightCss = Math.min(totalHeightCss, maxHeightPx / dpr);

    const totalWidthPx = totalWidthCss * dpr;
    const totalHeightPx = limitedHeightCss * dpr;

    // Viewport dimensions (CSS pixels) - logged for debugging
    this.logInfo(
      `Viewport size: ${initialPageDetails.viewportWidth}x${initialPageDetails.viewportHeight} CSS pixels`,
    );
    this.logInfo(
      `Page dimensions: ${totalWidthCss}x${totalHeightCss} CSS pixels (limited to ${limitedHeightCss} height)`,
    );

    const viewportHeightCss = initialPageDetails.viewportHeight;
    // The page may already be scrolled down when the capture starts; the first
    // part is stitched at y = 0, so the page must be moved to the top first.
    const startsScrolled = Number(initialPageDetails.currentScrollY) > 0;

    const capturedParts: { dataUrl: string; y: number }[] = [];
    let currentScrollYCss = 0;
    let capturedHeightPx = 0;
    let partIndex = 0;

    while (capturedHeightPx < totalHeightPx && partIndex < SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) {
      this.logInfo(
        `Capturing part ${partIndex + 1}... (${Math.round((capturedHeightPx / totalHeightPx) * 100)}%)`,
      );

      if (currentScrollYCss > 0 || (partIndex === 0 && startsScrolled)) {
        // Don't scroll for the first part if already at top
        const scrollResp = await this.sendMessageToTab(tabId, {
          action: TOOL_MESSAGE_TYPES.SCREENSHOT_SCROLL_PAGE,
          x: 0,
          y: currentScrollYCss,
          scrollDelay: SCREENSHOT_CONSTANTS.SCROLL_DELAY_MS,
        });
        // Update currentScrollYCss based on actual scroll achieved
        currentScrollYCss = scrollResp.newScrollY;
      }

      // Ensure rendering after scroll
      await new Promise((resolve) =>
        setTimeout(resolve, SCREENSHOT_CONSTANTS.CAPTURE_STITCH_DELAY_MS),
      );

      const dataUrl = await chrome.tabs.captureVisibleTab({ format: 'png' });
      if (!dataUrl) throw new Error('captureVisibleTab returned empty during full page capture');

      const yOffsetPx = currentScrollYCss * dpr;
      capturedParts.push({ dataUrl, y: yOffsetPx });

      const imgForHeight = await createImageBitmapFromUrl(dataUrl); // To get actual captured height
      const lastPartEffectiveHeightPx = Math.min(imgForHeight.height, totalHeightPx - yOffsetPx);

      capturedHeightPx = yOffsetPx + lastPartEffectiveHeightPx;

      if (capturedHeightPx >= totalHeightPx - SCREENSHOT_CONSTANTS.PIXEL_TOLERANCE) break;

      currentScrollYCss += viewportHeightCss;
      // Prevent overscrolling past the document height for the next scroll command
      if (
        currentScrollYCss > totalHeightCss - viewportHeightCss &&
        currentScrollYCss < totalHeightCss
      ) {
        currentScrollYCss = totalHeightCss - viewportHeightCss;
      }
      partIndex++;
    }

    // Check if we hit any limits
    if (partIndex >= SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) {
      this.logInfo(
        `Reached maximum number of capture parts (${SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS}). This may be an infinite scroll page.`,
      );
    }
    if (totalHeightCss > limitedHeightCss) {
      this.logInfo(
        `Page height (${totalHeightCss}px) exceeds maximum capture height (${maxHeightPx / dpr}px). Capturing limited portion.`,
      );
    }

    this.logInfo('Stitching image...');
    const finalCanvas = await stitchImages(capturedParts, totalWidthPx, totalHeightPx);

    // Work out the requested output size. When only one dimension is given the
    // other is derived from the stitched image so the aspect ratio is kept.
    let targetWidthPx: number | undefined;
    let targetHeightPx: number | undefined;
    if (options.width && options.height) {
      // Both specified, direct resize
      targetWidthPx = options.width * dpr;
      targetHeightPx = options.height * dpr;
    } else if (options.width) {
      targetWidthPx = options.width * dpr;
      targetHeightPx = targetWidthPx * (finalCanvas.height / finalCanvas.width);
    } else if (options.height) {
      targetHeightPx = options.height * dpr;
      targetWidthPx = targetHeightPx * (finalCanvas.width / finalCanvas.height);
    }

    let outputCanvas = finalCanvas;
    if (targetWidthPx !== undefined && targetHeightPx !== undefined) {
      outputCanvas = this._resizeCanvas(finalCanvas, targetWidthPx, targetHeightPx);
    }

    return canvasToDataURL(outputCanvas);
  }

  /**
   * Draw `source` scaled onto a new canvas of the requested size.
   *
   * Sizes are rounded to whole pixels (minimum 1) because canvas dimensions are integers.
   *
   * @throws Error when no 2D context is available, instead of returning a blank canvas.
   */
  private _resizeCanvas(
    source: OffscreenCanvas,
    widthPx: number,
    heightPx: number,
  ): OffscreenCanvas {
    const width = Math.max(1, Math.round(widthPx));
    const height = Math.max(1, Math.round(heightPx));
    const resized = new OffscreenCanvas(width, height);
    const ctx = resized.getContext('2d');
    if (!ctx) {
      throw new Error('Failed to get 2D context for resizing the screenshot');
    }
    ctx.drawImage(source, 0, 0, width, height);
    return resized;
  }
}
// Module-level ScreenshotTool instance, created once at import time and exported for use by other modules.
export const screenshotTool = new ScreenshotTool();