first commit

2025-08-12 02:54:17 +05:00
commit d97cad1736
225 changed files with 137626 additions and 0 deletions
--- a/app/chrome-extension/entrypoints/background/tools/browser/screenshot.ts
+++ b/app/chrome-extension/entrypoints/background/tools/browser/screenshot.ts
@@ -0,0 +1,388 @@
+import { createErrorResponse, ToolResult } from '@/common/tool-handler';
+import { BaseBrowserToolExecutor } from '../base-browser';
+import { TOOL_NAMES } from 'chrome-mcp-shared';
+import { TOOL_MESSAGE_TYPES } from '@/common/message-types';
+import { TIMEOUTS, ERROR_MESSAGES } from '@/common/constants';
+import {
+  canvasToDataURL,
+  createImageBitmapFromUrl,
+  cropAndResizeImage,
+  stitchImages,
+  compressImage,
+} from '../../../../utils/image-utils';
+
+/**
+ * Tunables for the screenshot tool: waits (milliseconds) and capture limits.
+ */
+const SCREENSHOT_CONSTANTS = {
+  // --- Timing (milliseconds) ---
+
+  /** Wait after the helper script is injected (also reused before element capture). */
+  SCRIPT_INIT_DELAY: 100,
+  /** Time to wait after a scroll for rendering and lazy loading. */
+  SCROLL_DELAY_MS: 350,
+  /** Small delay between captures in a scroll sequence. */
+  CAPTURE_STITCH_DELAY_MS: 50,
+
+  // --- Limits ---
+
+  /** Maximum number of parts to capture (for infinite scroll pages). */
+  MAX_CAPTURE_PARTS: 50,
+  /** Maximum height in pixels to capture. */
+  MAX_CAPTURE_HEIGHT_PX: 50000,
+  /** Slack, in pixels, allowed when deciding the captured height has reached the target. */
+  PIXEL_TOLERANCE: 1,
+} as const;
+
+/**
+ * Arguments accepted by the screenshot tool.
+ */
+interface ScreenshotToolParams {
+  /** Label used in the saved file name and in the success message. */
+  name: string;
+
+  // --- What to capture ---
+
+  /** Capture the whole page by scrolling and stitching; checked before `selector`. */
+  fullPage?: boolean;
+  /** CSS selector of a single element to capture. */
+  selector?: string;
+  /** Maximum height to capture in pixels (for infinite scroll pages). */
+  maxHeight?: number;
+
+  // --- Output size (CSS pixels) ---
+
+  /** Target output width. */
+  width?: number;
+  /** Target output height. */
+  height?: number;
+
+  // --- Output destination ---
+
+  /** When true, a compressed image is returned inline as base64 and no file is saved. */
+  storeBase64?: boolean;
+  /** When true, the PNG is saved through the downloads API. */
+  savePng?: boolean;
+}
+
+/**
+ * Page metrics returned by the injected screenshot helper in response to
+ * SCREENSHOT_GET_PAGE_DETAILS. Only the fields this tool reads are listed.
+ */
+interface ScreenshotPageDetails {
+  currentScrollX: number;
+  currentScrollY: number;
+  devicePixelRatio: number;
+  totalWidth: number;
+  totalHeight: number;
+  viewportWidth: number;
+  viewportHeight: number;
+}
+
+/**
+ * Tool for capturing screenshots of web pages.
+ *
+ * Supports three modes: the visible viewport, a single element (cropped out of
+ * a viewport capture), and the full page (scroll, capture, stitch).
+ */
+class ScreenshotTool extends BaseBrowserToolExecutor {
+  name = TOOL_NAMES.BROWSER.SCREENSHOT;
+
+  /**
+   * Execute screenshot operation.
+   *
+   * Injects the helper script into the active tab, prepares the page, captures
+   * in the requested mode, then either returns compressed base64 data
+   * (`storeBase64`) or saves a PNG through the downloads API (`savePng`).
+   * The page is always asked to reset itself afterwards, even on failure.
+   *
+   * @param args Capture options; see {@link ScreenshotToolParams}.
+   * @returns A tool result whose text payload is JSON. Capture failures are
+   *   reported as an error response rather than thrown.
+   */
+  async execute(args: ScreenshotToolParams): Promise<ToolResult> {
+    const {
+      name = 'screenshot',
+      selector,
+      storeBase64 = false,
+      fullPage = false,
+      savePng = true,
+    } = args;
+
+    console.log(`Starting screenshot with options:`, args);
+
+    // Get current tab. A tab without an id cannot be scripted or messaged, so
+    // treat it the same as "no tab" instead of asserting the id is present.
+    const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
+    if (!tab || tab.id === undefined) {
+      return createErrorResponse(ERROR_MESSAGES.TAB_NOT_FOUND);
+    }
+    const tabId = tab.id;
+
+    // Check URL restrictions
+    const restrictedPrefixes = [
+      'chrome://',
+      'edge://',
+      'https://chrome.google.com/webstore',
+      'https://microsoftedge.microsoft.com/',
+    ];
+    const tabUrl = tab.url ?? '';
+    if (restrictedPrefixes.some((prefix) => tabUrl.startsWith(prefix))) {
+      return createErrorResponse(
+        'Cannot capture special browser pages or web store pages due to security restrictions.',
+      );
+    }
+
+    let finalImageDataUrl: string | undefined;
+    const results: Record<string, unknown> = { base64: null, fileSaved: false };
+    let originalScroll = { x: 0, y: 0 };
+
+    try {
+      await this.injectContentScript(tabId, ['inject-scripts/screenshot-helper.js']);
+      // Wait for script initialization
+      await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY));
+      // 1. Prepare page (hide scrollbars, potentially fixed elements)
+      await this.sendMessageToTab(tabId, {
+        action: TOOL_MESSAGE_TYPES.SCREENSHOT_PREPARE_PAGE_FOR_CAPTURE,
+        options: { fullPage },
+      });
+
+      // Get initial page details, including original scroll position
+      const pageDetails = await this.sendMessageToTab(tabId, {
+        action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_PAGE_DETAILS,
+      });
+      originalScroll = { x: pageDetails.currentScrollX, y: pageDetails.currentScrollY };
+
+      if (fullPage) {
+        this.logInfo('Capturing full page...');
+        finalImageDataUrl = await this._captureFullPage(tabId, args, pageDetails, tab.windowId);
+      } else if (selector) {
+        this.logInfo(`Capturing element: ${selector}`);
+        finalImageDataUrl = await this._captureElement(
+          tabId,
+          args,
+          pageDetails.devicePixelRatio,
+          tab.windowId,
+        );
+      } else {
+        // Visible area only
+        this.logInfo('Capturing visible area...');
+        finalImageDataUrl = await this._captureVisible(tab.windowId);
+      }
+
+      if (!finalImageDataUrl) {
+        throw new Error('Failed to capture image data');
+      }
+
+      // 2. Process output
+      if (storeBase64 === true) {
+        // Compress image for base64 output to reduce size
+        const compressed = await compressImage(finalImageDataUrl, {
+          scale: 0.7, // Reduce dimensions by 30%
+          quality: 0.8, // 80% quality for good balance
+          format: 'image/jpeg', // JPEG for better compression
+        });
+
+        // Include base64 data in response (without prefix).
+        // Returning here still runs the `finally` block, so the page is reset.
+        const base64Data = compressed.dataUrl.replace(/^data:image\/[^;]+;base64,/, '');
+        return {
+          content: [
+            {
+              type: 'text',
+              text: JSON.stringify({ base64Data, mimeType: compressed.mimeType }),
+            },
+          ],
+          isError: false,
+        };
+      }
+
+      if (savePng === true) {
+        // Save PNG file to downloads
+        this.logInfo('Saving PNG...');
+        try {
+          // Generate filename
+          const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+          const filename = `${name.replace(/[^a-z0-9_-]/gi, '_') || 'screenshot'}_${timestamp}.png`;
+
+          // Use Chrome's download API to save the file
+          const downloadId = await chrome.downloads.download({
+            url: finalImageDataUrl,
+            filename: filename,
+            saveAs: false,
+          });
+
+          results.downloadId = downloadId;
+          results.filename = filename;
+          results.fileSaved = true;
+
+          // Try to get the full file path (best effort; failure is only logged)
+          try {
+            // Wait a moment to ensure download info is updated
+            await new Promise((resolve) => setTimeout(resolve, 100));
+
+            // Search for download item to get full path
+            const [downloadItem] = await chrome.downloads.search({ id: downloadId });
+            if (downloadItem && downloadItem.filename) {
+              // Add full path to response
+              results.fullPath = downloadItem.filename;
+            }
+          } catch (pathError) {
+            console.warn('Could not get full file path:', pathError);
+          }
+        } catch (error) {
+          // A failed save does not fail the whole call; it is reported in the payload.
+          console.error('Error saving PNG file:', error);
+          results.saveError = String(error instanceof Error ? error.message : error);
+        }
+      }
+    } catch (error) {
+      console.error('Error during screenshot execution:', error);
+      return createErrorResponse(
+        `Screenshot error: ${error instanceof Error ? error.message : JSON.stringify(error)}`,
+      );
+    } finally {
+      // 3. Reset page
+      try {
+        await this.sendMessageToTab(tabId, {
+          action: TOOL_MESSAGE_TYPES.SCREENSHOT_RESET_PAGE_AFTER_CAPTURE,
+          scrollX: originalScroll.x,
+          scrollY: originalScroll.y,
+        });
+      } catch (err) {
+        console.warn('Failed to reset page, tab might have closed:', err);
+      }
+    }
+
+    this.logInfo('Screenshot completed!');
+
+    return {
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify({
+            success: true,
+            message: `Screenshot [${name}] captured successfully`,
+            tabId: tab.id,
+            url: tab.url,
+            name: name,
+            ...results,
+          }),
+        },
+      ],
+      isError: false,
+    };
+  }
+
+  /**
+   * Log information
+   */
+  private logInfo(message: string) {
+    console.log(`[Screenshot Tool] ${message}`);
+  }
+
+  /**
+   * Capture the visible area of a window as a PNG data URL.
+   *
+   * @param windowId Window to capture. When omitted, the browser's notion of
+   *   the current window is used.
+   */
+  private _captureVisible(windowId?: number): Promise<string> {
+    return windowId === undefined
+      ? chrome.tabs.captureVisibleTab({ format: 'png' })
+      : chrome.tabs.captureVisibleTab(windowId, { format: 'png' });
+  }
+
+  /**
+   * Draw `source` scaled into a new canvas of the given size.
+   *
+   * Sizes are rounded to whole pixels (minimum 1) because canvas dimensions
+   * are integers.
+   *
+   * @throws Error if a 2D context cannot be obtained for the new canvas.
+   */
+  private _resizeCanvas(
+    source: OffscreenCanvas,
+    widthPx: number,
+    heightPx: number,
+  ): OffscreenCanvas {
+    const targetWidthPx = Math.max(1, Math.round(widthPx));
+    const targetHeightPx = Math.max(1, Math.round(heightPx));
+    const resized = new OffscreenCanvas(targetWidthPx, targetHeightPx);
+    const ctx = resized.getContext('2d');
+    if (!ctx) {
+      // Returning the blank canvas would silently produce an empty screenshot.
+      throw new Error('Failed to get 2D context for resizing the screenshot');
+    }
+    ctx.drawImage(source, 0, 0, targetWidthPx, targetHeightPx);
+    return resized;
+  }
+
+  /**
+   * Capture specific element.
+   *
+   * Asks the helper script for the element's rect, captures the visible tab,
+   * and crops the element out of that capture.
+   *
+   * @param tabId Tab containing the element.
+   * @param options Tool options; `selector`, `width` and `height` are used.
+   * @param pageDpr Fallback device pixel ratio if the element details carry none.
+   * @param windowId Window to capture; defaults to the current window.
+   * @returns Data URL of the cropped element image.
+   * @throws Error if the helper returns no rect or the capture is empty.
+   */
+  async _captureElement(
+    tabId: number,
+    options: ScreenshotToolParams,
+    pageDpr: number,
+    windowId?: number,
+  ): Promise<string> {
+    const elementDetails = await this.sendMessageToTab(tabId, {
+      action: TOOL_MESSAGE_TYPES.SCREENSHOT_GET_ELEMENT_DETAILS,
+      selector: options.selector,
+    });
+
+    if (!elementDetails?.rect) {
+      throw new Error(`Could not get element details for selector: ${options.selector}`);
+    }
+
+    const dpr = elementDetails.devicePixelRatio || pageDpr || 1;
+
+    // Element rect is viewport-relative, in CSS pixels
+    // captureVisibleTab captures in physical pixels
+    const cropRectPx = {
+      x: elementDetails.rect.x * dpr,
+      y: elementDetails.rect.y * dpr,
+      width: elementDetails.rect.width * dpr,
+      height: elementDetails.rect.height * dpr,
+    };
+
+    // Small delay to ensure element is fully rendered after scrollIntoView
+    await new Promise((resolve) => setTimeout(resolve, SCREENSHOT_CONSTANTS.SCRIPT_INIT_DELAY));
+
+    const visibleCaptureDataUrl = await this._captureVisible(windowId);
+    if (!visibleCaptureDataUrl) {
+      throw new Error('Failed to capture visible tab for element cropping');
+    }
+
+    const croppedCanvas = await cropAndResizeImage(
+      visibleCaptureDataUrl,
+      cropRectPx,
+      dpr,
+      options.width, // Target output width in CSS pixels
+      options.height, // Target output height in CSS pixels
+    );
+    return canvasToDataURL(croppedCanvas);
+  }
+
+  /**
+   * Capture full page.
+   *
+   * Scrolls the page one viewport at a time, captures each position, and
+   * stitches the parts into one image. Capture stops at the page height, the
+   * `maxHeight` / MAX_CAPTURE_HEIGHT_PX limit, MAX_CAPTURE_PARTS parts, or as
+   * soon as the page stops scrolling further.
+   *
+   * @param tabId Tab to capture.
+   * @param options Tool options; `width`, `height` and `maxHeight` are used.
+   * @param initialPageDetails Page metrics fetched before capturing.
+   * @param windowId Window to capture; defaults to the current window.
+   * @returns Data URL of the stitched (and optionally resized) image.
+   * @throws Error if a viewport capture comes back empty.
+   */
+  async _captureFullPage(
+    tabId: number,
+    options: ScreenshotToolParams,
+    initialPageDetails: ScreenshotPageDetails,
+    windowId?: number,
+  ): Promise<string> {
+    // Fall back to 1 so a missing ratio cannot turn every size into NaN.
+    const dpr = initialPageDetails.devicePixelRatio || 1;
+    const totalWidthCss = options.width || initialPageDetails.totalWidth; // Use option width if provided
+    const totalHeightCss = initialPageDetails.totalHeight; // Full page always uses actual height
+
+    // Apply maximum height limit for infinite scroll pages
+    const maxHeightPx = options.maxHeight || SCREENSHOT_CONSTANTS.MAX_CAPTURE_HEIGHT_PX;
+    const limitedHeightCss = Math.min(totalHeightCss, maxHeightPx / dpr);
+
+    const totalWidthPx = totalWidthCss * dpr;
+    const totalHeightPx = limitedHeightCss * dpr;
+
+    // Viewport dimensions (CSS pixels) - logged for debugging
+    this.logInfo(
+      `Viewport size: ${initialPageDetails.viewportWidth}x${initialPageDetails.viewportHeight} CSS pixels`,
+    );
+    this.logInfo(
+      `Page dimensions: ${totalWidthCss}x${totalHeightCss} CSS pixels (limited to ${limitedHeightCss} height)`,
+    );
+
+    const viewportHeightCss = initialPageDetails.viewportHeight;
+
+    // The first part is stitched at y = 0, so the page must really be at the
+    // origin for it. If the user had scrolled, an explicit scroll is required.
+    const startsAwayFromOrigin =
+      (initialPageDetails.currentScrollX || 0) !== 0 ||
+      (initialPageDetails.currentScrollY || 0) !== 0;
+
+    const capturedParts: { dataUrl: string; y: number }[] = [];
+    let currentScrollYCss = 0;
+    let lastCapturedScrollYCss = -1;
+    let capturedHeightPx = 0;
+    let partIndex = 0;
+
+    while (capturedHeightPx < totalHeightPx && partIndex < SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) {
+      this.logInfo(
+        `Capturing part ${partIndex + 1}... (${Math.round((capturedHeightPx / totalHeightPx) * 100)}%)`,
+      );
+
+      // Don't scroll for the first part if already at top
+      const needsScroll = currentScrollYCss > 0 || (partIndex === 0 && startsAwayFromOrigin);
+      if (needsScroll) {
+        const scrollResp = await this.sendMessageToTab(tabId, {
+          action: TOOL_MESSAGE_TYPES.SCREENSHOT_SCROLL_PAGE,
+          x: 0,
+          y: currentScrollYCss,
+          scrollDelay: SCREENSHOT_CONSTANTS.SCROLL_DELAY_MS,
+        });
+        // Update currentScrollYCss based on actual scroll achieved
+        if (typeof scrollResp?.newScrollY === 'number') {
+          currentScrollYCss = scrollResp.newScrollY;
+        }
+      }
+
+      // If the page did not move past the previous capture position, the
+      // reported height is not reachable; another capture would be a duplicate.
+      if (partIndex > 0 && currentScrollYCss <= lastCapturedScrollYCss) {
+        this.logInfo(
+          `Scroll position did not advance past ${lastCapturedScrollYCss}px. Stopping capture early.`,
+        );
+        break;
+      }
+      lastCapturedScrollYCss = currentScrollYCss;
+
+      // Ensure rendering after scroll
+      await new Promise((resolve) =>
+        setTimeout(resolve, SCREENSHOT_CONSTANTS.CAPTURE_STITCH_DELAY_MS),
+      );
+
+      const dataUrl = await this._captureVisible(windowId);
+      if (!dataUrl) throw new Error('captureVisibleTab returned empty during full page capture');
+
+      const yOffsetPx = currentScrollYCss * dpr;
+      capturedParts.push({ dataUrl, y: yOffsetPx });
+
+      // Decode only to learn the actual captured height, then release the bitmap.
+      const imgForHeight = await createImageBitmapFromUrl(dataUrl);
+      const lastPartEffectiveHeightPx = Math.min(imgForHeight.height, totalHeightPx - yOffsetPx);
+      imgForHeight.close();
+
+      capturedHeightPx = yOffsetPx + lastPartEffectiveHeightPx;
+
+      if (capturedHeightPx >= totalHeightPx - SCREENSHOT_CONSTANTS.PIXEL_TOLERANCE) break;
+
+      currentScrollYCss += viewportHeightCss;
+      // Prevent overscrolling past the document height for the next scroll command
+      if (
+        currentScrollYCss > totalHeightCss - viewportHeightCss &&
+        currentScrollYCss < totalHeightCss
+      ) {
+        currentScrollYCss = totalHeightCss - viewportHeightCss;
+      }
+      partIndex++;
+    }
+
+    // Check if we hit any limits
+    if (partIndex >= SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS) {
+      this.logInfo(
+        `Reached maximum number of capture parts (${SCREENSHOT_CONSTANTS.MAX_CAPTURE_PARTS}). This may be an infinite scroll page.`,
+      );
+    }
+    if (totalHeightCss > limitedHeightCss) {
+      this.logInfo(
+        `Page height (${totalHeightCss}px) exceeds maximum capture height (${maxHeightPx / dpr}px). Capturing limited portion.`,
+      );
+    }
+
+    this.logInfo('Stitching image...');
+    const finalCanvas = await stitchImages(capturedParts, totalWidthPx, totalHeightPx);
+
+    // If user specified width but not height (or vice versa for full page), resize maintaining aspect ratio
+    let outputCanvas = finalCanvas;
+    if (options.width && options.height) {
+      // Both specified, direct resize
+      outputCanvas = this._resizeCanvas(finalCanvas, options.width * dpr, options.height * dpr);
+    } else if (options.width) {
+      const targetWidthPx = options.width * dpr;
+      const aspectRatio = finalCanvas.height / finalCanvas.width;
+      outputCanvas = this._resizeCanvas(finalCanvas, targetWidthPx, targetWidthPx * aspectRatio);
+    } else if (options.height) {
+      const targetHeightPx = options.height * dpr;
+      const aspectRatio = finalCanvas.width / finalCanvas.height;
+      outputCanvas = this._resizeCanvas(finalCanvas, targetHeightPx * aspectRatio, targetHeightPx);
+    }
+
+    return canvasToDataURL(outputCanvas);
+  }
+}
+
+// Module-level instance of the screenshot tool, created once when this module loads.
+export const screenshotTool = new ScreenshotTool();