Files
broswer-automation/agent-livekit/screen_share.py
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

305 lines
11 KiB
Python

"""
Screen Share Handler for LiveKit Agent
This module handles screen sharing functionality for the LiveKit Chrome automation agent.
"""
import asyncio
import logging
import cv2
import numpy as np
from typing import Optional, Tuple
import platform
import subprocess
from livekit import rtc
from livekit.rtc._proto import video_frame_pb2 as proto_video
class ScreenShareHandler:
    """Handles screen sharing and capture for the LiveKit agent.

    Captures the desktop with a platform-specific backend (pyautogui on
    Windows, `screencapture` on macOS, `xwd` + ImageMagick on Linux) and
    publishes the frames as a LiveKit screen-share video track.
    """

    def __init__(self, config: Optional[dict] = None):
        """Initialize the handler.

        Args:
            config: Optional nested configuration dict. Reads
                ``video.screen_capture.fps`` (default 30) and
                ``video.screen_capture.quality`` (default ``'high'``).
        """
        self.config = config or {}
        self.logger = logging.getLogger(__name__)

        # Screen capture settings
        capture_cfg = self.config.get('video', {}).get('screen_capture', {})
        self.fps = capture_cfg.get('fps', 30)
        self.quality = capture_cfg.get('quality', 'high')

        # Video settings (1080p default; see set_quality())
        self.width = 1920
        self.height = 1080

        # State
        self.is_sharing = False
        self.video_source: Optional[rtc.VideoSource] = None
        self.video_track: Optional[rtc.LocalVideoTrack] = None
        self.capture_task: Optional[asyncio.Task] = None

        # Platform-specific capture method ('windows', 'darwin', 'linux', ...)
        self.platform = platform.system().lower()

    async def initialize(self):
        """Initialize screen capture by performing one test capture.

        Raises:
            RuntimeError: if the test capture returns no frame.
        """
        try:
            # Test screen capture capability
            test_frame = await self._capture_screen()
            if test_frame is not None:
                self.logger.info("Screen capture initialized successfully")
            else:
                raise RuntimeError("Failed to capture screen")
        except Exception as e:
            self.logger.error(f"Failed to initialize screen capture: {e}")
            raise

    async def start_sharing(self, room: rtc.Room) -> bool:
        """Start screen sharing in the room.

        Creates and publishes a screen-share video track, then starts the
        background capture loop.

        Args:
            room: The connected LiveKit room to publish into.

        Returns:
            True on success (or if sharing was already active), False on error.
        """
        try:
            if self.is_sharing:
                self.logger.warning("Screen sharing already active")
                return True

            # Create video source and track
            self.video_source = rtc.VideoSource(self.width, self.height)
            self.video_track = rtc.LocalVideoTrack.create_video_track(
                "screen-share",
                self.video_source
            )

            # Publish track as a screen share (H.264)
            options = rtc.TrackPublishOptions()
            options.source = rtc.TrackSource.SOURCE_SCREENSHARE
            options.video_codec = rtc.VideoCodec.H264
            await room.local_participant.publish_track(self.video_track, options)

            # Set the flag before the task gets its first chance to run
            # (create_task only schedules; no await happens in between),
            # so the loop's `while self.is_sharing` test passes.
            self.capture_task = asyncio.create_task(self._capture_loop())
            self.is_sharing = True
            self.logger.info("Screen sharing started")
            return True
        except Exception as e:
            self.logger.error(f"Failed to start screen sharing: {e}")
            return False

    async def stop_sharing(self, room: rtc.Room) -> bool:
        """Stop screen sharing: cancel the capture loop and unpublish the track.

        Args:
            room: The LiveKit room the track was published into.

        Returns:
            True on success (or if sharing was not active), False on error.
        """
        try:
            if not self.is_sharing:
                return True

            # Stop capture loop
            if self.capture_task:
                self.capture_task.cancel()
                try:
                    await self.capture_task
                except asyncio.CancelledError:
                    pass
                self.capture_task = None

            # Unpublish track by locating its publication sid
            if self.video_track:
                publications = room.local_participant.track_publications
                for pub in publications.values():
                    if pub.track == self.video_track:
                        await room.local_participant.unpublish_track(pub.sid)
                        break

            self.is_sharing = False
            self.video_source = None
            self.video_track = None
            self.logger.info("Screen sharing stopped")
            return True
        except Exception as e:
            self.logger.error(f"Failed to stop screen sharing: {e}")
            return False

    async def update_screen(self):
        """Force update screen capture (for immediate feedback)."""
        if self.is_sharing and self.video_source:
            frame = await self._capture_screen()
            if frame is not None:
                self._send_frame(frame)

    async def _capture_loop(self):
        """Capture frames at ~self.fps and push them to the video source."""
        frame_interval = 1.0 / self.fps
        loop = asyncio.get_running_loop()  # get_event_loop() is deprecated inside coroutines
        try:
            while self.is_sharing:
                start_time = loop.time()

                # Capture screen
                frame = await self._capture_screen()
                if frame is not None:
                    self._send_frame(frame)

                # Sleep only for the remainder of the frame budget
                elapsed = loop.time() - start_time
                await asyncio.sleep(max(0, frame_interval - elapsed))
        except asyncio.CancelledError:
            self.logger.info("Screen capture loop cancelled")
        except Exception as e:
            self.logger.error(f"Error in capture loop: {e}")

    async def _capture_screen(self) -> Optional[np.ndarray]:
        """Capture the screen and return it as a BGR numpy array, or None."""
        try:
            if self.platform == 'windows':
                return await self._capture_screen_windows()
            elif self.platform == 'darwin':  # macOS
                return await self._capture_screen_macos()
            elif self.platform == 'linux':
                return await self._capture_screen_linux()
            else:
                self.logger.error(f"Unsupported platform: {self.platform}")
                return None
        except Exception as e:
            self.logger.error(f"Error capturing screen: {e}")
            return None

    def _resize_if_needed(self, frame: np.ndarray) -> np.ndarray:
        """Resize a frame to (self.width, self.height) unless it already matches."""
        if frame.shape[:2] != (self.height, self.width):
            frame = cv2.resize(frame, (self.width, self.height))
        return frame

    async def _capture_screen_windows(self) -> Optional[np.ndarray]:
        """Capture screen on Windows via pyautogui."""
        try:
            import pyautogui

            # Capture screenshot and convert RGB (PIL) -> BGR (OpenCV)
            screenshot = pyautogui.screenshot()
            frame = np.array(screenshot)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            return self._resize_if_needed(frame)
        except ImportError:
            self.logger.error("pyautogui not available for Windows screen capture")
            return None
        except Exception as e:
            self.logger.error(f"Windows screen capture error: {e}")
            return None

    async def _capture_screen_macos(self) -> Optional[np.ndarray]:
        """Capture screen on macOS via the `screencapture` command-line tool."""
        try:
            # '-' writes the PNG to stdout
            process = await asyncio.create_subprocess_exec(
                'screencapture', '-t', 'png', '-',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                self.logger.error(f"screencapture failed: {stderr.decode()}")
                return None

            # Decode image; cv2.imdecode returns None on failure
            nparr = np.frombuffer(stdout, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                self.logger.error("Failed to decode screencapture output")
                return None
            return self._resize_if_needed(frame)
        except Exception as e:
            self.logger.error(f"macOS screen capture error: {e}")
            return None

    async def _capture_screen_linux(self) -> Optional[np.ndarray]:
        """Capture screen on Linux via `xwd` piped through ImageMagick `convert`."""
        try:
            # Dump the root window in XWD format
            process = await asyncio.create_subprocess_exec(
                'xwd', '-root', '-out', '/dev/stdout',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                return None

            # Convert XWD to PNG with ImageMagick. NOTE: this is simplified;
            # a more robust method (e.g. gnome-screenshot) may be preferable.
            # BUG FIX: create_subprocess_exec has no `input=` keyword — the
            # data must be fed via stdin=PIPE and communicate(input=...).
            convert_process = await asyncio.create_subprocess_exec(
                'convert', 'xwd:-', 'png:-',
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            png_data, _ = await convert_process.communicate(input=stdout)
            if convert_process.returncode != 0:
                return None

            # Decode image; cv2.imdecode returns None on failure
            nparr = np.frombuffer(png_data, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                return None
            return self._resize_if_needed(frame)
        except Exception as e:
            self.logger.error(f"Linux screen capture error: {e}")
            return None

    def _send_frame(self, frame: np.ndarray):
        """Convert a BGR frame to RGB and push it to the LiveKit video source."""
        try:
            if not self.video_source:
                return

            # Convert BGR to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Use the frame's actual dimensions so the buffer size always
            # matches the declared width/height, even if set_quality() is
            # called while sharing is active.
            height, width = rgb_frame.shape[:2]
            video_frame = rtc.VideoFrame(
                width=width,
                height=height,
                type=proto_video.VideoBufferType.RGB24,
                data=rgb_frame.tobytes()
            )

            # capture_frame is synchronous, not async
            self.video_source.capture_frame(video_frame)
        except Exception as e:
            self.logger.error(f"Error sending frame: {e}")

    def set_quality(self, quality: str):
        """Set video quality: 'high' (1080p), 'medium' (720p), or 'low' (480p).

        Unknown values update ``self.quality`` but leave dimensions unchanged.
        """
        self.quality = quality
        if quality == 'high':
            self.width, self.height = 1920, 1080
        elif quality == 'medium':
            self.width, self.height = 1280, 720
        elif quality == 'low':
            self.width, self.height = 854, 480

    def set_fps(self, fps: int):
        """Set capture frame rate, clamped to the 1-60 FPS range."""
        self.fps = max(1, min(60, fps))