""" Screen Share Handler for LiveKit Agent This module handles screen sharing functionality for the LiveKit Chrome automation agent. """ import asyncio import logging import cv2 import numpy as np from typing import Optional, Tuple import platform import subprocess from livekit import rtc from livekit.rtc._proto import video_frame_pb2 as proto_video class ScreenShareHandler: """Handles screen sharing and capture for the LiveKit agent""" def __init__(self, config: Optional[dict] = None): self.config = config or {} self.logger = logging.getLogger(__name__) # Screen capture settings self.fps = self.config.get('video', {}).get('screen_capture', {}).get('fps', 30) self.quality = self.config.get('video', {}).get('screen_capture', {}).get('quality', 'high') # Video settings self.width = 1920 self.height = 1080 # State self.is_sharing = False self.video_source: Optional[rtc.VideoSource] = None self.video_track: Optional[rtc.LocalVideoTrack] = None self.capture_task: Optional[asyncio.Task] = None # Platform-specific capture method self.platform = platform.system().lower() async def initialize(self): """Initialize screen capture""" try: # Test screen capture capability test_frame = await self._capture_screen() if test_frame is not None: self.logger.info("Screen capture initialized successfully") else: raise Exception("Failed to capture screen") except Exception as e: self.logger.error(f"Failed to initialize screen capture: {e}") raise async def start_sharing(self, room: rtc.Room) -> bool: """Start screen sharing in the room""" try: if self.is_sharing: self.logger.warning("Screen sharing already active") return True # Create video source and track self.video_source = rtc.VideoSource(self.width, self.height) self.video_track = rtc.LocalVideoTrack.create_video_track( "screen-share", self.video_source ) # Publish track options = rtc.TrackPublishOptions() options.source = rtc.TrackSource.SOURCE_SCREENSHARE options.video_codec = rtc.VideoCodec.H264 await room.local_participant.publish_track(self.video_track, options) # Start capture loop self.capture_task = asyncio.create_task(self._capture_loop()) self.is_sharing = True self.logger.info("Screen sharing started") return True except Exception as e: self.logger.error(f"Failed to start screen sharing: {e}") return False async def stop_sharing(self, room: rtc.Room) -> bool: """Stop screen sharing""" try: if not self.is_sharing: return True # Stop capture loop if self.capture_task: self.capture_task.cancel() try: await self.capture_task except asyncio.CancelledError: pass self.capture_task = None # Unpublish track if self.video_track: publications = room.local_participant.track_publications for pub in publications.values(): if pub.track == self.video_track: await room.local_participant.unpublish_track(pub.sid) break self.is_sharing = False self.video_source = None self.video_track = None self.logger.info("Screen sharing stopped") return True except Exception as e: self.logger.error(f"Failed to stop screen sharing: {e}") return False async def update_screen(self): """Force update screen capture (for immediate feedback)""" if self.is_sharing and self.video_source: frame = await self._capture_screen() if frame is not None: self._send_frame(frame) async def _capture_loop(self): """Main capture loop""" frame_interval = 1.0 / self.fps try: while self.is_sharing: start_time = asyncio.get_event_loop().time() # Capture screen frame = await self._capture_screen() if frame is not None: self._send_frame(frame) # Wait for next frame elapsed = asyncio.get_event_loop().time() - start_time sleep_time = max(0, frame_interval - elapsed) await asyncio.sleep(sleep_time) except asyncio.CancelledError: self.logger.info("Screen capture loop cancelled") except Exception as e: self.logger.error(f"Error in capture loop: {e}") async def _capture_screen(self) -> Optional[np.ndarray]: """Capture the screen and return as numpy array""" try: if self.platform == 'windows': return await self._capture_screen_windows() elif self.platform == 'darwin': # macOS return await self._capture_screen_macos() elif self.platform == 'linux': return await self._capture_screen_linux() else: self.logger.error(f"Unsupported platform: {self.platform}") return None except Exception as e: self.logger.error(f"Error capturing screen: {e}") return None async def _capture_screen_windows(self) -> Optional[np.ndarray]: """Capture screen on Windows""" try: import pyautogui # Capture screenshot screenshot = pyautogui.screenshot() # Convert to numpy array frame = np.array(screenshot) frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # Resize if needed if frame.shape[:2] != (self.height, self.width): frame = cv2.resize(frame, (self.width, self.height)) return frame except ImportError: self.logger.error("pyautogui not available for Windows screen capture") return None except Exception as e: self.logger.error(f"Windows screen capture error: {e}") return None async def _capture_screen_macos(self) -> Optional[np.ndarray]: """Capture screen on macOS""" try: # Use screencapture command process = await asyncio.create_subprocess_exec( 'screencapture', '-t', 'png', '-', stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = await process.communicate() if process.returncode == 0: # Decode image nparr = np.frombuffer(stdout, np.uint8) frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR) # Resize if needed if frame.shape[:2] != (self.height, self.width): frame = cv2.resize(frame, (self.width, self.height)) return frame else: self.logger.error(f"screencapture failed: {stderr.decode()}") return None except Exception as e: self.logger.error(f"macOS screen capture error: {e}") return None async def _capture_screen_linux(self) -> Optional[np.ndarray]: """Capture screen on Linux""" try: # Use xwd command process = await asyncio.create_subprocess_exec( 'xwd', '-root', '-out', '/dev/stdout', stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = await process.communicate() if process.returncode == 0: # Convert xwd to image (this is simplified) # In practice, you might want to use a more robust method # or use a different capture method like gnome-screenshot # For now, try with ImageMagick convert convert_process = await asyncio.create_subprocess_exec( 'convert', 'xwd:-', 'png:-', input=stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) png_data, _ = await convert_process.communicate() if convert_process.returncode == 0: nparr = np.frombuffer(png_data, np.uint8) frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR) # Resize if needed if frame.shape[:2] != (self.height, self.width): frame = cv2.resize(frame, (self.width, self.height)) return frame return None except Exception as e: self.logger.error(f"Linux screen capture error: {e}") return None def _send_frame(self, frame: np.ndarray): """Send frame to video source""" try: if not self.video_source: return # Convert BGR to RGB rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Create video frame video_frame = rtc.VideoFrame( width=self.width, height=self.height, type=proto_video.VideoBufferType.RGB24, data=rgb_frame.tobytes() ) # Send frame (capture_frame is synchronous, not async) self.video_source.capture_frame(video_frame) except Exception as e: self.logger.error(f"Error sending frame: {e}") def set_quality(self, quality: str): """Set video quality (high, medium, low)""" self.quality = quality if quality == 'high': self.width, self.height = 1920, 1080 elif quality == 'medium': self.width, self.height = 1280, 720 elif quality == 'low': self.width, self.height = 854, 480 def set_fps(self, fps: int): """Set capture frame rate""" self.fps = max(1, min(60, fps)) # Clamp between 1-60 FPS