first commit
This commit is contained in:
304
agent-livekit/screen_share.py
Normal file
304
agent-livekit/screen_share.py
Normal file
@@ -0,0 +1,304 @@
|
||||
"""
|
||||
Screen Share Handler for LiveKit Agent
|
||||
|
||||
This module handles screen sharing functionality for the LiveKit Chrome automation agent.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import cv2
|
||||
import numpy as np
|
||||
from typing import Optional, Tuple
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
from livekit import rtc
|
||||
from livekit.rtc._proto import video_frame_pb2 as proto_video
|
||||
|
||||
|
||||
class ScreenShareHandler:
    """Handles screen sharing and capture for the LiveKit agent"""

    def __init__(self, config: Optional[dict] = None):
        """Set up capture settings, video parameters, and platform detection.

        Args:
            config: Optional configuration dict; screen-capture settings are
                read from ``config['video']['screen_capture']`` when present.
        """
        self.config = config or {}
        self.logger = logging.getLogger(__name__)

        # Screen capture settings (defaults: 30 FPS, 'high' quality)
        capture_cfg = self.config.get('video', {}).get('screen_capture', {})
        self.fps = capture_cfg.get('fps', 30)
        self.quality = capture_cfg.get('quality', 'high')

        # Output video dimensions (1080p by default; see set_quality)
        self.width = 1920
        self.height = 1080

        # Sharing state
        self.is_sharing = False
        self.video_source: Optional[rtc.VideoSource] = None
        self.video_track: Optional[rtc.LocalVideoTrack] = None
        self.capture_task: Optional[asyncio.Task] = None

        # Lower-cased OS name used to pick the capture backend
        self.platform = platform.system().lower()
async def initialize(self):
|
||||
"""Initialize screen capture"""
|
||||
try:
|
||||
# Test screen capture capability
|
||||
test_frame = await self._capture_screen()
|
||||
if test_frame is not None:
|
||||
self.logger.info("Screen capture initialized successfully")
|
||||
else:
|
||||
raise Exception("Failed to capture screen")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to initialize screen capture: {e}")
|
||||
raise
|
||||
|
||||
async def start_sharing(self, room: rtc.Room) -> bool:
|
||||
"""Start screen sharing in the room"""
|
||||
try:
|
||||
if self.is_sharing:
|
||||
self.logger.warning("Screen sharing already active")
|
||||
return True
|
||||
|
||||
# Create video source and track
|
||||
self.video_source = rtc.VideoSource(self.width, self.height)
|
||||
self.video_track = rtc.LocalVideoTrack.create_video_track(
|
||||
"screen-share",
|
||||
self.video_source
|
||||
)
|
||||
|
||||
# Publish track
|
||||
options = rtc.TrackPublishOptions()
|
||||
options.source = rtc.TrackSource.SOURCE_SCREENSHARE
|
||||
options.video_codec = rtc.VideoCodec.H264
|
||||
|
||||
await room.local_participant.publish_track(self.video_track, options)
|
||||
|
||||
# Start capture loop
|
||||
self.capture_task = asyncio.create_task(self._capture_loop())
|
||||
self.is_sharing = True
|
||||
|
||||
self.logger.info("Screen sharing started")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to start screen sharing: {e}")
|
||||
return False
|
||||
|
||||
async def stop_sharing(self, room: rtc.Room) -> bool:
|
||||
"""Stop screen sharing"""
|
||||
try:
|
||||
if not self.is_sharing:
|
||||
return True
|
||||
|
||||
# Stop capture loop
|
||||
if self.capture_task:
|
||||
self.capture_task.cancel()
|
||||
try:
|
||||
await self.capture_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self.capture_task = None
|
||||
|
||||
# Unpublish track
|
||||
if self.video_track:
|
||||
publications = room.local_participant.track_publications
|
||||
for pub in publications.values():
|
||||
if pub.track == self.video_track:
|
||||
await room.local_participant.unpublish_track(pub.sid)
|
||||
break
|
||||
|
||||
self.is_sharing = False
|
||||
self.video_source = None
|
||||
self.video_track = None
|
||||
|
||||
self.logger.info("Screen sharing stopped")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to stop screen sharing: {e}")
|
||||
return False
|
||||
|
||||
async def update_screen(self):
|
||||
"""Force update screen capture (for immediate feedback)"""
|
||||
if self.is_sharing and self.video_source:
|
||||
frame = await self._capture_screen()
|
||||
if frame is not None:
|
||||
self._send_frame(frame)
|
||||
|
||||
async def _capture_loop(self):
|
||||
"""Main capture loop"""
|
||||
frame_interval = 1.0 / self.fps
|
||||
|
||||
try:
|
||||
while self.is_sharing:
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
# Capture screen
|
||||
frame = await self._capture_screen()
|
||||
if frame is not None:
|
||||
self._send_frame(frame)
|
||||
|
||||
# Wait for next frame
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
sleep_time = max(0, frame_interval - elapsed)
|
||||
await asyncio.sleep(sleep_time)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
self.logger.info("Screen capture loop cancelled")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in capture loop: {e}")
|
||||
|
||||
async def _capture_screen(self) -> Optional[np.ndarray]:
|
||||
"""Capture the screen and return as numpy array"""
|
||||
try:
|
||||
if self.platform == 'windows':
|
||||
return await self._capture_screen_windows()
|
||||
elif self.platform == 'darwin': # macOS
|
||||
return await self._capture_screen_macos()
|
||||
elif self.platform == 'linux':
|
||||
return await self._capture_screen_linux()
|
||||
else:
|
||||
self.logger.error(f"Unsupported platform: {self.platform}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error capturing screen: {e}")
|
||||
return None
|
||||
|
||||
async def _capture_screen_windows(self) -> Optional[np.ndarray]:
|
||||
"""Capture screen on Windows"""
|
||||
try:
|
||||
import pyautogui
|
||||
|
||||
# Capture screenshot
|
||||
screenshot = pyautogui.screenshot()
|
||||
|
||||
# Convert to numpy array
|
||||
frame = np.array(screenshot)
|
||||
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[:2] != (self.height, self.width):
|
||||
frame = cv2.resize(frame, (self.width, self.height))
|
||||
|
||||
return frame
|
||||
|
||||
except ImportError:
|
||||
self.logger.error("pyautogui not available for Windows screen capture")
|
||||
return None
|
||||
except Exception as e:
|
||||
self.logger.error(f"Windows screen capture error: {e}")
|
||||
return None
|
||||
|
||||
async def _capture_screen_macos(self) -> Optional[np.ndarray]:
|
||||
"""Capture screen on macOS"""
|
||||
try:
|
||||
# Use screencapture command
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
'screencapture', '-t', 'png', '-',
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
# Decode image
|
||||
nparr = np.frombuffer(stdout, np.uint8)
|
||||
frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[:2] != (self.height, self.width):
|
||||
frame = cv2.resize(frame, (self.width, self.height))
|
||||
|
||||
return frame
|
||||
else:
|
||||
self.logger.error(f"screencapture failed: {stderr.decode()}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"macOS screen capture error: {e}")
|
||||
return None
|
||||
|
||||
async def _capture_screen_linux(self) -> Optional[np.ndarray]:
|
||||
"""Capture screen on Linux"""
|
||||
try:
|
||||
# Use xwd command
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
'xwd', '-root', '-out', '/dev/stdout',
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
|
||||
stdout, stderr = await process.communicate()
|
||||
|
||||
if process.returncode == 0:
|
||||
# Convert xwd to image (this is simplified)
|
||||
# In practice, you might want to use a more robust method
|
||||
# or use a different capture method like gnome-screenshot
|
||||
|
||||
# For now, try with ImageMagick convert
|
||||
convert_process = await asyncio.create_subprocess_exec(
|
||||
'convert', 'xwd:-', 'png:-',
|
||||
input=stdout,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
|
||||
png_data, _ = await convert_process.communicate()
|
||||
|
||||
if convert_process.returncode == 0:
|
||||
nparr = np.frombuffer(png_data, np.uint8)
|
||||
frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed
|
||||
if frame.shape[:2] != (self.height, self.width):
|
||||
frame = cv2.resize(frame, (self.width, self.height))
|
||||
|
||||
return frame
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Linux screen capture error: {e}")
|
||||
return None
|
||||
|
||||
def _send_frame(self, frame: np.ndarray):
|
||||
"""Send frame to video source"""
|
||||
try:
|
||||
if not self.video_source:
|
||||
return
|
||||
|
||||
# Convert BGR to RGB
|
||||
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Create video frame
|
||||
video_frame = rtc.VideoFrame(
|
||||
width=self.width,
|
||||
height=self.height,
|
||||
type=proto_video.VideoBufferType.RGB24,
|
||||
data=rgb_frame.tobytes()
|
||||
)
|
||||
|
||||
# Send frame (capture_frame is synchronous, not async)
|
||||
self.video_source.capture_frame(video_frame)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error sending frame: {e}")
|
||||
|
||||
def set_quality(self, quality: str):
|
||||
"""Set video quality (high, medium, low)"""
|
||||
self.quality = quality
|
||||
|
||||
if quality == 'high':
|
||||
self.width, self.height = 1920, 1080
|
||||
elif quality == 'medium':
|
||||
self.width, self.height = 1280, 720
|
||||
elif quality == 'low':
|
||||
self.width, self.height = 854, 480
|
||||
|
||||
def set_fps(self, fps: int):
|
||||
"""Set capture frame rate"""
|
||||
self.fps = max(1, min(60, fps)) # Clamp between 1-60 FPS
|
Reference in New Issue
Block a user