Files
broswer-automation/agent-livekit/screen_share.py
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

305 lines
11 KiB
Python

"""
Screen Share Handler for LiveKit Agent
This module handles screen sharing functionality for the LiveKit Chrome automation agent.
"""
import asyncio
import logging
import cv2
import numpy as np
from typing import Optional, Tuple
import platform
import subprocess
from livekit import rtc
from livekit.rtc._proto import video_frame_pb2 as proto_video
class ScreenShareHandler:
    """Handles screen sharing and capture for the LiveKit agent.

    Captures the desktop with a platform-specific backend (pyautogui on
    Windows, `screencapture` on macOS, `xwd` + ImageMagick on Linux) and
    publishes the frames as a LiveKit screen-share video track.
    """

    def __init__(self, config: Optional[dict] = None):
        """Initialize the handler.

        Args:
            config: Optional nested configuration dict. Reads
                ``video.screen_capture.fps`` (default 30) and
                ``video.screen_capture.quality`` (default ``'high'``).
        """
        self.config = config or {}
        self.logger = logging.getLogger(__name__)

        # Screen capture settings
        capture_cfg = self.config.get('video', {}).get('screen_capture', {})
        self.fps = capture_cfg.get('fps', 30)
        self.quality = capture_cfg.get('quality', 'high')

        # Video settings (1080p default; see set_quality())
        self.width = 1920
        self.height = 1080

        # State
        self.is_sharing = False
        self.video_source: Optional[rtc.VideoSource] = None
        self.video_track: Optional[rtc.LocalVideoTrack] = None
        self.capture_task: Optional[asyncio.Task] = None

        # Platform-specific capture method ('windows', 'darwin', 'linux', ...)
        self.platform = platform.system().lower()

    async def initialize(self):
        """Initialize screen capture by performing one test capture.

        Raises:
            RuntimeError: if the test capture returns no frame.
        """
        try:
            # Test screen capture capability
            test_frame = await self._capture_screen()
            if test_frame is not None:
                self.logger.info("Screen capture initialized successfully")
            else:
                raise RuntimeError("Failed to capture screen")
        except Exception as e:
            self.logger.error(f"Failed to initialize screen capture: {e}")
            raise

    async def start_sharing(self, room: rtc.Room) -> bool:
        """Start screen sharing in the room.

        Creates and publishes a screen-share video track, then starts the
        background capture loop.

        Args:
            room: The connected LiveKit room to publish into.

        Returns:
            True on success (or if sharing was already active), False on error.
        """
        try:
            if self.is_sharing:
                self.logger.warning("Screen sharing already active")
                return True

            # Create video source and track
            self.video_source = rtc.VideoSource(self.width, self.height)
            self.video_track = rtc.LocalVideoTrack.create_video_track(
                "screen-share",
                self.video_source
            )

            # Publish track as a screen share (H.264)
            options = rtc.TrackPublishOptions()
            options.source = rtc.TrackSource.SOURCE_SCREENSHARE
            options.video_codec = rtc.VideoCodec.H264
            await room.local_participant.publish_track(self.video_track, options)

            # Set the flag before the task gets its first chance to run
            # (create_task only schedules; no await happens in between),
            # so the loop's `while self.is_sharing` test passes.
            self.capture_task = asyncio.create_task(self._capture_loop())
            self.is_sharing = True
            self.logger.info("Screen sharing started")
            return True
        except Exception as e:
            self.logger.error(f"Failed to start screen sharing: {e}")
            return False

    async def stop_sharing(self, room: rtc.Room) -> bool:
        """Stop screen sharing: cancel the capture loop and unpublish the track.

        Args:
            room: The LiveKit room the track was published into.

        Returns:
            True on success (or if sharing was not active), False on error.
        """
        try:
            if not self.is_sharing:
                return True

            # Stop capture loop
            if self.capture_task:
                self.capture_task.cancel()
                try:
                    await self.capture_task
                except asyncio.CancelledError:
                    pass
                self.capture_task = None

            # Unpublish track by locating its publication sid
            if self.video_track:
                publications = room.local_participant.track_publications
                for pub in publications.values():
                    if pub.track == self.video_track:
                        await room.local_participant.unpublish_track(pub.sid)
                        break

            self.is_sharing = False
            self.video_source = None
            self.video_track = None
            self.logger.info("Screen sharing stopped")
            return True
        except Exception as e:
            self.logger.error(f"Failed to stop screen sharing: {e}")
            return False

    async def update_screen(self):
        """Force update screen capture (for immediate feedback)."""
        if self.is_sharing and self.video_source:
            frame = await self._capture_screen()
            if frame is not None:
                self._send_frame(frame)

    async def _capture_loop(self):
        """Capture frames at ~self.fps and push them to the video source."""
        frame_interval = 1.0 / self.fps
        loop = asyncio.get_running_loop()  # get_event_loop() is deprecated inside coroutines
        try:
            while self.is_sharing:
                start_time = loop.time()

                # Capture screen
                frame = await self._capture_screen()
                if frame is not None:
                    self._send_frame(frame)

                # Sleep only for the remainder of the frame budget
                elapsed = loop.time() - start_time
                await asyncio.sleep(max(0, frame_interval - elapsed))
        except asyncio.CancelledError:
            self.logger.info("Screen capture loop cancelled")
        except Exception as e:
            self.logger.error(f"Error in capture loop: {e}")

    async def _capture_screen(self) -> Optional[np.ndarray]:
        """Capture the screen and return it as a BGR numpy array, or None."""
        try:
            if self.platform == 'windows':
                return await self._capture_screen_windows()
            elif self.platform == 'darwin':  # macOS
                return await self._capture_screen_macos()
            elif self.platform == 'linux':
                return await self._capture_screen_linux()
            else:
                self.logger.error(f"Unsupported platform: {self.platform}")
                return None
        except Exception as e:
            self.logger.error(f"Error capturing screen: {e}")
            return None

    def _resize_if_needed(self, frame: np.ndarray) -> np.ndarray:
        """Resize a frame to (self.width, self.height) unless it already matches."""
        if frame.shape[:2] != (self.height, self.width):
            frame = cv2.resize(frame, (self.width, self.height))
        return frame

    async def _capture_screen_windows(self) -> Optional[np.ndarray]:
        """Capture screen on Windows via pyautogui."""
        try:
            import pyautogui

            # Capture screenshot and convert RGB (PIL) -> BGR (OpenCV)
            screenshot = pyautogui.screenshot()
            frame = np.array(screenshot)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            return self._resize_if_needed(frame)
        except ImportError:
            self.logger.error("pyautogui not available for Windows screen capture")
            return None
        except Exception as e:
            self.logger.error(f"Windows screen capture error: {e}")
            return None

    async def _capture_screen_macos(self) -> Optional[np.ndarray]:
        """Capture screen on macOS via the `screencapture` command-line tool."""
        try:
            # '-' writes the PNG to stdout
            process = await asyncio.create_subprocess_exec(
                'screencapture', '-t', 'png', '-',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                self.logger.error(f"screencapture failed: {stderr.decode()}")
                return None

            # Decode image; cv2.imdecode returns None on failure
            nparr = np.frombuffer(stdout, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                self.logger.error("Failed to decode screencapture output")
                return None
            return self._resize_if_needed(frame)
        except Exception as e:
            self.logger.error(f"macOS screen capture error: {e}")
            return None

    async def _capture_screen_linux(self) -> Optional[np.ndarray]:
        """Capture screen on Linux via `xwd` piped through ImageMagick `convert`."""
        try:
            # Dump the root window in XWD format
            process = await asyncio.create_subprocess_exec(
                'xwd', '-root', '-out', '/dev/stdout',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                return None

            # Convert XWD to PNG with ImageMagick. NOTE: this is simplified;
            # a more robust method (e.g. gnome-screenshot) may be preferable.
            # BUG FIX: create_subprocess_exec has no `input=` keyword — the
            # data must be fed via stdin=PIPE and communicate(input=...).
            convert_process = await asyncio.create_subprocess_exec(
                'convert', 'xwd:-', 'png:-',
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            png_data, _ = await convert_process.communicate(input=stdout)
            if convert_process.returncode != 0:
                return None

            # Decode image; cv2.imdecode returns None on failure
            nparr = np.frombuffer(png_data, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                return None
            return self._resize_if_needed(frame)
        except Exception as e:
            self.logger.error(f"Linux screen capture error: {e}")
            return None

    def _send_frame(self, frame: np.ndarray):
        """Convert a BGR frame to RGB and push it to the LiveKit video source."""
        try:
            if not self.video_source:
                return

            # Convert BGR to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Use the frame's actual dimensions so the buffer size always
            # matches the declared width/height, even if set_quality() is
            # called while sharing is active.
            height, width = rgb_frame.shape[:2]
            video_frame = rtc.VideoFrame(
                width=width,
                height=height,
                type=proto_video.VideoBufferType.RGB24,
                data=rgb_frame.tobytes()
            )

            # capture_frame is synchronous, not async
            self.video_source.capture_frame(video_frame)
        except Exception as e:
            self.logger.error(f"Error sending frame: {e}")

    def set_quality(self, quality: str):
        """Set video quality: 'high' (1080p), 'medium' (720p), or 'low' (480p).

        Unknown values update ``self.quality`` but leave dimensions unchanged.
        """
        self.quality = quality
        if quality == 'high':
            self.width, self.height = 1920, 1080
        elif quality == 'medium':
            self.width, self.height = 1280, 720
        elif quality == 'low':
            self.width, self.height = 854, 480

    def set_fps(self, fps: int):
        """Set capture frame rate, clamped to the 1-60 FPS range."""
        self.fps = max(1, min(60, fps))