"""
Screen Share Handler for LiveKit Agent

This module handles screen sharing functionality for the LiveKit Chrome automation agent.
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import cv2
|
|
import numpy as np
|
|
from typing import Optional, Tuple
|
|
import platform
|
|
import subprocess
|
|
|
|
from livekit import rtc
|
|
from livekit.rtc._proto import video_frame_pb2 as proto_video
|
|
|
|
|
|
class ScreenShareHandler:
    """Handles screen sharing and capture for the LiveKit agent.

    Captures the local screen with a platform-specific backend (pyautogui on
    Windows, the ``screencapture`` CLI on macOS, ``xwd`` piped through
    ImageMagick ``convert`` on Linux) and publishes the frames to a LiveKit
    room as a screen-share video track.
    """

    def __init__(self, config: Optional[dict] = None):
        """Initialize the handler.

        Args:
            config: Optional nested configuration dict. Reads
                ``video.screen_capture.fps`` (default 30) and
                ``video.screen_capture.quality`` (default ``'high'``).
        """
        self.config = config or {}
        self.logger = logging.getLogger(__name__)

        # Screen capture settings.
        capture_cfg = self.config.get('video', {}).get('screen_capture', {})
        self.fps = capture_cfg.get('fps', 30)
        self.quality = capture_cfg.get('quality', 'high')

        # Output video dimensions (1080p by default; see set_quality()).
        self.width = 1920
        self.height = 1080

        # Runtime state.
        self.is_sharing = False
        self.video_source: Optional[rtc.VideoSource] = None
        self.video_track: Optional[rtc.LocalVideoTrack] = None
        self.capture_task: Optional[asyncio.Task] = None

        # Lowercased platform name used to select the capture backend
        # ('windows', 'darwin', or 'linux').
        self.platform = platform.system().lower()

    async def initialize(self):
        """Verify screen capture works by taking one test frame.

        Raises:
            Exception: If no frame could be captured on this platform.
        """
        try:
            test_frame = await self._capture_screen()
            if test_frame is None:
                raise Exception("Failed to capture screen")
            self.logger.info("Screen capture initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize screen capture: {e}")
            raise

    async def start_sharing(self, room: rtc.Room) -> bool:
        """Start screen sharing in the room.

        Creates and publishes a screen-share video track, then starts the
        background capture loop.

        Args:
            room: Connected LiveKit room to publish into.

        Returns:
            True on success (or if sharing is already active), False on error.
        """
        try:
            if self.is_sharing:
                self.logger.warning("Screen sharing already active")
                return True

            # Create video source and track at the configured resolution.
            self.video_source = rtc.VideoSource(self.width, self.height)
            self.video_track = rtc.LocalVideoTrack.create_video_track(
                "screen-share",
                self.video_source
            )

            # Publish as a screen-share source using H.264.
            options = rtc.TrackPublishOptions()
            options.source = rtc.TrackSource.SOURCE_SCREENSHARE
            options.video_codec = rtc.VideoCodec.H264

            await room.local_participant.publish_track(self.video_track, options)

            # Set the flag BEFORE starting the loop: _capture_loop exits as
            # soon as is_sharing is False, so it must already be True when the
            # task first runs.
            self.is_sharing = True
            self.capture_task = asyncio.create_task(self._capture_loop())

            self.logger.info("Screen sharing started")
            return True

        except Exception as e:
            self.logger.error(f"Failed to start screen sharing: {e}")
            # Don't leave half-initialized track/source state behind.
            self.is_sharing = False
            self.video_source = None
            self.video_track = None
            return False

    async def stop_sharing(self, room: rtc.Room) -> bool:
        """Stop screen sharing and unpublish the track.

        Args:
            room: The room the track was published into.

        Returns:
            True on success (or if sharing was not active), False on error.
        """
        try:
            if not self.is_sharing:
                return True

            # Cancel the capture loop and wait for it to wind down.
            if self.capture_task:
                self.capture_task.cancel()
                try:
                    await self.capture_task
                except asyncio.CancelledError:
                    pass
                self.capture_task = None

            # Find our publication and unpublish it by sid.
            if self.video_track:
                publications = room.local_participant.track_publications
                for pub in publications.values():
                    if pub.track == self.video_track:
                        await room.local_participant.unpublish_track(pub.sid)
                        break

            self.is_sharing = False
            self.video_source = None
            self.video_track = None

            self.logger.info("Screen sharing stopped")
            return True

        except Exception as e:
            self.logger.error(f"Failed to stop screen sharing: {e}")
            return False

    async def update_screen(self):
        """Force update screen capture (for immediate feedback)."""
        if self.is_sharing and self.video_source:
            frame = await self._capture_screen()
            if frame is not None:
                self._send_frame(frame)

    async def _capture_loop(self):
        """Capture and send frames at the configured FPS until cancelled."""
        frame_interval = 1.0 / self.fps
        loop = asyncio.get_running_loop()  # hoisted out of the hot loop

        try:
            while self.is_sharing:
                start_time = loop.time()

                # Capture and publish one frame.
                frame = await self._capture_screen()
                if frame is not None:
                    self._send_frame(frame)

                # Sleep only for the remainder of the frame budget so the
                # effective rate stays close to self.fps.
                elapsed = loop.time() - start_time
                await asyncio.sleep(max(0, frame_interval - elapsed))

        except asyncio.CancelledError:
            self.logger.info("Screen capture loop cancelled")
        except Exception as e:
            self.logger.error(f"Error in capture loop: {e}")

    async def _capture_screen(self) -> Optional[np.ndarray]:
        """Capture the screen via the platform-specific backend.

        Returns:
            A BGR frame resized to (self.height, self.width), or None on error.
        """
        try:
            if self.platform == 'windows':
                return await self._capture_screen_windows()
            elif self.platform == 'darwin':  # macOS
                return await self._capture_screen_macos()
            elif self.platform == 'linux':
                return await self._capture_screen_linux()
            else:
                self.logger.error(f"Unsupported platform: {self.platform}")
                return None

        except Exception as e:
            self.logger.error(f"Error capturing screen: {e}")
            return None

    def _fit_frame(self, frame: np.ndarray) -> np.ndarray:
        """Resize *frame* to the configured output dimensions if needed."""
        if frame.shape[:2] != (self.height, self.width):
            frame = cv2.resize(frame, (self.width, self.height))
        return frame

    async def _capture_screen_windows(self) -> Optional[np.ndarray]:
        """Capture screen on Windows via pyautogui."""
        try:
            import pyautogui

            screenshot = pyautogui.screenshot()

            # pyautogui yields RGB; convert to BGR for OpenCV processing.
            frame = np.array(screenshot)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            return self._fit_frame(frame)

        except ImportError:
            self.logger.error("pyautogui not available for Windows screen capture")
            return None
        except Exception as e:
            self.logger.error(f"Windows screen capture error: {e}")
            return None

    async def _capture_screen_macos(self) -> Optional[np.ndarray]:
        """Capture screen on macOS via the `screencapture` CLI."""
        try:
            # '-' makes screencapture write the PNG to stdout.
            process = await asyncio.create_subprocess_exec(
                'screencapture', '-t', 'png', '-',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            if process.returncode != 0:
                self.logger.error(f"screencapture failed: {stderr.decode()}")
                return None

            # Decode image; imdecode returns None (not an exception) on
            # corrupt or empty data, so guard before touching frame.shape.
            nparr = np.frombuffer(stdout, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                self.logger.error("Failed to decode screencapture output")
                return None

            return self._fit_frame(frame)

        except Exception as e:
            self.logger.error(f"macOS screen capture error: {e}")
            return None

    async def _capture_screen_linux(self) -> Optional[np.ndarray]:
        """Capture screen on Linux via `xwd` piped through ImageMagick."""
        try:
            # Use xwd command. NOTE(review): this is simplified; a more
            # robust method (e.g. gnome-screenshot) may be preferable.
            process = await asyncio.create_subprocess_exec(
                'xwd', '-root', '-out', '/dev/stdout',
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            if process.returncode != 0:
                return None

            # Convert XWD to PNG with ImageMagick. The input data must be
            # fed through stdin via communicate(); create_subprocess_exec()
            # does not accept an `input=` keyword argument (passing one
            # raised TypeError in the previous implementation).
            convert_process = await asyncio.create_subprocess_exec(
                'convert', 'xwd:-', 'png:-',
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            png_data, _ = await convert_process.communicate(input=stdout)

            if convert_process.returncode != 0:
                return None

            # Guard: imdecode returns None on bad data instead of raising.
            nparr = np.frombuffer(png_data, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if frame is None:
                self.logger.error("Failed to decode converted PNG output")
                return None

            return self._fit_frame(frame)

        except Exception as e:
            self.logger.error(f"Linux screen capture error: {e}")
            return None

    def _send_frame(self, frame: np.ndarray):
        """Push one BGR frame into the LiveKit video source.

        Args:
            frame: BGR image sized (self.height, self.width); the raw RGB24
                bytes are handed to LiveKit with the configured dimensions.
        """
        try:
            if not self.video_source:
                return

            # LiveKit expects raw RGB24 bytes.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            video_frame = rtc.VideoFrame(
                width=self.width,
                height=self.height,
                type=proto_video.VideoBufferType.RGB24,
                data=rgb_frame.tobytes()
            )

            # capture_frame is synchronous, not async.
            self.video_source.capture_frame(video_frame)

        except Exception as e:
            self.logger.error(f"Error sending frame: {e}")

    def set_quality(self, quality: str):
        """Set video quality (high, medium, low).

        Unknown values update self.quality but leave dimensions unchanged
        (matching prior behavior). New dimensions only take full effect for
        sharing sessions started afterwards, because the published
        VideoSource is sized at start_sharing() time.
        """
        self.quality = quality

        if quality == 'high':
            self.width, self.height = 1920, 1080
        elif quality == 'medium':
            self.width, self.height = 1280, 720
        elif quality == 'low':
            self.width, self.height = 854, 480

    def set_fps(self, fps: int):
        """Set capture frame rate, clamped between 1 and 60 FPS."""
        self.fps = max(1, min(60, fps))