# broswer-automation/agent-livekit/livekit_config.yaml

# LiveKit server connection, room defaults, and agent identity
livekit:
  # Server endpoint. The value is a placeholder; presumably the config
  # loader expands it from the LIVEKIT_URL environment variable (verify).
  url: '${LIVEKIT_URL}'

  # API credentials are supplied through environment variables so that no
  # secret is stored in this file
  api_key: '${LIVEKIT_API_KEY}'
  api_secret: '${LIVEKIT_API_SECRET}'

  # Defaults applied to the room
  room:
    name: mcp-chrome-agent
    max_participants: 10
    # Both values below are expressed in seconds
    empty_timeout: 300
    max_duration: 3600

  # Name, identity, and metadata of the agent participant
  agent:
    name: Chrome Automation Agent
    identity: chrome-agent
    metadata:
      type: automation
      capabilities:
        - chrome
        - screen_share
        - voice

# Audio pipeline configuration
audio:
  # Incoming audio: 16000 sample rate, single channel
  input:
    sample_rate: 16000
    channels: 1
    format: pcm

  # Outgoing audio: 48000 sample rate, two channels
  output:
    sample_rate: 48000
    channels: 2
    format: pcm

  # Voice activity detection (VAD) switch and its threshold
  vad:
    enabled: true
    threshold: 0.5

# Video source configuration
video:
  # Screen capture is turned on
  screen_capture:
    enabled: true
    fps: 30
    quality: high

  # Camera is turned off; its resolution and fps are still listed
  camera:
    enabled: false
    # Kept quoted so the value is always read as a string
    resolution: '1280x720'
    fps: 30

# Speech recognition (speech-to-text) configuration
speech:
  # One of: "openai", "deepgram", "google", "azure"
  provider: openai

  # Language tag used for recognition
  language: en-US

  # Enable real-time transcription
  real_time: true

  # Confidence threshold for recognition results
  confidence_threshold: 0.7

# Text-to-speech (TTS) configuration
tts:
  # One of: "openai", "elevenlabs", "azure", "google"
  provider: openai

  # Voice name and speed
  voice: alloy
  speed: 1.0

# Integration with the Chrome automation MCP server
chrome:
  # MCP server connection: streamable-HTTP transport for chrome-http.
  # The URL is a placeholder; presumably expanded from the MCP_SERVER_URL
  # environment variable by the config loader (verify).
  mcp_server_type: http
  mcp_server_url: '${MCP_SERVER_URL}'
  # No command or arguments are set here
  mcp_server_command: null
  mcp_server_args: []

  # Browser profile used by default
  browser_profile: debug

  # Per-action automation behaviour
  automation:
    screenshot_on_action: true
    highlight_elements: true
    # NOTE(review): unit is not stated in this file; presumably seconds
    action_delay: 1.0