Files
broswer-automation/agent-livekit/demo_enhanced_voice_commands.py
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

293 lines
9.4 KiB
Python

#!/usr/bin/env python3
"""
Demo script for Enhanced LiveKit Voice Agent
This script demonstrates the enhanced voice command capabilities
with real-time Chrome MCP integration.
"""
import asyncio
import logging
import sys
import os
from pathlib import Path
# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from mcp_chrome_client import MCPChromeClient
class VoiceCommandDemo:
    """Scripted walkthrough of the voice-command features of ``MCPChromeClient``.

    Each ``demo_*`` method prints a section banner, feeds a fixed list of
    text commands to the client and prints what comes back.  Failures of
    individual commands are printed and do not abort the section.
    """

    def __init__(self, chrome_config=None):
        """Prepare the demo without connecting to anything.

        Args:
            chrome_config: Optional mapping passed to ``MCPChromeClient``.
                When omitted, the HTTP server at
                ``http://127.0.0.1:12306/mcp`` is used.
        """
        self.logger = logging.getLogger(__name__)
        self.mcp_client = None
        if chrome_config is None:
            # Built fresh per instance so the nested list is never shared.
            chrome_config = {
                'mcp_server_type': 'http',
                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
                'mcp_server_command': None,
                'mcp_server_args': [],
            }
        self.chrome_config = dict(chrome_config)

    async def setup(self) -> bool:
        """Create the MCP client and connect it.

        Returns:
            True when the client was created and ``connect()`` completed;
            False when either step raised (the error is logged, not raised).
        """
        try:
            self.mcp_client = MCPChromeClient(self.chrome_config)
            await self.mcp_client.connect()
            self.logger.info("Demo environment set up successfully")
            return True
        except Exception as e:
            self.logger.error("Failed to set up demo environment: %s", e)
            return False

    @staticmethod
    def _print_section(title):
        """Print a section banner: a blank line, the title, then a rule."""
        print(f"\n{title}")
        print("=" * 50)

    @staticmethod
    def _preview(result, limit=200):
        """Return ``result`` as text, cut to ``limit`` characters plus ``...``.

        Non-string results are stringified first so that a structured
        response is shown instead of being reported as a failure.
        """
        text = result if isinstance(result, str) else str(result)
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    async def _run_commands(self, commands, *, pause=1.0, truncate=False):
        """Send each command to the client and print its result or error.

        Args:
            commands: Iterable of command strings.
            pause: Seconds to sleep after a command that succeeded; a failed
                command is followed immediately by the next one.
            truncate: When true, long results are shortened with ``_preview``.
        """
        for command in commands:
            print(f"\n🗣️ Voice Command: '{command}'")
            try:
                result = await self.mcp_client.process_natural_language_command(command)
                shown = self._preview(result) if truncate else result
                print(f"✅ Result: {shown}")
                await asyncio.sleep(pause)
            except Exception as e:
                print(f"❌ Error: {e}")

    async def demo_form_filling(self):
        """Open google.com, then run three search/type/fill commands."""
        self._print_section("🔤 FORM FILLING DEMO")
        # Start from a page that has a search box for the commands to act on.
        await self.mcp_client._navigate_mcp("https://www.google.com")
        await asyncio.sleep(2)
        await self._run_commands([
            "search for python tutorials",
            "type machine learning in search",
            "fill search with artificial intelligence",
        ])

    async def demo_smart_clicking(self):
        """Run three click/press commands against the current page."""
        self._print_section("🖱️ SMART CLICKING DEMO")
        await self._run_commands([
            "click Google Search",
            "press I'm Feeling Lucky",
            "click search button",
        ])

    async def demo_content_retrieval(self):
        """Run four page-inspection commands, printing shortened results."""
        self._print_section("📄 CONTENT RETRIEVAL DEMO")
        await self._run_commands(
            [
                "what's on this page",
                "show me form fields",
                "what can I click",
                "get interactive elements",
            ],
            truncate=True,  # results can be long; keep the output readable
        )

    async def demo_navigation(self):
        """Run three navigation commands, pausing two seconds after each."""
        self._print_section("🧭 NAVIGATION DEMO")
        await self._run_commands(
            [
                "go to google",
                "navigate to facebook",
                "open twitter",
            ],
            pause=2,  # wait for navigation
        )

    async def demo_advanced_parsing(self):
        """Show how the client parses six short commands, without running them.

        Only ``_parse_voice_command`` is called; the parsed action and
        parameters are printed for each command.
        """
        self._print_section("🧠 ADVANCED PARSING DEMO")
        advanced_commands = [
            "email john@example.com",
            "password secret123",
            "phone 123-456-7890",
            "username john_doe",
            "login",
            "submit",
        ]
        for command in advanced_commands:
            print(f"\n🗣️ Voice Command: '{command}'")
            try:
                action, params = self.mcp_client._parse_voice_command(command)
                print(f"✅ Parsed Action: {action}")
                print(f"📋 Parameters: {params}")
            except Exception as e:
                print(f"❌ Error: {e}")

    async def run_demo(self) -> bool:
        """Run every demo section in order and disconnect afterwards.

        Returns:
            False when ``setup()`` fails or a section raises; True otherwise.
            Per-command errors are handled inside the sections and do not
            affect the return value.
        """
        print("🎤 ENHANCED VOICE AGENT DEMO")
        print("=" * 60)
        print("This demo showcases the enhanced voice command capabilities")
        print("with real-time Chrome MCP integration.")
        print("=" * 60)
        if not await self.setup():
            print("❌ Demo setup failed")
            return False
        try:
            # Run all demo sections
            await self.demo_advanced_parsing()
            await self.demo_navigation()
            await self.demo_form_filling()
            await self.demo_smart_clicking()
            await self.demo_content_retrieval()
            print("\n🎉 DEMO COMPLETED SUCCESSFULLY!")
            print("=" * 60)
            print("The enhanced voice agent demonstrated:")
            print("✅ Natural language command parsing")
            print("✅ Real-time element discovery")
            print("✅ Smart form filling")
            print("✅ Intelligent clicking")
            print("✅ Content retrieval")
            print("✅ Navigation commands")
            print("=" * 60)
            return True
        except Exception as e:
            print(f"❌ Demo failed: {e}")
            return False
        finally:
            if self.mcp_client:
                await self.mcp_client.disconnect()
async def interactive_demo(mcp_client=None):
    """Read commands from stdin and run each one through the MCP client.

    The loop ends on ``quit``, on Ctrl-C, or when stdin reaches end-of-file.
    ``help`` prints example commands and blank input is ignored.  Errors
    raised while processing a command are printed and the loop continues.

    Args:
        mcp_client: Optional client to use.  It must provide awaitable
            ``connect()``, ``disconnect()`` and
            ``process_natural_language_command(command)``.  When omitted, an
            ``MCPChromeClient`` for ``http://127.0.0.1:12306/mcp`` is created.
            Either way this function connects the client first and
            disconnects it before returning.
    """
    print("\n🎮 INTERACTIVE DEMO MODE")
    print("=" * 50)
    print("Enter voice commands to test the enhanced agent.")
    print("Type 'quit' to exit, 'help' for examples.")
    print("=" * 50)
    if mcp_client is None:
        # Set up MCP client
        chrome_config = {
            'mcp_server_type': 'http',
            'mcp_server_url': 'http://127.0.0.1:12306/mcp',
            'mcp_server_command': None,
            'mcp_server_args': [],
        }
        mcp_client = MCPChromeClient(chrome_config)
    try:
        await mcp_client.connect()
        print("✅ Connected to Chrome MCP server")
        while True:
            try:
                command = input("\n🗣️ Enter voice command: ").strip()
                lowered = command.lower()
                if lowered == 'quit':
                    break
                if lowered == 'help':
                    print("\n📚 Example Commands:")
                    print("- fill email with john@example.com")
                    print("- click login button")
                    print("- what's on this page")
                    print("- go to google")
                    print("- search for python")
                    continue
                if not command:
                    continue
                print(f"🔄 Processing: {command}")
                result = await mcp_client.process_natural_language_command(command)
                print(f"✅ Result: {result}")
            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: input() raises it on every call
                # once stdin is exhausted, so treating it as a per-command
                # error would spin forever.
                break
            except Exception as e:
                print(f"❌ Error: {e}")
    except Exception as e:
        print(f"❌ Failed to connect to MCP server: {e}")
    finally:
        await mcp_client.disconnect()
        print("\n👋 Interactive demo ended")
async def main() -> int:
    """Ask which demo mode to run, run it, and return a process exit code.

    Returns:
        0 when the interactive demo finished, the automated demo reported
        success, or the user pressed Ctrl-C; 1 for an invalid choice, a
        failed automated demo, or any other exception (including
        end-of-file on the choice prompt).
    """
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
    print("🎤 Enhanced LiveKit Voice Agent Demo")
    print("Choose demo mode:")
    print("1. Automated Demo")
    print("2. Interactive Demo")
    try:
        choice = input("\nEnter choice (1 or 2): ").strip()
        if choice == "1":
            demo = VoiceCommandDemo()
            success = await demo.run_demo()
            return 0 if success else 1
        if choice == "2":
            await interactive_demo()
            return 0
        print("Invalid choice. Please enter 1 or 2.")
        return 1
    except KeyboardInterrupt:
        print("\n👋 Demo interrupted by user")
        return 0
    except Exception as e:
        print(f"❌ Demo failed: {e}")
        return 1
if __name__ == "__main__":
    # Run the async entry point and hand its return value to the shell
    # as the process exit status.
    sys.exit(asyncio.run(main()))