broswer-automation/agent-livekit/demo_enhanced_voice_commands.py

#!/usr/bin/env python3
"""
Demo script for Enhanced LiveKit Voice Agent

This script demonstrates the enhanced voice command capabilities
with real-time Chrome MCP integration.
"""

import asyncio
import logging
import sys
import os
from pathlib import Path

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from mcp_chrome_client import MCPChromeClient


class VoiceCommandDemo:
    """Scripted walkthrough of the voice-command features of the MCP client.

    ``run_demo`` is the entry point: it connects to the Chrome MCP server,
    runs every ``demo_*`` section in turn and disconnects afterwards.
    Progress is reported with ``print``; setup outcomes are also logged.
    """

    # Longest content-retrieval result (in characters) shown before truncation.
    _MAX_RESULT_CHARS = 200

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Created by setup(); stays None until then.
        self.mcp_client = None

    async def setup(self):
        """Create the MCP client and connect it to the Chrome MCP server.

        Returns:
            True when the connection succeeded, False otherwise. Any
            ``Exception`` raised while creating or connecting the client is
            logged and converted into the False return value.
        """
        try:
            # Initialize MCP client
            chrome_config = {
                'mcp_server_type': 'http',
                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
                'mcp_server_command': None,
                'mcp_server_args': []
            }
            self.mcp_client = MCPChromeClient(chrome_config)
            await self.mcp_client.connect()

            self.logger.info("Demo environment set up successfully")
            return True

        except Exception as e:
            self.logger.error("Failed to set up demo environment: %s", e)
            return False

    @staticmethod
    def _shorten(result, limit):
        """Return ``result`` as text, cut to ``limit`` chars plus "..." if longer."""
        # str() keeps non-string results (dicts, lists, ...) displayable
        # instead of failing on slicing/concatenation.
        text = str(result)
        return text[:limit] + "..." if len(text) > limit else text

    async def _run_commands(self, commands, pause=1, max_chars=None):
        """Send each command to the MCP client and print its outcome.

        Args:
            commands: Iterable of natural-language command strings.
            pause: Seconds to sleep after each successful command.
            max_chars: When not None, results are shortened to this many
                characters before being printed.

        A failing command is reported and does not stop the remaining ones.
        """
        for command in commands:
            print(f"\n🗣️  Voice Command: '{command}'")
            try:
                result = await self.mcp_client.process_natural_language_command(command)
                if max_chars is not None:
                    result = self._shorten(result, max_chars)
                print(f"✅ Result: {result}")
                await asyncio.sleep(pause)
            except Exception as e:
                print(f"❌ Error: {e}")

    async def demo_form_filling(self):
        """Demonstrate enhanced form filling capabilities.

        Navigates to Google first; a navigation failure is not caught here
        and propagates to the caller.
        """
        print("\n🔤 FORM FILLING DEMO")
        print("=" * 50)

        # Navigate to Google for demo
        await self.mcp_client._navigate_mcp("https://www.google.com")
        await asyncio.sleep(2)

        await self._run_commands([
            "search for python tutorials",
            "type machine learning in search",
            "fill search with artificial intelligence",
        ])

    async def demo_smart_clicking(self):
        """Demonstrate smart clicking capabilities."""
        print("\n🖱️  SMART CLICKING DEMO")
        print("=" * 50)

        await self._run_commands([
            "click Google Search",
            "press I'm Feeling Lucky",
            "click search button",
        ])

    async def demo_content_retrieval(self):
        """Demonstrate content retrieval; long results are truncated for display."""
        print("\n📄 CONTENT RETRIEVAL DEMO")
        print("=" * 50)

        await self._run_commands(
            [
                "what's on this page",
                "show me form fields",
                "what can I click",
                "get interactive elements",
            ],
            max_chars=self._MAX_RESULT_CHARS,
        )

    async def demo_navigation(self):
        """Demonstrate navigation capabilities."""
        print("\n🧭 NAVIGATION DEMO")
        print("=" * 50)

        # Longer pause than the other sections: wait for navigation.
        await self._run_commands(
            [
                "go to google",
                "navigate to facebook",
                "open twitter",
            ],
            pause=2,
        )

    async def demo_advanced_parsing(self):
        """Show how sample commands are parsed, without executing them."""
        print("\n🧠 ADVANCED PARSING DEMO")
        print("=" * 50)

        advanced_commands = [
            "email john@example.com",
            "password secret123",
            "phone 123-456-7890",
            "username john_doe",
            "login",
            "submit"
        ]

        for command in advanced_commands:
            print(f"\n🗣️  Voice Command: '{command}'")
            try:
                action, params = self.mcp_client._parse_voice_command(command)
                print(f"✅ Parsed Action: {action}")
                print(f"📋 Parameters: {params}")
            except Exception as e:
                print(f"❌ Error: {e}")

    async def run_demo(self):
        """Run the complete demo.

        Returns:
            True when setup and every section finished, False when setup
            failed or a section raised.

        The client is disconnected in ``finally`` whenever a client object
        exists, including when ``setup`` created it but could not connect,
        so a half-initialised client is not left behind.
        """
        print("🎤 ENHANCED VOICE AGENT DEMO")
        print("=" * 60)
        print("This demo showcases the enhanced voice command capabilities")
        print("with real-time Chrome MCP integration.")
        print("=" * 60)

        try:
            if not await self.setup():
                print("❌ Demo setup failed")
                return False

            # Run all demo sections
            await self.demo_advanced_parsing()
            await self.demo_navigation()
            await self.demo_form_filling()
            await self.demo_smart_clicking()
            await self.demo_content_retrieval()

            print("\n🎉 DEMO COMPLETED SUCCESSFULLY!")
            print("=" * 60)
            print("The enhanced voice agent demonstrated:")
            print("✅ Natural language command parsing")
            print("✅ Real-time element discovery")
            print("✅ Smart form filling")
            print("✅ Intelligent clicking")
            print("✅ Content retrieval")
            print("✅ Navigation commands")
            print("=" * 60)

            return True

        except Exception as e:
            print(f"❌ Demo failed: {e}")
            return False

        finally:
            if self.mcp_client:
                await self.mcp_client.disconnect()


async def interactive_demo():
    """Run an interactive demo where users can try commands.

    Connects to the Chrome MCP server, then reads commands from stdin and
    forwards each one to ``process_natural_language_command`` until the user
    types 'quit', presses Ctrl-C, or stdin reaches end-of-file. 'help' prints
    example commands; empty lines are ignored. A failing command is reported
    and the prompt continues. The client is always disconnected on exit.
    """
    print("\n🎮 INTERACTIVE DEMO MODE")
    print("=" * 50)
    print("Enter voice commands to test the enhanced agent.")
    print("Type 'quit' to exit, 'help' for examples.")
    print("=" * 50)

    # Set up MCP client
    chrome_config = {
        'mcp_server_type': 'http',
        'mcp_server_url': 'http://127.0.0.1:12306/mcp',
        'mcp_server_command': None,
        'mcp_server_args': []
    }
    mcp_client = MCPChromeClient(chrome_config)

    try:
        await mcp_client.connect()
        print("✅ Connected to Chrome MCP server")

        while True:
            try:
                # NOTE(review): input() blocks the event loop while waiting
                # for the user; confirm the client needs no background tasks
                # to make progress during that time.
                command = input("\n🗣️  Enter voice command: ").strip()
                if not command:
                    continue

                keyword = command.lower()
                if keyword == 'quit':
                    break
                if keyword == 'help':
                    print("\n📚 Example Commands:")
                    print("- fill email with john@example.com")
                    print("- click login button")
                    print("- what's on this page")
                    print("- go to google")
                    print("- search for python")
                    continue

                print(f"🔄 Processing: {command}")
                result = await mcp_client.process_natural_language_command(command)
                print(f"✅ Result: {result}")

            except (KeyboardInterrupt, EOFError):
                # EOFError means stdin is exhausted (closed or piped input);
                # retrying input() would fail forever, so end the session
                # exactly like Ctrl-C does.
                break
            except Exception as e:
                print(f"❌ Error: {e}")

    except Exception as e:
        print(f"❌ Failed to connect to MCP server: {e}")

    finally:
        await mcp_client.disconnect()
        print("\n👋 Interactive demo ended")


async def main():
    """Ask which demo mode to run and return a process exit code.

    Returns 0 when the chosen demo succeeded or the user interrupted it,
    and 1 for an invalid choice, a failed automated demo, or any other error.
    """
    # Configure root logging before any demo code emits records.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    menu_lines = (
        "🎤 Enhanced LiveKit Voice Agent Demo",
        "Choose demo mode:",
        "1. Automated Demo",
        "2. Interactive Demo",
    )
    for line in menu_lines:
        print(line)

    try:
        selection = input("\nEnter choice (1 or 2): ").strip()

        if selection == "2":
            await interactive_demo()
            return 0

        if selection != "1":
            print("Invalid choice. Please enter 1 or 2.")
            return 1

        # Automated mode: the exit code mirrors the demo's success flag.
        succeeded = await VoiceCommandDemo().run_demo()
        if succeeded:
            return 0
        return 1

    except KeyboardInterrupt:
        print("\n👋 Demo interrupted by user")
        return 0
    except Exception as e:
        print(f"❌ Demo failed: {e}")
        return 1


if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))