first commit

2025-08-12 02:54:17 +05:00
commit d97cad1736
225 changed files with 137626 additions and 0 deletions
--- a/agent-livekit/demo_enhanced_voice_commands.py
+++ b/agent-livekit/demo_enhanced_voice_commands.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+"""
+Demo script for Enhanced LiveKit Voice Agent
+
+This script demonstrates the enhanced voice command capabilities
+with real-time Chrome MCP integration.
+"""
+
+import asyncio
+import logging
+import sys
+import os
+from pathlib import Path
+
+# Add current directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent))
+
+from mcp_chrome_client import MCPChromeClient
+
+
+class VoiceCommandDemo:
+    """Scripted walkthrough of the voice-command features of ``MCPChromeClient``.
+
+    :meth:`run_demo` connects to the MCP server, runs every demo section in
+    order and disconnects. The ``demo_*`` coroutines use ``self.mcp_client``
+    directly, so they need a successful :meth:`setup` first.
+    """
+
+    def __init__(self):
+        """Create the demo; ``mcp_client`` stays ``None`` until :meth:`setup`."""
+        self.logger = logging.getLogger(__name__)
+        self.mcp_client = None
+
+    async def setup(self):
+        """Create the MCP client and connect it to the local HTTP endpoint.
+
+        Returns:
+            ``True`` on success, ``False`` when creating or connecting the
+            client raised (the error is logged, not re-raised).
+        """
+        chrome_config = {
+            'mcp_server_type': 'http',
+            'mcp_server_url': 'http://127.0.0.1:12306/mcp',
+            'mcp_server_command': None,
+            'mcp_server_args': []
+        }
+        try:
+            self.mcp_client = MCPChromeClient(chrome_config)
+            await self.mcp_client.connect()
+            self.logger.info("Demo environment set up successfully")
+            return True
+        except Exception as e:
+            # Lazy %-style arguments: logging formats the message itself.
+            self.logger.error("Failed to set up demo environment: %s", e)
+            return False
+
+    async def _run_commands(self, commands, pause=1, max_len=None):
+        """Send each command to the MCP client and print its outcome.
+
+        Args:
+            commands: Natural-language command strings, processed in order.
+            pause: Seconds to sleep after each successful command.
+            max_len: When set, the result is rendered with ``str()`` and
+                cut to this many characters followed by ``"..."``.
+        """
+        for command in commands:
+            print(f"\n🗣️  Voice Command: '{command}'")
+            try:
+                result = await self.mcp_client.process_natural_language_command(command)
+                if max_len is not None:
+                    # str() keeps truncation working for non-string results.
+                    text = str(result)
+                    result = text[:max_len] + "..." if len(text) > max_len else text
+                print(f"✅ Result: {result}")
+                await asyncio.sleep(pause)
+            except Exception as e:
+                # Best effort: report the failure and continue with the next command.
+                print(f"❌ Error: {e}")
+
+    async def demo_form_filling(self):
+        """Navigate to Google, then run the form-filling commands."""
+        print("\n🔤 FORM FILLING DEMO")
+        print("=" * 50)
+
+        # Navigate to Google for the demo and pause before issuing commands.
+        await self.mcp_client._navigate_mcp("https://www.google.com")
+        await asyncio.sleep(2)
+
+        await self._run_commands([
+            "search for python tutorials",
+            "type machine learning in search",
+            "fill search with artificial intelligence",
+        ])
+
+    async def demo_smart_clicking(self):
+        """Run the click/press commands."""
+        print("\n🖱️  SMART CLICKING DEMO")
+        print("=" * 50)
+
+        await self._run_commands([
+            "click Google Search",
+            "press I'm Feeling Lucky",
+            "click search button",
+        ])
+
+    async def demo_content_retrieval(self):
+        """Run the page-inspection commands, printing truncated results."""
+        print("\n📄 CONTENT RETRIEVAL DEMO")
+        print("=" * 50)
+
+        # Truncate long results for demo
+        await self._run_commands([
+            "what's on this page",
+            "show me form fields",
+            "what can I click",
+            "get interactive elements",
+        ], max_len=200)
+
+    async def demo_navigation(self):
+        """Run the navigation commands, pausing longer after each one."""
+        print("\n🧭 NAVIGATION DEMO")
+        print("=" * 50)
+
+        await self._run_commands([
+            "go to google",
+            "navigate to facebook",
+            "open twitter",
+        ], pause=2)  # Wait for navigation
+
+    async def demo_advanced_parsing(self):
+        """Parse sample commands and print the resulting action and parameters."""
+        print("\n🧠 ADVANCED PARSING DEMO")
+        print("=" * 50)
+
+        advanced_commands = [
+            "email john@example.com",
+            "password secret123",
+            "phone 123-456-7890",
+            "username john_doe",
+            "login",
+            "submit"
+        ]
+
+        for command in advanced_commands:
+            print(f"\n🗣️  Voice Command: '{command}'")
+            try:
+                action, params = self.mcp_client._parse_voice_command(command)
+                print(f"✅ Parsed Action: {action}")
+                print(f"📋 Parameters: {params}")
+            except Exception as e:
+                print(f"❌ Error: {e}")
+
+    async def run_demo(self):
+        """Run every demo section in order and report overall success.
+
+        The client is disconnected afterwards whether or not a section failed.
+
+        Returns:
+            ``True`` when all sections ran to completion, ``False`` when
+            setup failed or a section raised. Failures of individual
+            commands are printed by the sections and do not count.
+        """
+        print("🎤 ENHANCED VOICE AGENT DEMO")
+        print("=" * 60)
+        print("This demo showcases the enhanced voice command capabilities")
+        print("with real-time Chrome MCP integration.")
+        print("=" * 60)
+
+        if not await self.setup():
+            print("❌ Demo setup failed")
+            return False
+
+        try:
+            # Run all demo sections
+            await self.demo_advanced_parsing()
+            await self.demo_navigation()
+            await self.demo_form_filling()
+            await self.demo_smart_clicking()
+            await self.demo_content_retrieval()
+
+            print("\n🎉 DEMO COMPLETED SUCCESSFULLY!")
+            print("=" * 60)
+            print("The enhanced voice agent demonstrated:")
+            print("✅ Natural language command parsing")
+            print("✅ Real-time element discovery")
+            print("✅ Smart form filling")
+            print("✅ Intelligent clicking")
+            print("✅ Content retrieval")
+            print("✅ Navigation commands")
+            print("=" * 60)
+            return True
+        except Exception as e:
+            print(f"❌ Demo failed: {e}")
+            return False
+        finally:
+            if self.mcp_client:
+                await self.mcp_client.disconnect()
+
+
+async def interactive_demo():
+    """Read commands from stdin and run each one through the MCP client.
+
+    ``quit``, Ctrl-C or end of input ends the session; ``help`` prints
+    examples. The client is disconnected on the way out.
+    """
+    print("\n🎮 INTERACTIVE DEMO MODE")
+    print("=" * 50)
+    print("Enter voice commands to test the enhanced agent.")
+    print("Type 'quit' to exit, 'help' for examples.")
+    print("=" * 50)
+
+    # Set up MCP client
+    chrome_config = {
+        'mcp_server_type': 'http',
+        'mcp_server_url': 'http://127.0.0.1:12306/mcp',
+        'mcp_server_command': None,
+        'mcp_server_args': []
+    }
+    mcp_client = MCPChromeClient(chrome_config)
+    try:
+        await mcp_client.connect()
+        print("✅ Connected to Chrome MCP server")
+
+        while True:
+            try:
+                command = input("\n🗣️  Enter voice command: ").strip()
+                if not command:
+                    continue
+                if command.lower() == 'quit':
+                    break
+                if command.lower() == 'help':
+                    print("\n📚 Example Commands:")
+                    print("- fill email with john@example.com")
+                    print("- click login button")
+                    print("- what's on this page")
+                    print("- go to google")
+                    print("- search for python")
+                    continue
+                print(f"🔄 Processing: {command}")
+                result = await mcp_client.process_natural_language_command(command)
+                print(f"✅ Result: {result}")
+            except (KeyboardInterrupt, EOFError):
+                # EOFError must end the loop: input() raises it on every call
+                # once stdin is exhausted, so reporting it would spin forever.
+                break
+            except Exception as e:
+                print(f"❌ Error: {e}")
+    except Exception as e:
+        print(f"❌ Failed to connect to MCP server: {e}")
+    finally:
+        await mcp_client.disconnect()
+        print("\n👋 Interactive demo ended")
+
+
+async def main():
+    """Ask which demo mode to run, run it, and return a process exit code.
+
+    Returns ``0`` after a successful automated run, an interactive session
+    or Ctrl-C, and ``1`` otherwise.
+    """
+    log_format = '%(asctime)s - %(levelname)s - %(message)s'
+    logging.basicConfig(level=logging.INFO, format=log_format)
+
+    for banner_line in (
+        "🎤 Enhanced LiveKit Voice Agent Demo",
+        "Choose demo mode:",
+        "1. Automated Demo",
+        "2. Interactive Demo",
+    ):
+        print(banner_line)
+
+    try:
+        choice = input("\nEnter choice (1 or 2): ").strip()
+        if choice == "1":
+            succeeded = await VoiceCommandDemo().run_demo()
+            return 0 if succeeded else 1
+        if choice == "2":
+            await interactive_demo()
+            return 0
+        print("Invalid choice. Please enter 1 or 2.")
+        return 1
+    except KeyboardInterrupt:
+        print("\n👋 Demo interrupted by user")
+        return 0
+    except Exception as e:
+        print(f"❌ Demo failed: {e}")
+        return 1
+
+
+if __name__ == "__main__":
+    # Use the integer returned by main() as the process exit status.
+    sys.exit(asyncio.run(main()))