first commit

2025-08-12 02:54:17 +05:00
commit d97cad1736
225 changed files with 137626 additions and 0 deletions
--- a/agent-livekit/test_enhanced_voice_agent.py
+++ b/agent-livekit/test_enhanced_voice_agent.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+"""
+Test script for Enhanced LiveKit Voice Agent with Real-time Chrome MCP Integration
+
+This script tests the enhanced voice command processing capabilities including:
+- Natural language form filling
+- Smart element clicking
+- Real-time content retrieval
+- Dynamic element discovery
+"""
+
+import asyncio
+import logging
+import sys
+import os
+from pathlib import Path
+
+# Add current directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent))
+
+from mcp_chrome_client import MCPChromeClient
+from voice_handler import VoiceHandler
+
+
+class EnhancedVoiceAgentTester:
+    """Test suite for the enhanced voice agent capabilities"""
+    
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.mcp_client = None
+        self.voice_handler = None
+        
+    async def setup(self):
+        """Set up test environment"""
+        try:
+            # Initialize MCP client
+            chrome_config = {
+                'mcp_server_type': 'http',
+                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
+                'mcp_server_command': None,
+                'mcp_server_args': []
+            }
+            self.mcp_client = MCPChromeClient(chrome_config)
+            await self.mcp_client.connect()
+            
+            # Initialize voice handler
+            self.voice_handler = VoiceHandler()
+            await self.voice_handler.initialize()
+            
+            self.logger.info("Test environment set up successfully")
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"Failed to set up test environment: {e}")
+            return False
+    
+    async def test_voice_command_parsing(self):
+        """Test voice command parsing with various natural language inputs"""
+        test_commands = [
+            # Form filling commands
+            "fill email with john@example.com",
+            "enter password secret123",
+            "type hello world in search",
+            "username john_doe",
+            "phone 123-456-7890",
+            "email test@gmail.com",
+            "search for python tutorials",
+            
+            # Click commands
+            "click login button",
+            "press submit",
+            "tap on sign up link",
+            "click menu",
+            "login",
+            "submit",
+            
+            # Content retrieval commands
+            "what's on this page",
+            "show me form fields",
+            "what can I click",
+            "get page content",
+            "list interactive elements",
+            
+            # Navigation commands
+            "go to google",
+            "navigate to facebook",
+            "open twitter"
+        ]
+        
+        results = []
+        for command in test_commands:
+            try:
+                action, params = self.mcp_client._parse_voice_command(command)
+                results.append({
+                    'command': command,
+                    'action': action,
+                    'params': params,
+                    'success': action is not None
+                })
+                self.logger.info(f"✓ Parsed '{command}' -> {action}: {params}")
+            except Exception as e:
+                results.append({
+                    'command': command,
+                    'action': None,
+                    'params': {},
+                    'success': False,
+                    'error': str(e)
+                })
+                self.logger.error(f"✗ Failed to parse '{command}': {e}")
+        
+        # Summary
+        successful = sum(1 for r in results if r['success'])
+        total = len(results)
+        self.logger.info(f"Voice command parsing: {successful}/{total} successful")
+        
+        return results
+    
+    async def test_natural_language_processing(self):
+        """Test the enhanced natural language command processing"""
+        test_commands = [
+            "fill email with test@example.com",
+            "click login button",
+            "what's on this page",
+            "show me the form fields",
+            "enter password mypassword123",
+            "search for machine learning"
+        ]
+        
+        results = []
+        for command in test_commands:
+            try:
+                result = await self.mcp_client.process_natural_language_command(command)
+                results.append({
+                    'command': command,
+                    'result': result,
+                    'success': 'error' not in result.lower()
+                })
+                self.logger.info(f"✓ Processed '{command}' -> {result[:100]}...")
+            except Exception as e:
+                results.append({
+                    'command': command,
+                    'result': str(e),
+                    'success': False
+                })
+                self.logger.error(f"✗ Failed to process '{command}': {e}")
+        
+        return results
+    
+    async def test_element_detection(self):
+        """Test real-time element detection capabilities"""
+        try:
+            # Navigate to a test page first
+            await self.mcp_client._navigate_mcp("https://www.google.com")
+            await asyncio.sleep(2)  # Wait for page load
+            
+            # Test form field detection
+            form_fields_result = await self.mcp_client._get_form_fields_mcp()
+            self.logger.info(f"Form fields detection: {form_fields_result[:200]}...")
+            
+            # Test interactive elements detection
+            interactive_result = await self.mcp_client._get_interactive_elements_mcp()
+            self.logger.info(f"Interactive elements detection: {interactive_result[:200]}...")
+            
+            # Test page content retrieval
+            content_result = await self.mcp_client._get_page_content_mcp()
+            self.logger.info(f"Page content retrieval: {content_result[:200]}...")
+            
+            return {
+                'form_fields': form_fields_result,
+                'interactive_elements': interactive_result,
+                'page_content': content_result
+            }
+            
+        except Exception as e:
+            self.logger.error(f"Element detection test failed: {e}")
+            return None
+    
+    async def test_smart_clicking(self):
+        """Test smart clicking functionality"""
+        test_descriptions = [
+            "search",
+            "Google Search",
+            "I'm Feeling Lucky",
+            "button",
+            "link"
+        ]
+        
+        results = []
+        for description in test_descriptions:
+            try:
+                result = await self.mcp_client._smart_click_mcp(description)
+                results.append({
+                    'description': description,
+                    'result': result,
+                    'success': 'clicked' in result.lower() or 'success' in result.lower()
+                })
+                self.logger.info(f"Smart click '{description}': {result}")
+            except Exception as e:
+                results.append({
+                    'description': description,
+                    'result': str(e),
+                    'success': False
+                })
+                self.logger.error(f"Smart click failed for '{description}': {e}")
+        
+        return results
+    
+    async def run_all_tests(self):
+        """Run all test suites"""
+        self.logger.info("Starting Enhanced Voice Agent Tests...")
+        
+        if not await self.setup():
+            self.logger.error("Test setup failed, aborting tests")
+            return False
+        
+        try:
+            # Test 1: Voice command parsing
+            self.logger.info("\n=== Testing Voice Command Parsing ===")
+            parsing_results = await self.test_voice_command_parsing()
+            
+            # Test 2: Natural language processing
+            self.logger.info("\n=== Testing Natural Language Processing ===")
+            nlp_results = await self.test_natural_language_processing()
+            
+            # Test 3: Element detection
+            self.logger.info("\n=== Testing Element Detection ===")
+            detection_results = await self.test_element_detection()
+            
+            # Test 4: Smart clicking
+            self.logger.info("\n=== Testing Smart Clicking ===")
+            clicking_results = await self.test_smart_clicking()
+            
+            # Summary
+            self.logger.info("\n=== Test Summary ===")
+            parsing_success = sum(1 for r in parsing_results if r['success'])
+            nlp_success = sum(1 for r in nlp_results if r['success'])
+            clicking_success = sum(1 for r in clicking_results if r['success'])
+            
+            self.logger.info(f"Voice Command Parsing: {parsing_success}/{len(parsing_results)} successful")
+            self.logger.info(f"Natural Language Processing: {nlp_success}/{len(nlp_results)} successful")
+            self.logger.info(f"Element Detection: {'✓' if detection_results else '✗'}")
+            self.logger.info(f"Smart Clicking: {clicking_success}/{len(clicking_results)} successful")
+            
+            return True
+            
+        except Exception as e:
+            self.logger.error(f"Test execution failed: {e}")
+            return False
+        
+        finally:
+            if self.mcp_client:
+                await self.mcp_client.disconnect()
+
+
+async def main():
+    """Main test function"""
+    # Set up logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.StreamHandler(),
+            logging.FileHandler('enhanced_voice_agent_test.log')
+        ]
+    )
+    
+    # Run tests
+    tester = EnhancedVoiceAgentTester()
+    success = await tester.run_all_tests()
+    
+    if success:
+        print("\n✓ All tests completed successfully!")
+        return 0
+    else:
+        print("\n✗ Some tests failed. Check the logs for details.")
+        return 1
+
+
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    sys.exit(exit_code)