#!/usr/bin/env python3
"""
Test script for Enhanced LiveKit Voice Agent with Real-time Chrome MCP Integration

This script tests the enhanced voice command processing capabilities including:
- Natural language form filling
- Smart element clicking
- Real-time content retrieval
- Dynamic element discovery
"""

import asyncio
import logging
import sys
import os
from pathlib import Path

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from mcp_chrome_client import MCPChromeClient
from voice_handler import VoiceHandler


class EnhancedVoiceAgentTester:
    """Test suite for the enhanced voice agent capabilities.

    Typical use is ``await EnhancedVoiceAgentTester().run_all_tests()``, which
    sets up the MCP client and voice handler, runs every suite and always
    releases the MCP connection afterwards.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.mcp_client = None
        self.voice_handler = None
        # Set only once connect() has returned, so cleanup never calls
        # disconnect() on a client that failed to connect.
        self._mcp_connected = False

    async def setup(self):
        """Set up test environment.

        Creates the MCP Chrome client (HTTP transport at
        http://127.0.0.1:12306/mcp), connects it, then creates and
        initializes the voice handler.

        Returns:
            True when every step succeeded, False if any step raised (the
            error is logged, not propagated).
        """
        try:
            # Initialize MCP client
            chrome_config = {
                'mcp_server_type': 'http',
                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
                'mcp_server_command': None,
                'mcp_server_args': [],
            }
            self.mcp_client = MCPChromeClient(chrome_config)
            await self.mcp_client.connect()
            self._mcp_connected = True

            # Initialize voice handler
            self.voice_handler = VoiceHandler()
            await self.voice_handler.initialize()

            self.logger.info("Test environment set up successfully")
            return True

        except Exception as e:
            self.logger.error(f"Failed to set up test environment: {e}")
            return False

    async def _teardown(self):
        """Disconnect the MCP client if, and only if, it was connected."""
        if self.mcp_client and self._mcp_connected:
            try:
                await self.mcp_client.disconnect()
            finally:
                # Whatever disconnect() did, do not try it a second time.
                self._mcp_connected = False

    async def test_voice_command_parsing(self):
        """Test voice command parsing with various natural language inputs.

        Each command is passed to the client's ``_parse_voice_command``; a
        command counts as successful when the returned action is not None.

        Returns:
            A list with one dict per command holding ``command``, ``action``,
            ``params`` and ``success`` (plus ``error`` when parsing raised).
        """
        test_commands = [
            # Form filling commands
            "fill email with john@example.com",
            "enter password secret123",
            "type hello world in search",
            "username john_doe",
            "phone 123-456-7890",
            "email test@gmail.com",
            "search for python tutorials",

            # Click commands
            "click login button",
            "press submit",
            "tap on sign up link",
            "click menu",
            "login",
            "submit",

            # Content retrieval commands
            "what's on this page",
            "show me form fields",
            "what can I click",
            "get page content",
            "list interactive elements",

            # Navigation commands
            "go to google",
            "navigate to facebook",
            "open twitter",
        ]

        results = []
        for command in test_commands:
            try:
                action, params = self.mcp_client._parse_voice_command(command)
                parsed = action is not None
                results.append({
                    'command': command,
                    'action': action,
                    'params': params,
                    'success': parsed,
                })
                # The marker mirrors the recorded outcome instead of always
                # claiming success.
                marker = '✓' if parsed else '✗'
                self.logger.info(f"{marker} Parsed '{command}' -> {action}: {params}")
            except Exception as e:
                results.append({
                    'command': command,
                    'action': None,
                    'params': {},
                    'success': False,
                    'error': str(e),
                })
                self.logger.error(f"✗ Failed to parse '{command}': {e}")

        # Summary
        successful = sum(1 for r in results if r['success'])
        total = len(results)
        self.logger.info(f"Voice command parsing: {successful}/{total} successful")

        return results

    async def test_natural_language_processing(self):
        """Test the enhanced natural language command processing.

        Each command is awaited through the client's
        ``process_natural_language_command``; a result counts as successful
        when its lower-cased text does not contain ``error``.

        Returns:
            A list with one dict per command holding ``command``, ``result``
            and ``success``.
        """
        test_commands = [
            "fill email with test@example.com",
            "click login button",
            "what's on this page",
            "show me the form fields",
            "enter password mypassword123",
            "search for machine learning",
        ]

        results = []
        for command in test_commands:
            try:
                result = await self.mcp_client.process_natural_language_command(command)
                ok = 'error' not in result.lower()
                results.append({
                    'command': command,
                    'result': result,
                    'success': ok,
                })
                marker = '✓' if ok else '✗'
                self.logger.info(f"{marker} Processed '{command}' -> {result[:100]}...")
            except Exception as e:
                results.append({
                    'command': command,
                    'result': str(e),
                    'success': False,
                })
                self.logger.error(f"✗ Failed to process '{command}': {e}")

        return results

    async def test_element_detection(self):
        """Test real-time element detection capabilities.

        Navigates to https://www.google.com, waits two seconds, then queries
        form fields, interactive elements and page content, logging the first
        200 characters of each result.

        Returns:
            A dict with ``form_fields``, ``interactive_elements`` and
            ``page_content``, or None if any step raised.
        """
        try:
            # Navigate to a test page first
            await self.mcp_client._navigate_mcp("https://www.google.com")
            await asyncio.sleep(2)  # Wait for page load

            # Test form field detection
            form_fields_result = await self.mcp_client._get_form_fields_mcp()
            self.logger.info(f"Form fields detection: {form_fields_result[:200]}...")

            # Test interactive elements detection
            interactive_result = await self.mcp_client._get_interactive_elements_mcp()
            self.logger.info(f"Interactive elements detection: {interactive_result[:200]}...")

            # Test page content retrieval
            content_result = await self.mcp_client._get_page_content_mcp()
            self.logger.info(f"Page content retrieval: {content_result[:200]}...")

            return {
                'form_fields': form_fields_result,
                'interactive_elements': interactive_result,
                'page_content': content_result,
            }

        except Exception as e:
            self.logger.error(f"Element detection test failed: {e}")
            return None

    async def test_smart_clicking(self):
        """Test smart clicking functionality.

        Each description is awaited through the client's ``_smart_click_mcp``;
        a click counts as successful when the lower-cased result contains
        ``clicked`` or ``success``.

        Returns:
            A list with one dict per description holding ``description``,
            ``result`` and ``success``.
        """
        test_descriptions = [
            "search",
            "Google Search",
            "I'm Feeling Lucky",
            "button",
            "link",
        ]

        results = []
        for description in test_descriptions:
            try:
                result = await self.mcp_client._smart_click_mcp(description)
                results.append({
                    'description': description,
                    'result': result,
                    'success': 'clicked' in result.lower() or 'success' in result.lower(),
                })
                self.logger.info(f"Smart click '{description}': {result}")
            except Exception as e:
                results.append({
                    'description': description,
                    'result': str(e),
                    'success': False,
                })
                self.logger.error(f"Smart click failed for '{description}': {e}")

        return results

    async def run_all_tests(self):
        """Run all test suites.

        Returns:
            True when setup succeeded and every suite ran to completion,
            False when setup failed or a suite raised. Individual failing
            cases are only reported in the logged summary; they do not turn
            the return value to False.
        """
        self.logger.info("Starting Enhanced Voice Agent Tests...")

        try:
            # Setup runs inside the try so a partially successful setup
            # (client connected, voice handler failed) is still cleaned up.
            if not await self.setup():
                self.logger.error("Test setup failed, aborting tests")
                return False

            # Test 1: Voice command parsing
            self.logger.info("\n=== Testing Voice Command Parsing ===")
            parsing_results = await self.test_voice_command_parsing()

            # Test 2: Natural language processing
            self.logger.info("\n=== Testing Natural Language Processing ===")
            nlp_results = await self.test_natural_language_processing()

            # Test 3: Element detection
            self.logger.info("\n=== Testing Element Detection ===")
            detection_results = await self.test_element_detection()

            # Test 4: Smart clicking
            self.logger.info("\n=== Testing Smart Clicking ===")
            clicking_results = await self.test_smart_clicking()

            # Summary
            self.logger.info("\n=== Test Summary ===")
            parsing_success = sum(1 for r in parsing_results if r['success'])
            nlp_success = sum(1 for r in nlp_results if r['success'])
            clicking_success = sum(1 for r in clicking_results if r['success'])

            self.logger.info(f"Voice Command Parsing: {parsing_success}/{len(parsing_results)} successful")
            self.logger.info(f"Natural Language Processing: {nlp_success}/{len(nlp_results)} successful")
            self.logger.info(f"Element Detection: {'✓' if detection_results else '✗'}")
            self.logger.info(f"Smart Clicking: {clicking_success}/{len(clicking_results)} successful")

            return True

        except Exception as e:
            self.logger.error(f"Test execution failed: {e}")
            return False

        finally:
            await self._teardown()


async def main():
    """Main test function.

    Configures logging to the console and to enhanced_voice_agent_test.log,
    runs the full test suite and returns the process exit code (0 when
    ``run_all_tests`` returned True, otherwise 1).
    """
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            # Explicit UTF-8: the log messages contain '✓' / '✗', which the
            # platform default encoding cannot always represent.
            logging.FileHandler('enhanced_voice_agent_test.log', encoding='utf-8'),
        ],
    )

    # Run tests
    tester = EnhancedVoiceAgentTester()
    success = await tester.run_all_tests()

    if success:
        print("\n✓ All tests completed successfully!")
        return 0
    else:
        print("\n✗ Some tests failed. Check the logs for details.")
        return 1


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)