broswer-automation/agent-livekit/test_enhanced_voice_agent.py

#!/usr/bin/env python3
"""
Test script for Enhanced LiveKit Voice Agent with Real-time Chrome MCP Integration

This script tests the enhanced voice command processing capabilities including:
- Natural language form filling
- Smart element clicking
- Real-time content retrieval
- Dynamic element discovery
"""

import asyncio
import logging
import sys
import os
from pathlib import Path

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from mcp_chrome_client import MCPChromeClient
from voice_handler import VoiceHandler


class EnhancedVoiceAgentTester:
    """Test suite for the enhanced voice agent capabilities.

    Drives an ``MCPChromeClient`` through four groups of checks (voice
    command parsing, natural language processing, element detection and
    smart clicking) and reports the outcome through ``self.logger``.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Both collaborators are created by setup(); they stay None until then.
        self.mcp_client = None
        self.voice_handler = None

    async def _disconnect_client(self):
        """Disconnect the MCP client, if any, without raising.

        Cleanup is best-effort: a failing ``disconnect()`` is logged as a
        warning so that it cannot mask the result of the step that
        triggered the cleanup.
        """
        if self.mcp_client is None:
            return
        try:
            await self.mcp_client.disconnect()
        except Exception as e:
            self.logger.warning(f"Failed to disconnect MCP client: {e}")

    async def setup(self) -> bool:
        """Set up test environment.

        Creates the MCP client (HTTP transport on ``127.0.0.1:12306``),
        connects it, then creates and initializes the voice handler.

        Returns:
            True when every step succeeded, False otherwise.  On failure
            the MCP client (if one is held) is disconnected so that a
            connection opened before the failing step is not leaked.
        """
        try:
            # Initialize MCP client
            chrome_config = {
                'mcp_server_type': 'http',
                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
                'mcp_server_command': None,
                'mcp_server_args': []
            }
            self.mcp_client = MCPChromeClient(chrome_config)
            await self.mcp_client.connect()

            # Initialize voice handler
            self.voice_handler = VoiceHandler()
            await self.voice_handler.initialize()

            self.logger.info("Test environment set up successfully")
            return True

        except Exception as e:
            self.logger.error(f"Failed to set up test environment: {e}")
            # run_all_tests() aborts before its own try/finally when setup
            # fails, so the connection has to be released here.
            await self._disconnect_client()
            return False

    async def test_voice_command_parsing(self) -> list:
        """Test voice command parsing with various natural language inputs.

        Each command is passed to ``mcp_client._parse_voice_command``; a
        command counts as successful when the returned action is not None.

        Returns:
            A list with one dict per command holding ``command``,
            ``action``, ``params`` and ``success`` (plus ``error`` when
            parsing raised).
        """
        test_commands = [
            # Form filling commands
            "fill email with john@example.com",
            "enter password secret123",
            "type hello world in search",
            "username john_doe",
            "phone 123-456-7890",
            "email test@gmail.com",
            "search for python tutorials",

            # Click commands
            "click login button",
            "press submit",
            "tap on sign up link",
            "click menu",
            "login",
            "submit",

            # Content retrieval commands
            "what's on this page",
            "show me form fields",
            "what can I click",
            "get page content",
            "list interactive elements",

            # Navigation commands
            "go to google",
            "navigate to facebook",
            "open twitter"
        ]

        results = []
        for command in test_commands:
            try:
                action, params = self.mcp_client._parse_voice_command(command)
                results.append({
                    'command': command,
                    'action': action,
                    'params': params,
                    'success': action is not None
                })
                self.logger.info(f"✓ Parsed '{command}' -> {action}: {params}")
            except Exception as e:
                # A raising parser is recorded as a failed command rather
                # than aborting the remaining commands.
                results.append({
                    'command': command,
                    'action': None,
                    'params': {},
                    'success': False,
                    'error': str(e)
                })
                self.logger.error(f"✗ Failed to parse '{command}': {e}")

        # Summary
        successful = sum(1 for r in results if r['success'])
        total = len(results)
        self.logger.info(f"Voice command parsing: {successful}/{total} successful")

        return results

    async def test_natural_language_processing(self) -> list:
        """Test the enhanced natural language command processing.

        Each command is awaited through
        ``mcp_client.process_natural_language_command``; a command counts
        as successful when the returned text does not contain "error"
        (case-insensitive).

        Returns:
            A list with one dict per command holding ``command``,
            ``result`` and ``success``.
        """
        test_commands = [
            "fill email with test@example.com",
            "click login button",
            "what's on this page",
            "show me the form fields",
            "enter password mypassword123",
            "search for machine learning"
        ]

        results = []
        for command in test_commands:
            try:
                result = await self.mcp_client.process_natural_language_command(command)
                results.append({
                    'command': command,
                    'result': result,
                    'success': 'error' not in result.lower()
                })
                self.logger.info(f"✓ Processed '{command}' -> {result[:100]}...")
            except Exception as e:
                results.append({
                    'command': command,
                    'result': str(e),
                    'success': False
                })
                self.logger.error(f"✗ Failed to process '{command}': {e}")

        return results

    async def test_element_detection(self):
        """Test real-time element detection capabilities.

        Navigates to https://www.google.com, waits two seconds, then
        queries form fields, interactive elements and page content.

        Returns:
            A dict with ``form_fields``, ``interactive_elements`` and
            ``page_content``, or None when any step raised.
        """
        try:
            # Navigate to a test page first
            await self.mcp_client._navigate_mcp("https://www.google.com")
            await asyncio.sleep(2)  # Wait for page load

            # Test form field detection
            form_fields_result = await self.mcp_client._get_form_fields_mcp()
            self.logger.info(f"Form fields detection: {form_fields_result[:200]}...")

            # Test interactive elements detection
            interactive_result = await self.mcp_client._get_interactive_elements_mcp()
            self.logger.info(f"Interactive elements detection: {interactive_result[:200]}...")

            # Test page content retrieval
            content_result = await self.mcp_client._get_page_content_mcp()
            self.logger.info(f"Page content retrieval: {content_result[:200]}...")

            return {
                'form_fields': form_fields_result,
                'interactive_elements': interactive_result,
                'page_content': content_result
            }

        except Exception as e:
            self.logger.error(f"Element detection test failed: {e}")
            return None

    async def test_smart_clicking(self) -> list:
        """Test smart clicking functionality.

        Each description is awaited through ``mcp_client._smart_click_mcp``;
        a click counts as successful when the returned text contains
        "clicked" or "success" (case-insensitive).

        NOTE(review): this method does not navigate anywhere itself; the
        descriptions presumably target the page loaded by
        test_element_detection() -- confirm before reordering the suite.

        Returns:
            A list with one dict per description holding ``description``,
            ``result`` and ``success``.
        """
        test_descriptions = [
            "search",
            "Google Search",
            "I'm Feeling Lucky",
            "button",
            "link"
        ]

        results = []
        for description in test_descriptions:
            try:
                result = await self.mcp_client._smart_click_mcp(description)
                results.append({
                    'description': description,
                    'result': result,
                    'success': 'clicked' in result.lower() or 'success' in result.lower()
                })
                self.logger.info(f"Smart click '{description}': {result}")
            except Exception as e:
                results.append({
                    'description': description,
                    'result': str(e),
                    'success': False
                })
                self.logger.error(f"Smart click failed for '{description}': {e}")

        return results

    async def run_all_tests(self) -> bool:
        """Run all test suites.

        Returns:
            True when setup succeeded and every suite ran to completion
            (individual failures are only reported in the log summary);
            False when setup failed or a suite raised unexpectedly.
        """
        self.logger.info("Starting Enhanced Voice Agent Tests...")

        # setup() releases the client itself when it fails.
        if not await self.setup():
            self.logger.error("Test setup failed, aborting tests")
            return False

        try:
            # Test 1: Voice command parsing
            self.logger.info("\n=== Testing Voice Command Parsing ===")
            parsing_results = await self.test_voice_command_parsing()

            # Test 2: Natural language processing
            self.logger.info("\n=== Testing Natural Language Processing ===")
            nlp_results = await self.test_natural_language_processing()

            # Test 3: Element detection
            self.logger.info("\n=== Testing Element Detection ===")
            detection_results = await self.test_element_detection()

            # Test 4: Smart clicking
            self.logger.info("\n=== Testing Smart Clicking ===")
            clicking_results = await self.test_smart_clicking()

            # Summary
            self.logger.info("\n=== Test Summary ===")
            parsing_success = sum(1 for r in parsing_results if r['success'])
            nlp_success = sum(1 for r in nlp_results if r['success'])
            clicking_success = sum(1 for r in clicking_results if r['success'])

            self.logger.info(f"Voice Command Parsing: {parsing_success}/{len(parsing_results)} successful")
            self.logger.info(f"Natural Language Processing: {nlp_success}/{len(nlp_results)} successful")
            self.logger.info(f"Element Detection: {'✓' if detection_results else '✗'}")
            self.logger.info(f"Smart Clicking: {clicking_success}/{len(clicking_results)} successful")

            return True

        except Exception as e:
            self.logger.error(f"Test execution failed: {e}")
            return False

        finally:
            # Best-effort cleanup: a disconnect error is logged instead of
            # replacing the return value computed above.
            await self._disconnect_client()


async def main():
    """Configure logging, run the whole test suite and report the outcome.

    Log records are written both to the console and to
    ``enhanced_voice_agent_test.log`` in the current working directory.

    Returns:
        0 when ``run_all_tests()`` returned a truthy value, 1 otherwise.
    """
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            # The suite logs non-ASCII markers (✓ / ✗); pin UTF-8 so writing
            # the log file does not depend on the platform default encoding.
            logging.FileHandler('enhanced_voice_agent_test.log', encoding='utf-8')
        ]
    )

    # Run tests
    tester = EnhancedVoiceAgentTester()
    success = await tester.run_all_tests()

    if success:
        print("\n✓ All tests completed successfully!")
        return 0
    else:
        print("\n✗ Some tests failed. Check the logs for details.")
        return 1


if __name__ == "__main__":
    # Run the async entry point and hand its return code to the shell.
    sys.exit(asyncio.run(main()))