Files
broswer-automation/agent-livekit/test_enhanced_voice_agent.py
nasir@endelospay.com d97cad1736 first commit
2025-08-12 02:54:17 +05:00

282 lines
9.9 KiB
Python

#!/usr/bin/env python3
"""
Test script for Enhanced LiveKit Voice Agent with Real-time Chrome MCP Integration
This script tests the enhanced voice command processing capabilities including:
- Natural language form filling
- Smart element clicking
- Real-time content retrieval
- Dynamic element discovery
"""
import asyncio
import logging
import sys
import os
from pathlib import Path
# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from mcp_chrome_client import MCPChromeClient
from voice_handler import VoiceHandler
class EnhancedVoiceAgentTester:
    """Test suite for the enhanced voice agent capabilities.

    Each ``test_*`` coroutine drives ``self.mcp_client`` with a fixed set of
    inputs, logs the outcome of every input and returns the collected
    results. ``run_all_tests`` wires them together and logs a summary.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Both are created lazily by setup(); None means "not set up yet".
        self.mcp_client = None
        self.voice_handler = None

    async def setup(self):
        """Set up test environment.

        Creates an ``MCPChromeClient`` configured for an HTTP MCP server at
        ``http://127.0.0.1:12306/mcp`` and connects it, then creates and
        initializes a ``VoiceHandler``.

        Returns:
            True when every step succeeded, False if any step raised (the
            error is logged, not propagated).
        """
        try:
            # Initialize MCP client
            chrome_config = {
                'mcp_server_type': 'http',
                'mcp_server_url': 'http://127.0.0.1:12306/mcp',
                'mcp_server_command': None,
                'mcp_server_args': [],
            }
            self.mcp_client = MCPChromeClient(chrome_config)
            await self.mcp_client.connect()

            # Initialize voice handler
            self.voice_handler = VoiceHandler()
            await self.voice_handler.initialize()

            self.logger.info("Test environment set up successfully")
            return True
        except Exception as e:
            self.logger.error(f"Failed to set up test environment: {e}")
            return False

    async def test_voice_command_parsing(self):
        """Test voice command parsing with various natural language inputs.

        Feeds each sample phrase to ``mcp_client._parse_voice_command`` and
        unpacks the result as ``(action, params)``.

        Returns:
            A list with one dict per phrase holding ``command``, ``action``,
            ``params`` and ``success`` (True when ``action`` is not None).
            Entries for phrases that raised also carry an ``error`` string.
        """
        test_commands = [
            # Form filling commands
            "fill email with john@example.com",
            "enter password secret123",
            "type hello world in search",
            "username john_doe",
            "phone 123-456-7890",
            "email test@gmail.com",
            "search for python tutorials",
            # Click commands
            "click login button",
            "press submit",
            "tap on sign up link",
            "click menu",
            "login",
            "submit",
            # Content retrieval commands
            "what's on this page",
            "show me form fields",
            "what can I click",
            "get page content",
            "list interactive elements",
            # Navigation commands
            "go to google",
            "navigate to facebook",
            "open twitter",
        ]

        results = []
        for command in test_commands:
            try:
                action, params = self.mcp_client._parse_voice_command(command)
                results.append({
                    'command': command,
                    'action': action,
                    'params': params,
                    'success': action is not None,
                })
                self.logger.info(f"✓ Parsed '{command}' -> {action}: {params}")
            except Exception as e:
                results.append({
                    'command': command,
                    'action': None,
                    'params': {},
                    'success': False,
                    'error': str(e),
                })
                self.logger.error(f"✗ Failed to parse '{command}': {e}")

        # Summary
        successful = sum(1 for r in results if r['success'])
        total = len(results)
        self.logger.info(f"Voice command parsing: {successful}/{total} successful")
        return results

    async def test_natural_language_processing(self):
        """Test the enhanced natural language command processing.

        Awaits ``mcp_client.process_natural_language_command`` for each
        sample phrase. A phrase counts as successful when the returned text
        does not contain ``error`` (case-insensitive).

        Returns:
            A list with one dict per phrase holding ``command``, ``result``
            and ``success``. When the call raises, ``result`` is the
            exception text and ``success`` is False.
        """
        test_commands = [
            "fill email with test@example.com",
            "click login button",
            "what's on this page",
            "show me the form fields",
            "enter password mypassword123",
            "search for machine learning",
        ]

        results = []
        for command in test_commands:
            try:
                result = await self.mcp_client.process_natural_language_command(command)
                results.append({
                    'command': command,
                    'result': result,
                    'success': 'error' not in result.lower(),
                })
                self.logger.info(f"✓ Processed '{command}' -> {result[:100]}...")
            except Exception as e:
                results.append({
                    'command': command,
                    'result': str(e),
                    'success': False,
                })
                self.logger.error(f"✗ Failed to process '{command}': {e}")
        return results

    async def test_element_detection(self):
        """Test real-time element detection capabilities.

        Navigates to https://www.google.com, waits two seconds, then queries
        form fields, interactive elements and page content through the MCP
        client, logging the first 200 characters of each answer.

        Returns:
            A dict with keys ``form_fields``, ``interactive_elements`` and
            ``page_content``, or None if any step raised.
        """
        try:
            # Navigate to a test page first
            await self.mcp_client._navigate_mcp("https://www.google.com")
            await asyncio.sleep(2)  # Wait for page load

            # Test form field detection
            form_fields_result = await self.mcp_client._get_form_fields_mcp()
            self.logger.info(f"Form fields detection: {form_fields_result[:200]}...")

            # Test interactive elements detection
            interactive_result = await self.mcp_client._get_interactive_elements_mcp()
            self.logger.info(f"Interactive elements detection: {interactive_result[:200]}...")

            # Test page content retrieval
            content_result = await self.mcp_client._get_page_content_mcp()
            self.logger.info(f"Page content retrieval: {content_result[:200]}...")

            return {
                'form_fields': form_fields_result,
                'interactive_elements': interactive_result,
                'page_content': content_result,
            }
        except Exception as e:
            self.logger.error(f"Element detection test failed: {e}")
            return None

    async def test_smart_clicking(self):
        """Test smart clicking functionality.

        Awaits ``mcp_client._smart_click_mcp`` for each description. A click
        counts as successful when the returned text contains ``clicked`` or
        ``success`` (case-insensitive).

        Returns:
            A list with one dict per description holding ``description``,
            ``result`` and ``success``. When the call raises, ``result`` is
            the exception text and ``success`` is False.
        """
        test_descriptions = [
            "search",
            "Google Search",
            "I'm Feeling Lucky",
            "button",
            "link",
        ]

        results = []
        for description in test_descriptions:
            try:
                result = await self.mcp_client._smart_click_mcp(description)
                lowered = result.lower()
                results.append({
                    'description': description,
                    'result': result,
                    'success': 'clicked' in lowered or 'success' in lowered,
                })
                self.logger.info(f"Smart click '{description}': {result}")
            except Exception as e:
                results.append({
                    'description': description,
                    'result': str(e),
                    'success': False,
                })
                self.logger.error(f"Smart click failed for '{description}': {e}")
        return results

    async def _disconnect_client(self):
        """Disconnect the MCP client if one exists, logging any failure."""
        if self.mcp_client is None:
            return
        try:
            await self.mcp_client.disconnect()
        except Exception as e:
            # Cleanup problems are reported but must not replace the outcome
            # of the test run itself.
            self.logger.warning(f"Failed to disconnect MCP client: {e}")

    async def run_all_tests(self):
        """Run all test suites.

        Returns:
            True when setup succeeded and every suite ran to completion
            (individual checks inside a suite may still have failed; see the
            logged summary). False when setup failed or a suite raised.
        """
        self.logger.info("Starting Enhanced Voice Agent Tests...")
        try:
            # setup() runs inside the guarded region so that a client which
            # was created before a later setup step failed is still
            # disconnected by the finally clause.
            if not await self.setup():
                self.logger.error("Test setup failed, aborting tests")
                return False

            # Test 1: Voice command parsing
            self.logger.info("\n=== Testing Voice Command Parsing ===")
            parsing_results = await self.test_voice_command_parsing()

            # Test 2: Natural language processing
            self.logger.info("\n=== Testing Natural Language Processing ===")
            nlp_results = await self.test_natural_language_processing()

            # Test 3: Element detection
            self.logger.info("\n=== Testing Element Detection ===")
            detection_results = await self.test_element_detection()

            # Test 4: Smart clicking
            self.logger.info("\n=== Testing Smart Clicking ===")
            clicking_results = await self.test_smart_clicking()

            # Summary
            self.logger.info("\n=== Test Summary ===")
            parsing_success = sum(1 for r in parsing_results if r['success'])
            nlp_success = sum(1 for r in nlp_results if r['success'])
            clicking_success = sum(1 for r in clicking_results if r['success'])
            self.logger.info(f"Voice Command Parsing: {parsing_success}/{len(parsing_results)} successful")
            self.logger.info(f"Natural Language Processing: {nlp_success}/{len(nlp_results)} successful")
            # test_element_detection returns None on failure, a dict otherwise.
            self.logger.info(f"Element Detection: {'✓' if detection_results else '✗'}")
            self.logger.info(f"Smart Clicking: {clicking_success}/{len(clicking_results)} successful")
            return True
        except Exception as e:
            self.logger.error(f"Test execution failed: {e}")
            return False
        finally:
            await self._disconnect_client()
async def main():
    """Main test function.

    Configures logging to both the console and
    ``enhanced_voice_agent_test.log``, runs the whole suite and reports the
    outcome on stdout.

    Returns:
        0 when ``run_all_tests`` returned a truthy value, 1 otherwise.
    """
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            # Log lines contain non-ASCII markers (✓ / ✗); pin the encoding
            # so writing them does not depend on the platform default.
            logging.FileHandler('enhanced_voice_agent_test.log', encoding='utf-8'),
        ],
    )

    # Run tests
    tester = EnhancedVoiceAgentTester()
    success = await tester.run_all_tests()
    if success:
        print("\n✓ All tests completed successfully!")
        return 0
    print("\n✗ Some tests failed. Check the logs for details.")
    return 1
if __name__ == "__main__":
    # Run the async entry point and hand its return value to the shell as
    # the process exit status.
    sys.exit(asyncio.run(main()))