#!/usr/bin/env python3
"""
Debug Utilities for LiveKit Chrome Agent

This module provides debugging utilities that can be used during development
and troubleshooting of browser automation issues.
"""

import logging
import json
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime


class SelectorDebugger:
    """Utility class for debugging selector discovery and execution.

    Every session produced by :meth:`debug_voice_command` is appended to
    ``debug_history`` so it can be summarised or exported later.
    """

    def __init__(self, mcp_client, logger: Optional[logging.Logger] = None):
        """Store the MCP client and logger and start with an empty history.

        Args:
            mcp_client: Client object whose private helpers
                (``_parse_voice_command``, ``_call_mcp_tool``, ...) and
                ``execute_voice_command`` are exercised by this debugger.
            logger: Logger to use; defaults to this module's logger.
        """
        self.mcp_client = mcp_client
        self.logger = logger or logging.getLogger(__name__)
        self.debug_history = []

    async def debug_voice_command(self, command: str) -> Dict[str, Any]:
        """Debug a voice command end-to-end.

        Args:
            command: The raw voice command text to parse and execute.

        Returns:
            The debug session dict with keys ``timestamp``, ``command``,
            ``steps``, ``final_result`` and ``success``. The same dict is
            appended to ``debug_history``, including when parsing fails.
        """
        debug_session = {
            "timestamp": datetime.now().isoformat(),
            "command": command,
            "steps": [],
            "final_result": None,
            "success": False,
        }

        try:
            # Step 1: Parse command
            self.logger.info(f"🔍 DEBUG: Parsing voice command '{command}'")
            action, params = self.mcp_client._parse_voice_command(command)

            debug_session["steps"].append({
                "step": "parse_command",
                "input": command,
                "output": {"action": action, "params": params},
                "success": action is not None,
            })

            if action:
                await self._debug_parsed_command(
                    command, action, params, debug_session
                )
            else:
                # No early return here: a failed parse must still be recorded
                # in the history below so it shows up in the summary.
                debug_session["final_result"] = "Command parsing failed"

        except Exception as e:
            debug_session["final_result"] = f"Debug failed: {e}"
            self.logger.error(f"💥 Debug session failed: {e}")

        # Store in history
        self.debug_history.append(debug_session)
        return debug_session

    async def _debug_parsed_command(
        self,
        command: str,
        action: str,
        params: Dict[str, Any],
        debug_session: Dict[str, Any],
    ) -> None:
        """Run steps 2-4 of a debug session, updating ``debug_session`` in place."""
        # Step 2: If it's a click command, debug selector discovery
        if action == "click":
            element_description = params.get("text", "")
            selector_debug = await self._debug_selector_discovery(
                element_description
            )
            debug_session["steps"].append(selector_debug)

            # Step 3: Test action execution if selectors were found
            if selector_debug.get("selectors_found"):
                execution_debug = await self._debug_action_execution(
                    action, params, selector_debug.get("best_selector")
                )
                debug_session["steps"].append(execution_debug)
                debug_session["success"] = execution_debug.get("success", False)

        # Step 4: Execute the actual command for comparison
        # NOTE(review): for click commands step 3 has already issued
        # chrome_click_element, so this presumably triggers a second click -
        # confirm that is acceptable for the pages being debugged.
        try:
            actual_result = await self.mcp_client.execute_voice_command(command)
            debug_session["final_result"] = actual_result
            # Stringify first so a non-string result is not misreported as an
            # execution failure just because it has no .lower().
            result_text = str(actual_result).lower()
            debug_session["success"] = (
                "success" in result_text or "clicked" in result_text
            )
        except Exception as e:
            debug_session["final_result"] = f"Execution failed: {e}"

    async def _debug_selector_discovery(self, element_description: str) -> Dict[str, Any]:
        """Debug the selector discovery process.

        Args:
            element_description: Text describing the element to look for.

        Returns:
            A step dict listing every interactive element that matched the
            description; ``best_selector`` is the selector of the first match.
            Exceptions are captured in the ``errors`` list rather than raised.
        """
        step = {
            "step": "selector_discovery",
            "input": element_description,
            "interactive_elements_found": 0,
            "matching_elements": [],
            "selectors_found": False,
            "best_selector": None,
            "errors": [],
        }
        reported_attributes = ("id", "class", "name", "type", "value", "aria-label")

        try:
            # Get interactive elements
            interactive_result = await self.mcp_client._call_mcp_tool(
                "chrome_get_interactive_elements",
                {"types": ["button", "a", "input", "select"]},
            )

            if interactive_result and "elements" in interactive_result:
                elements = interactive_result["elements"]
                step["interactive_elements_found"] = len(elements)

                # Find matching elements
                for i, element in enumerate(elements):
                    if not self.mcp_client._element_matches_description(
                        element, element_description
                    ):
                        continue

                    selector = self.mcp_client._extract_best_selector(element)
                    match_reason = self.mcp_client._get_match_reason(
                        element, element_description
                    )
                    # `or` fallbacks: a key present with a None value must not
                    # abort discovery for all remaining elements.
                    text = element.get("textContent") or ""
                    attributes = element.get("attributes") or {}

                    step["matching_elements"].append({
                        "index": i,
                        "selector": selector,
                        "match_reason": match_reason,
                        "tag": element.get("tagName", "unknown"),
                        "text": text[:50],
                        "attributes": {
                            k: v
                            for k, v in attributes.items()
                            if k in reported_attributes
                        },
                    })

                if step["matching_elements"]:
                    step["selectors_found"] = True
                    step["best_selector"] = step["matching_elements"][0]["selector"]

        except Exception as e:
            step["errors"].append(f"Selector discovery failed: {e}")

        return step

    async def _debug_action_execution(self, action: str, params: Dict[str, Any], selector: str) -> Dict[str, Any]:
        """Debug action execution.

        Validates the selector via ``chrome_get_web_content`` and, for the
        ``click`` action, performs the click via ``chrome_click_element``.

        Args:
            action: Parsed action name; only ``"click"`` is executed here.
            params: Parsed action parameters (recorded in the step only).
            selector: Selector to validate and act on.

        Returns:
            A step dict with ``validation_result``, ``execution_result``,
            ``success`` and ``errors``. Exceptions are captured in ``errors``.
        """
        step = {
            "step": "action_execution",
            "action": action,
            "params": params,
            "selector": selector,
            "validation_result": None,
            "execution_result": None,
            "success": False,
            "errors": [],
        }

        try:
            # First validate the selector. An empty/None tool response is
            # treated as "element not found" instead of raising on .get().
            validation = await self.mcp_client._call_mcp_tool(
                "chrome_get_web_content",
                {"selector": selector, "textOnly": False},
            ) or {}

            content = validation.get("content")
            step["validation_result"] = {
                "selector_valid": content is not None,
                "element_found": bool(content),
            }

            if step["validation_result"]["element_found"]:
                # Try executing the action
                if action == "click":
                    execution_result = await self.mcp_client._call_mcp_tool(
                        "chrome_click_element", {"selector": selector}
                    )
                    step["execution_result"] = execution_result
                    step["success"] = True
            else:
                step["errors"].append("Selector validation failed - element not found")

        except Exception as e:
            step["errors"].append(f"Action execution failed: {e}")

        return step

    async def test_common_selectors(self, selector_list: List[str]) -> Dict[str, Any]:
        """Test a list of common selectors to see which ones work.

        Args:
            selector_list: Selectors to look up on the current page.

        Returns:
            A dict with the selectors split into ``working_selectors`` and
            ``failed_selectors`` plus a per-selector ``test_results`` list.
            Nothing is clicked; found selectors are only marked
            ``"potentially_clickable"``.
        """
        results = {
            "timestamp": datetime.now().isoformat(),
            "total_selectors": len(selector_list),
            "working_selectors": [],
            "failed_selectors": [],
            "test_results": [],
        }

        for selector in selector_list:
            test_result = {
                "selector": selector,
                "validation": None,
                "clickable": None,
                "error": None,
            }

            try:
                # Test if selector finds an element
                validation = await self.mcp_client._call_mcp_tool(
                    "chrome_get_web_content",
                    {"selector": selector, "textOnly": False},
                ) or {}

                if validation.get("content"):
                    test_result["validation"] = "found"
                    results["working_selectors"].append(selector)
                    # We can't safely test clicking without side effects,
                    # so we just mark it as potentially clickable
                    test_result["clickable"] = "potentially_clickable"
                else:
                    test_result["validation"] = "not_found"
                    results["failed_selectors"].append(selector)

            except Exception as e:
                test_result["validation"] = "error"
                test_result["error"] = str(e)
                results["failed_selectors"].append(selector)

            results["test_results"].append(test_result)

        return results

    def get_debug_summary(self) -> Dict[str, Any]:
        """Get a summary of all debug sessions.

        Returns:
            ``{"message": "No debug sessions recorded"}`` when the history is
            empty; otherwise session counts, a ``common_failures`` mapping of
            failure reason to occurrence count, and the last five sessions.
        """
        if not self.debug_history:
            return {"message": "No debug sessions recorded"}

        failed = [s for s in self.debug_history if not s.get("success")]

        # Analyze common failure patterns
        common_failures: Dict[str, int] = {}
        for session in failed:
            reason = session.get("final_result")
            # Normalise to a string key: None means "unknown", and a
            # non-string result must not break dict hashing.
            key = "unknown" if reason is None else str(reason)
            common_failures[key] = common_failures.get(key, 0) + 1

        return {
            "total_sessions": len(self.debug_history),
            "successful_sessions": len(self.debug_history) - len(failed),
            "failed_sessions": len(failed),
            "common_failures": common_failures,
            "recent_sessions": self.debug_history[-5:],  # Last 5 sessions
        }

    def export_debug_log(self, filename: Optional[str] = None) -> str:
        """Export debug history to a JSON file.

        Args:
            filename: Target path; when ``None`` a timestamped
                ``debug_log_YYYYmmdd_HHMMSS.json`` name is generated.

        Returns:
            The filename that was written.
        """
        if filename is None:
            filename = f"debug_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        payload = {
            "export_timestamp": datetime.now().isoformat(),
            "debug_history": self.debug_history,
            "summary": self.get_debug_summary(),
        }
        with open(filename, 'w', encoding="utf-8") as f:
            # default=str: values json cannot serialise are written as strings.
            json.dump(payload, f, indent=2, default=str)

        return filename


class BrowserStateMonitor:
    """Monitor browser state and detect issues"""

    def __init__(self, mcp_client, logger: Optional[logging.Logger] = None):
        """Store the MCP client and logger and start with an empty history.

        Args:
            mcp_client: Client providing ``validate_browser_connection`` and
                ``_call_mcp_tool``.
            logger: Logger to use; defaults to this module's logger.
        """
        self.mcp_client = mcp_client
        self.logger = logger or logging.getLogger(__name__)
        self.state_history = []

    async def capture_state(self) -> Dict[str, Any]:
        """Capture current browser state.

        Returns:
            A state dict with ``timestamp``, ``connection_status``,
            ``page_info``, ``interactive_elements_count`` and ``errors``.
            Failures are recorded in ``errors`` instead of being raised, and
            the state is appended to ``state_history``.
        """
        state = {
            "timestamp": datetime.now().isoformat(),
            "connection_status": None,
            "page_info": None,
            "interactive_elements_count": 0,
            "errors": [],
        }

        try:
            # Check connection
            validation = await self.mcp_client.validate_browser_connection()
            state["connection_status"] = validation

            # Get page info
            try:
                page_result = await self.mcp_client._call_mcp_tool(
                    "chrome_get_web_content",
                    {"selector": "title", "textOnly": True},
                ) or {}

                if page_result.get("content"):
                    state["page_info"] = {
                        "title": page_result["content"][0].get("text", "Unknown"),
                        "accessible": True,
                    }
            except Exception as e:
                state["errors"].append(f"Could not get page info: {e}")

            # Count interactive elements
            try:
                elements_result = await self.mcp_client._call_mcp_tool(
                    "chrome_get_interactive_elements",
                    {"types": ["button", "a", "input", "select", "textarea"]},
                ) or {}

                if elements_result.get("elements"):
                    state["interactive_elements_count"] = len(elements_result["elements"])
            except Exception as e:
                state["errors"].append(f"Could not count interactive elements: {e}")

        except Exception as e:
            state["errors"].append(f"State capture failed: {e}")

        self.state_history.append(state)
        return state

    def detect_issues(self, current_state: Dict[str, Any]) -> List[str]:
        """Detect potential issues based on current state.

        Args:
            current_state: A state dict as produced by :meth:`capture_state`.

        Returns:
            Human-readable issue descriptions; empty when nothing is wrong.
        """
        issues = []

        # Check connection issues. capture_state stores None when validation
        # failed, so a plain .get(key, {}) default is not enough here.
        connection = current_state.get("connection_status") or {}
        if not connection.get("mcp_connected"):
            issues.append("MCP server not connected")
        if not connection.get("browser_responsive"):
            issues.append("Browser not responsive")
        if not connection.get("page_accessible"):
            issues.append("Current page not accessible")

        # Check for errors
        if current_state.get("errors"):
            issues.extend([f"Error: {error}" for error in current_state["errors"]])

        # Check element count (might indicate page loading issues)
        if current_state.get("interactive_elements_count", 0) == 0:
            issues.append("No interactive elements found on page")

        return issues