broswer-automation/agent-livekit/debug_utils.py

#!/usr/bin/env python3
"""
Debug Utilities for LiveKit Chrome Agent

This module provides debugging utilities that can be used during development
and troubleshooting of browser automation issues.
"""

import logging
import json
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime


class SelectorDebugger:
    """Utility class for debugging selector discovery and execution.

    Wraps an MCP client and records every debugged voice command in
    ``debug_history`` so that sessions can be summarised or exported later.
    """

    def __init__(self, mcp_client, logger: Optional[logging.Logger] = None):
        """Store the client under test and the logger used for reporting.

        Args:
            mcp_client: Client object whose parsing, selector and tool-call
                helpers are exercised by the debug methods.
            logger: Logger to report through; defaults to this module's logger.
        """
        self.mcp_client = mcp_client
        self.logger = logger or logging.getLogger(__name__)
        # One dict per debug_voice_command() call, oldest first.
        self.debug_history: List[Dict[str, Any]] = []

    async def debug_voice_command(self, command: str) -> Dict[str, Any]:
        """Debug a voice command end-to-end.

        Parses the command, and for ``click`` commands traces selector
        discovery and a direct click on the best selector, then runs the
        command through ``execute_voice_command`` for comparison.

        NOTE(review): for click commands the element is clicked in step 3 and
        the command is then executed again in step 4, so the page action is
        presumably performed twice -- confirm this is acceptable on the page
        being debugged.

        Args:
            command: The raw voice command text.

        Returns:
            The debug session dict (``timestamp``, ``command``, ``steps``,
            ``final_result``, ``success``). Every session, including one whose
            command could not be parsed, is also appended to
            ``debug_history``.
        """
        debug_session = {
            "timestamp": datetime.now().isoformat(),
            "command": command,
            "steps": [],
            "final_result": None,
            "success": False
        }

        try:
            # Step 1: Parse command
            self.logger.info(f"🔍 DEBUG: Parsing voice command '{command}'")
            action, params = self.mcp_client._parse_voice_command(command)

            step1 = {
                "step": "parse_command",
                "input": command,
                "output": {"action": action, "params": params},
                "success": action is not None
            }
            debug_session["steps"].append(step1)

            if not action:
                debug_session["final_result"] = "Command parsing failed"
                # Record parse failures as well; otherwise they never show up
                # in get_debug_summary() or the exported log.
                self.debug_history.append(debug_session)
                return debug_session

            # Step 2: If it's a click command, debug selector discovery
            if action == "click":
                # Tolerate a parser that returns no params for the action.
                element_description = (params or {}).get("text", "")
                selector_debug = await self._debug_selector_discovery(element_description)
                debug_session["steps"].append(selector_debug)

                # Step 3: Test action execution if selectors were found
                if selector_debug.get("selectors_found"):
                    execution_debug = await self._debug_action_execution(
                        action, params, selector_debug.get("best_selector")
                    )
                    debug_session["steps"].append(execution_debug)
                    debug_session["success"] = execution_debug.get("success", False)

            # Step 4: Execute the actual command for comparison.
            # Its outcome overrides the success flag set in step 3.
            try:
                actual_result = await self.mcp_client.execute_voice_command(command)
                debug_session["final_result"] = actual_result
                result_text = actual_result.lower()
                debug_session["success"] = "success" in result_text or "clicked" in result_text
            except Exception as e:
                debug_session["final_result"] = f"Execution failed: {e}"

        except Exception as e:
            debug_session["final_result"] = f"Debug failed: {e}"
            self.logger.error(f"💥 Debug session failed: {e}")

        # Store in history
        self.debug_history.append(debug_session)

        return debug_session

    async def _debug_selector_discovery(self, element_description: str) -> Dict[str, Any]:
        """Debug the selector discovery process.

        Fetches the page's interactive elements through the MCP client and
        records every element the client considers a match for the
        description.

        Args:
            element_description: Text describing the element to look for.

        Returns:
            A step dict with the number of interactive elements seen, details
            of each matching element, whether any selector was found, the
            selector of the first match, and any errors raised on the way.
        """
        step = {
            "step": "selector_discovery",
            "input": element_description,
            "interactive_elements_found": 0,
            "matching_elements": [],
            "selectors_found": False,
            "best_selector": None,
            "errors": []
        }

        try:
            # Get interactive elements
            interactive_result = await self.mcp_client._call_mcp_tool("chrome_get_interactive_elements", {
                "types": ["button", "a", "input", "select"]
            })

            if interactive_result and "elements" in interactive_result:
                elements = interactive_result["elements"] or []
                step["interactive_elements_found"] = len(elements)

                # Find matching elements
                for i, element in enumerate(elements):
                    if not self.mcp_client._element_matches_description(element, element_description):
                        continue

                    selector = self.mcp_client._extract_best_selector(element)
                    match_reason = self.mcp_client._get_match_reason(element, element_description)

                    # `or` rather than a .get() default: a key that is present
                    # but None must not abort discovery for the whole page.
                    text = element.get("textContent") or ""
                    attributes = element.get("attributes") or {}

                    step["matching_elements"].append({
                        "index": i,
                        "selector": selector,
                        "match_reason": match_reason,
                        "tag": element.get("tagName", "unknown"),
                        "text": text[:50],
                        "attributes": {k: v for k, v in attributes.items()
                                       if k in ["id", "class", "name", "type", "value", "aria-label"]}
                    })

                if step["matching_elements"]:
                    step["selectors_found"] = True
                    # The first match (in page order) is treated as the best.
                    step["best_selector"] = step["matching_elements"][0]["selector"]

        except Exception as e:
            step["errors"].append(f"Selector discovery failed: {e}")

        return step

    async def _debug_action_execution(self, action: str, params: Dict[str, Any], selector: str) -> Dict[str, Any]:
        """Debug action execution.

        Validates the selector by fetching its content and, when an element
        is found and the action is ``click``, clicks it through the MCP
        client.

        Args:
            action: The parsed action name; only ``"click"`` is executed.
            params: The parsed command parameters (recorded, not interpreted).
            selector: The selector to validate and act on.

        Returns:
            A step dict with the validation result, the raw execution result,
            a success flag and any errors. ``success`` becomes True as soon as
            the click call returns without raising; the returned payload is
            not inspected.
        """
        step = {
            "step": "action_execution",
            "action": action,
            "params": params,
            "selector": selector,
            "validation_result": None,
            "execution_result": None,
            "success": False,
            "errors": []
        }

        try:
            # First validate the selector
            validation = await self.mcp_client._call_mcp_tool("chrome_get_web_content", {
                "selector": selector,
                "textOnly": False
            })

            step["validation_result"] = {
                "selector_valid": validation.get("content") is not None,
                "element_found": bool(validation.get("content"))
            }

            if step["validation_result"]["element_found"]:
                # Try executing the action
                if action == "click":
                    execution_result = await self.mcp_client._call_mcp_tool("chrome_click_element", {
                        "selector": selector
                    })
                    step["execution_result"] = execution_result
                    step["success"] = True

            else:
                step["errors"].append("Selector validation failed - element not found")

        except Exception as e:
            step["errors"].append(f"Action execution failed: {e}")

        return step

    async def test_common_selectors(self, selector_list: List[str]) -> Dict[str, Any]:
        """Test a list of common selectors to see which ones work.

        Each selector is looked up with ``chrome_get_web_content``; nothing is
        clicked.

        Args:
            selector_list: Selectors to probe on the current page.

        Returns:
            A dict with a timestamp, the selector count, the lists of working
            and failed selectors, and one detailed entry per selector under
            ``test_results``.
        """
        results = {
            "timestamp": datetime.now().isoformat(),
            "total_selectors": len(selector_list),
            "working_selectors": [],
            "failed_selectors": [],
            "test_results": []
        }

        for selector in selector_list:
            test_result = {
                "selector": selector,
                "validation": None,
                "clickable": None,
                "error": None
            }

            try:
                # Test if selector finds an element
                validation = await self.mcp_client._call_mcp_tool("chrome_get_web_content", {
                    "selector": selector,
                    "textOnly": False
                })

                if validation.get("content"):
                    test_result["validation"] = "found"
                    results["working_selectors"].append(selector)

                    # We can't safely test clicking without side effects,
                    # so we just mark it as potentially clickable
                    test_result["clickable"] = "potentially_clickable"
                else:
                    test_result["validation"] = "not_found"
                    results["failed_selectors"].append(selector)

            except Exception as e:
                test_result["validation"] = "error"
                test_result["error"] = str(e)
                results["failed_selectors"].append(selector)

            results["test_results"].append(test_result)

        return results

    def get_debug_summary(self) -> Dict[str, Any]:
        """Get a summary of all debug sessions.

        Returns:
            ``{"message": ...}`` when no session has been recorded; otherwise
            a dict with the total, successful and failed session counts, a
            ``common_failures`` mapping of failure reason to occurrence count,
            and the last five sessions under ``recent_sessions``.
        """
        if not self.debug_history:
            return {"message": "No debug sessions recorded"}

        failed_sessions = [session for session in self.debug_history if not session.get("success")]

        summary = {
            "total_sessions": len(self.debug_history),
            "successful_sessions": len(self.debug_history) - len(failed_sessions),
            "failed_sessions": len(failed_sessions),
            "common_failures": {},
            "recent_sessions": self.debug_history[-5:]  # Last 5 sessions
        }

        # Analyze common failure patterns
        common_failures = summary["common_failures"]
        for session in failed_sessions:
            # `or` rather than a .get() default: sessions start out with
            # final_result set to None, which a default would not replace.
            failure_reason = session.get("final_result") or "unknown"
            common_failures[failure_reason] = common_failures.get(failure_reason, 0) + 1

        return summary

    def export_debug_log(self, filename: Optional[str] = None) -> str:
        """Export debug history to a JSON file.

        Args:
            filename: Destination path. When omitted, a name of the form
                ``debug_log_YYYYMMDD_HHMMSS.json`` in the current working
                directory is used.

        Returns:
            The path the log was written to.
        """
        if filename is None:
            filename = f"debug_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        # Explicit encoding keeps the export independent of the platform default.
        with open(filename, 'w', encoding='utf-8') as f:
            # default=str stringifies any value json cannot serialise natively.
            json.dump({
                "export_timestamp": datetime.now().isoformat(),
                "debug_history": self.debug_history,
                "summary": self.get_debug_summary()
            }, f, indent=2, default=str)

        return filename


class BrowserStateMonitor:
    """Monitor browser state and detect issues.

    Takes snapshots of the browser through an MCP client, keeps them in
    ``state_history`` and turns a snapshot into a list of human-readable
    issue descriptions.
    """

    def __init__(self, mcp_client, logger: Optional[logging.Logger] = None):
        """Store the client to monitor and the logger used for reporting.

        Args:
            mcp_client: Client object used to query the browser.
            logger: Logger to report through; defaults to this module's logger.
        """
        self.mcp_client = mcp_client
        self.logger = logger or logging.getLogger(__name__)
        # One snapshot dict per capture_state() call, oldest first.
        self.state_history: List[Dict[str, Any]] = []

    async def capture_state(self) -> Dict[str, Any]:
        """Capture current browser state.

        Failures are collected in the snapshot's ``errors`` list instead of
        being raised, so a snapshot is always returned and recorded.

        Returns:
            A dict with ``timestamp``, ``connection_status`` (left as None if
            the connection check itself raised), ``page_info``,
            ``interactive_elements_count`` and ``errors``.
        """
        state = {
            "timestamp": datetime.now().isoformat(),
            "connection_status": None,
            "page_info": None,
            "interactive_elements_count": 0,
            "errors": []
        }

        try:
            # Check connection
            validation = await self.mcp_client.validate_browser_connection()
            state["connection_status"] = validation

            # Get page info
            try:
                page_result = await self.mcp_client._call_mcp_tool("chrome_get_web_content", {
                    "selector": "title",
                    "textOnly": True
                })
                if page_result.get("content"):
                    state["page_info"] = {
                        "title": page_result["content"][0].get("text", "Unknown"),
                        "accessible": True
                    }
            except Exception as e:
                state["errors"].append(f"Could not get page info: {e}")

            # Count interactive elements
            try:
                elements_result = await self.mcp_client._call_mcp_tool("chrome_get_interactive_elements", {
                    "types": ["button", "a", "input", "select", "textarea"]
                })
                if elements_result.get("elements"):
                    state["interactive_elements_count"] = len(elements_result["elements"])
            except Exception as e:
                state["errors"].append(f"Could not count interactive elements: {e}")

        except Exception as e:
            state["errors"].append(f"State capture failed: {e}")

        self.state_history.append(state)
        return state

    def detect_issues(self, current_state: Dict[str, Any]) -> List[str]:
        """Detect potential issues based on current state.

        Args:
            current_state: A snapshot as produced by ``capture_state``.

        Returns:
            Issue descriptions in a fixed order: connection problems, then
            one ``"Error: ..."`` entry per recorded error, then a note when
            no interactive elements were counted. Empty when nothing looks
            wrong.
        """
        issues = []

        # Check connection issues.
        # capture_state() leaves connection_status as None when the connection
        # check raised, and a .get() default does not replace an explicit
        # None, so normalise anything that is not a dict to "nothing reported".
        connection = current_state.get("connection_status")
        if not isinstance(connection, dict):
            connection = {}

        if not connection.get("mcp_connected"):
            issues.append("MCP server not connected")
        if not connection.get("browser_responsive"):
            issues.append("Browser not responsive")
        if not connection.get("page_accessible"):
            issues.append("Current page not accessible")

        # Check for errors
        if current_state.get("errors"):
            issues.extend([f"Error: {error}" for error in current_state["errors"]])

        # Check element count (might indicate page loading issues)
        if current_state.get("interactive_elements_count", 0) == 0:
            issues.append("No interactive elements found on page")

        return issues