""" MCP Chrome Client for LiveKit Integration This module provides a client interface to the MCP Chrome server with voice command processing capabilities. """ import asyncio import aiohttp import json import logging import subprocess from typing import Dict, Any, Optional, List import re class MCPResponseHandler: """ Handler for processing MCP tool responses and extracting target element information. """ @staticmethod def parse_mcp_response(mcp_result: Dict[str, Any]) -> Dict[str, Any]: """ Parse MCP tool response and extract meaningful data including target element. Args: mcp_result: Raw MCP tool response Returns: Parsed response data with success status, target element, and details """ try: # Check primary error indicator is_error = mcp_result.get("isError", False) if is_error: # Handle error response error_message = "Unknown error" if "content" in mcp_result and mcp_result["content"]: error_message = mcp_result["content"][0].get("text", error_message) return { "success": False, "error": error_message, "is_mcp_error": True, "target_element": None, "optimal_selector": None } # Parse successful response content if "content" not in mcp_result or not mcp_result["content"]: return { "success": False, "error": "No content in MCP response", "is_mcp_error": False, "target_element": None, "optimal_selector": None } content_text = mcp_result["content"][0].get("text", "") if not content_text: return { "success": False, "error": "Empty content in MCP response", "is_mcp_error": False, "target_element": None, "optimal_selector": None } # Parse JSON content try: parsed_content = json.loads(content_text) except json.JSONDecodeError as e: return { "success": False, "error": f"Invalid JSON in MCP response: {e}", "is_mcp_error": False, "raw_content": content_text, "target_element": None, "optimal_selector": None } # Extract operation success status operation_success = parsed_content.get("success", False) # Extract target element information target_element = parsed_content.get("targetElement", {}) # Generate optimal selector from target element optimal_selector = MCPResponseHandler.generate_optimal_selector(target_element) return { "success": operation_success, "message": parsed_content.get("message", ""), "target_element": target_element, "optimal_selector": optimal_selector, "results": parsed_content.get("results", []), "element_info": parsed_content.get("elementInfo", {}), "navigation_occurred": parsed_content.get("navigationOccurred", False), "raw_content": parsed_content, "is_mcp_error": False } except Exception as e: logging.getLogger(__name__).error(f"Error parsing MCP response: {e}") return { "success": False, "error": f"Exception parsing MCP response: {str(e)}", "is_mcp_error": False, "target_element": None, "optimal_selector": None } @staticmethod def generate_optimal_selector(target_element: Dict[str, Any]) -> Optional[str]: """ Generate the most specific and reliable CSS selector from target element info. Args: target_element: Target element information from MCP response Returns: Optimal CSS selector string or None if no element info """ if not target_element: return None # Priority order for selector generation: # 1. ID (most specific and reliable) # 2. Name attribute with tag # 3. Class with tag (if unique enough) # 4. Type with additional attributes element_id = target_element.get("id") tag_name = target_element.get("tagName", "").lower() class_name = target_element.get("className", "") element_type = target_element.get("type", "") name_attr = target_element.get("name", "") # 1. Use ID if available (most reliable) if element_id: return f"#{element_id}" # 2. Use name attribute with tag if name_attr and tag_name: return f"{tag_name}[name='{name_attr}']" # 3. Use type attribute with tag for inputs if element_type and tag_name == "input": return f"input[type='{element_type}']" # 4. Use class with tag (be careful with complex class names) if class_name and tag_name: # Use first class if multiple classes first_class = class_name.split()[0] if class_name else "" if first_class: return f"{tag_name}.{first_class}" # 5. Fallback to just tag name (least specific) if tag_name: return tag_name return None class MCPChromeClient: """Client for interacting with MCP Chrome server""" def __init__(self, config: Dict[str, Any]): self.config = config self.server_type = config.get('mcp_server_type', 'http') self.server_url = config.get('mcp_server_url', 'http://127.0.0.1:12306/mcp') self.session: Optional[aiohttp.ClientSession] = None self.process: Optional[subprocess.Popen] = None self.session_id: Optional[str] = None self.logger = logging.getLogger(__name__) # Input field cache for automatic detection (includes all input types) self.cached_input_fields: Dict[str, Any] = {} self.current_page_url: Optional[str] = None self.auto_detect_inputs: bool = True # Target element tracking for intelligent selector reuse self.last_target_element: Optional[Dict[str, Any]] = None self.last_optimal_selector: Optional[str] = None self.response_handler = MCPResponseHandler() # Enhanced voice command patterns for natural language processing # Order matters! Specific patterns should come before general ones self.command_patterns = { 'fill_field_by_name': [ # Explicit fill commands with "with" r'fill (?:the )?(.+?) (?:field )?with (.+)', r'populate (?:the )?(.+?) (?:field )?with (.+)', r'set (?:the )?(.+?) (?:field )?to (.+)', # Enter/input commands r'enter (.+) in (?:the )?(.+?) (?:field|input|box|area)', r'input (.+) in (?:the )?(.+?) (?:field|input|box|area)', r'type (.+) in (?:the )?(.+?) (?:field|input|box|area)', r'write (.+) in (?:the )?(.+?) (?:field|input|box|area)', r'put (.+) in (?:the )?(.+?) (?:field|input|box|area)', r'add (.+) to (?:the )?(.+?) (?:field|input|box|area)', # Direct field-value patterns r'(.+?) field (.+)', # "email field john@example.com" r'(.+?) input (.+)', # "search input python" r'(.+?) box (.+)', # "text box hello world" r'(.+?) area (.+)', # "text area hello world" # Email patterns (high priority) r'(?:email|e-mail) (.+@.+)', # "email john@example.com" r'(.+@.+) (?:in|for) (?:the )?email', # "john@example.com in email" # Phone patterns r'(?:phone|telephone|mobile) ([\d\-\+\(\)\s]+)', # "phone 123-456-7890" r'([\d\-\+\(\)\s]{10,}) (?:in|for) (?:the )?phone', # "123-456-7890 in phone" # Password patterns r'(?:password|pass) (.+)', # "password secret123" r'(.+) (?:in|for) (?:the )?password', # "secret123 in password" # Username patterns r'(?:username|user) (.+)', # "username john_doe" r'(.+) (?:in|for) (?:the )?username', # "john_doe in username" # Search patterns r'search (?:for )?(.+)', # "search for python" r'(.+) (?:in|for) (?:the )?search', # "python in search" # Generic field value pair (lowest priority) r'(.+?) (.+)', # Generic field value pair ], 'type_in_focused': [ r'^type (.+)$', r'^enter (.+)$', r'^input (.+)$', r'^write (.+)$', r'^text (.+)$', ], 'keyboard': [ r'press (?:the )?(enter)(?:\s+key)?$', r'hit (?:the )?(enter)(?:\s+key)?$', r'press (?:the )?(.+) key', r'hit (?:the )?(.+) key', r'keyboard (.+)' ], 'go_to_google': [ r'^(?:go to )?google(?:\.com)?$', r'^open google(?:\.com)?$', r'^navigate to google(?:\.com)?$', r'^take me to google$', r'^show me google$' ], 'go_to_facebook': [ r'^(?:go to )?facebook(?:\.com)?$', r'^open facebook(?:\.com)?$', r'^navigate to facebook(?:\.com)?$', r'^take me to facebook$', r'^show me facebook$', r'^facbook$', # Common speech recognition error r'^face book$' # Another common variation ], 'go_to_twitter': [ r'^(?:go to )?(?:twitter|tweets)(?:\.com)?$', r'^open (?:twitter|tweets)(?:\.com)?$', r'^navigate to (?:twitter|tweets)(?:\.com)?$', r'^take me to (?:twitter|tweets)$', r'^show me (?:twitter|tweets)$', r'^tweet$', # Single form r'^x\.com$' # New Twitter domain ], 'navigate': [ r'(?:go to|navigate to|open|visit|browse to|load) (.+)', r'take me to (.+)', r'show me (.+)', r'open up (.+)', r'pull up (.+)' ], 'search_google': [ r'search (?:google )?for (.+)', r'google search (.+)', r'find (.+) (?:on google|using google)', r'look up (.+)', r'search google for (.+)', r'google (.+)', r'search for (.+)', r'find information about (.+)', r'what is (.+)', r'tell me about (.+)' ], 'click': [ # Explicit click commands r'click (?:on )?(?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', r'press (?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', r'tap (?:on )?(?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', r'select (?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', r'choose (?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', r'hit (?:the )?(.+?)(?:\s+button|\s+link|\s+element)?$', # Button-specific patterns r'(?:click|press|tap) (?:the )?(.+?) button', r'(?:click|press|tap) button (.+)', r'button (.+)', # Link-specific patterns r'(?:click|press|tap) (?:the )?(.+?) link', r'(?:click|press|tap) link (.+)', r'link (.+)', r'go to (.+)', # Login/Submit specific patterns r'(?:click|press|tap) (?:the )?(?:login|log in|sign in|submit)', r'(?:login|log in|sign in|submit)', # Common UI elements r'(?:click|press|tap) (?:the )?(?:menu|dropdown|checkbox|radio)', r'(?:menu|dropdown|checkbox|radio)', # Generic element patterns r'(?:click|press|tap) (.+)', r'activate (.+)', r'trigger (.+)' ], 'type': [ r'type (.+)', r'enter (.+)', r'input (.+)', r'write (.+)', r'fill in (.+)', r'put in (.+)', r'add (.+)' ], 'scroll': [ r'scroll (up|down|left|right)', r'scroll to (.+)', r'go (up|down)', r'move (up|down)', r'page (up|down)', r'scroll to the (top|bottom)', r'go to the (top|bottom)' ], 'screenshot': [ r'^take (?:a )?screenshot$', r'^capture (?:the )?screen$', r'^show me (?:the )?page$', r'^save (?:the )?page$', r'^grab (?:a )?screenshot$', r'^screenshot this$' ], 'get_search_results': [ r'^get search results$', r'^show (?:me )?(?:the )?results$', r'^what (?:are )?(?:the )?results$', r'^extract results$', r'^read (?:the )?results$', r'^what did (?:we|I) find$', r'^show what we found$' ], 'get_page_content': [ r'(?:get|show|read|extract) (?:the )?(?:page )?content', r'what(?:\'s| is) on (?:the|this) page', r'(?:show|tell) me what(?:\'s| is) on (?:the|this) page', r'read (?:the|this) page', r'extract (?:all )?text', r'get (?:all )?text content', r'what does (?:the|this) page say', r'page content', r'page text' ], 'get_form_fields': [ r'(?:get|show|find|list) (?:all )?(?:form )?fields', r'what fields are (?:on )?(?:the|this) page', r'(?:show|tell) me (?:the|all) (?:form )?fields', r'list (?:all )?inputs', r'find (?:all )?form elements', r'what can I fill (?:in|out)', r'available fields', r'form elements' ], 'get_interactive_elements': [ r'(?:get|show|find|list) (?:all )?(?:interactive|clickable) elements', r'what can I click', r'(?:show|tell) me (?:all )?(?:buttons|links)', r'list (?:all )?(?:buttons|links|clickable elements)', r'find (?:all )?clickable (?:elements|items)', r'available (?:buttons|links|actions)', r'interactive elements', r'clickable elements' ], 'wait': [ r'wait (?:for )?(\d+) seconds?', r'pause (?:for )?(\d+) seconds?', r'hold on (?:for )?(\d+) seconds?', r'give it (\d+) seconds?' ], 'back': [ r'^go back$', r'^back$', r'^previous page$', r'^navigate back$' ], 'forward': [ r'^go forward$', r'^forward$', r'^next page$', r'^navigate forward$' ], 'refresh': [ r'^refresh$', r'^reload$', r'^refresh (?:the )?page$', r'^reload (?:the )?page$' ] } async def connect(self): """Connect to the MCP Chrome server""" if self.server_type == 'stdio': await self._connect_stdio() else: await self._connect_http() async def _connect_stdio(self): """Connect to MCP server via stdio""" try: command = self.config.get('mcp_server_command', 'node') args = self.config.get('mcp_server_args', []) self.process = subprocess.Popen( [command] + args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True ) self.logger.info("Connected to MCP Chrome server via stdio") except Exception as e: self.logger.error(f"Failed to connect to MCP server via stdio: {e}") raise async def _connect_http(self): """Connect to MCP server via streamable-HTTP""" # Create session with proper timeout and headers for MCP timeout = aiohttp.ClientTimeout(total=30) headers = { 'Content-Type': 'application/json', 'Accept': 'application/json, text/event-stream' } self.session = aiohttp.ClientSession(timeout=timeout, headers=headers) try: # Test connection with MCP initialization init_payload = { "jsonrpc": "2.0", "id": 1, "method": "initialize", "params": { "protocolVersion": "2024-11-05", "capabilities": { "tools": {} }, "clientInfo": { "name": "LiveKit-Chrome-Agent", "version": "1.0.0" } } } async with self.session.post(self.server_url, json=init_payload) as response: if response.status == 200: # Extract session ID from response headers if available session_id = response.headers.get('mcp-session-id') if session_id: self.session_id = session_id self.logger.info(f"Connected to MCP Chrome server via streamable-HTTP with session ID: {session_id}") else: self.logger.info("Connected to MCP Chrome server via streamable-HTTP") # Handle different content types content_type = response.headers.get('content-type', '') if 'application/json' in content_type: result = await response.json() if "error" in result: raise Exception(f"MCP initialization error: {result['error']}") elif 'text/event-stream' in content_type: # For SSE responses, we just need to confirm the connection is established self.logger.info("Received SSE response, connection established") else: # Try to read as text for debugging text_response = await response.text() self.logger.debug(f"Unexpected content type: {content_type}, response: {text_response[:200]}") # Send initialized notification initialized_payload = { "jsonrpc": "2.0", "method": "notifications/initialized" } headers = {} if self.session_id: headers['mcp-session-id'] = self.session_id async with self.session.post(self.server_url, json=initialized_payload, headers=headers) as init_response: if init_response.status not in [200, 204]: self.logger.warning(f"Initialized notification failed with status: {init_response.status}") return else: raise Exception(f"Server connection failed: {response.status}") except Exception as e: self.logger.error(f"Failed to connect to MCP server via HTTP: {e}") if self.session: await self.session.close() self.session = None raise async def disconnect(self): """Disconnect from the MCP Chrome server""" if self.session: await self.session.close() self.session = None if self.process: self.process.terminate() try: self.process.wait(timeout=5) except subprocess.TimeoutExpired: self.process.kill() self.process = None async def validate_browser_connection(self) -> Dict[str, Any]: """Validate that the browser is connected and responsive""" validation_result = { "mcp_connected": False, "browser_responsive": False, "page_accessible": False, "current_url": None, "page_title": None, "errors": [] } try: # Check MCP connection if self.session: validation_result["mcp_connected"] = True self.logger.info("✅ MCP server connection: OK") else: validation_result["errors"].append("MCP server not connected") self.logger.error("❌ MCP server connection: FAILED") return validation_result # Test browser responsiveness with a simple call try: result = await self._call_mcp_tool("chrome_get_web_content", { "selector": "title", "textOnly": True }) validation_result["browser_responsive"] = True self.logger.info("✅ Browser responsiveness: OK") # Extract page info if result.get("content"): content = result["content"] if isinstance(content, list) and len(content) > 0: validation_result["page_title"] = content[0].get("text", "Unknown") validation_result["page_accessible"] = True self.logger.info(f"✅ Page accessible: {validation_result['page_title']}") except Exception as e: validation_result["errors"].append(f"Browser not responsive: {e}") self.logger.error(f"❌ Browser responsiveness: FAILED - {e}") # Try to get current URL try: url_result = await self._call_mcp_tool("chrome_get_web_content", { "format": "url" }) if url_result.get("url"): validation_result["current_url"] = url_result["url"] self.logger.info(f"✅ Current URL: {validation_result['current_url']}") except Exception as e: validation_result["errors"].append(f"Could not get current URL: {e}") self.logger.warning(f"⚠️ Could not get current URL: {e}") except Exception as e: validation_result["errors"].append(f"Validation failed: {e}") self.logger.error(f"💥 Browser validation failed: {e}") return validation_result async def execute_voice_command(self, command: str) -> str: """Execute a voice command and return the result with enhanced logging""" try: self.logger.info(f"🎤 VOICE COMMAND: '{command}'") # Parse the voice command action, params = self._parse_voice_command(command) if not action: self.logger.warning(f"❓ COMMAND NOT UNDERSTOOD: '{command}'") return f"❓ I didn't understand the command: {command}" self.logger.info(f"📋 PARSED COMMAND: action='{action}', params={params}") # Execute the parsed command result = await self._execute_action(action, params) self.logger.info(f"✅ COMMAND COMPLETED: '{command}' -> {result[:100]}...") return result except Exception as e: self.logger.error(f"💥 VOICE COMMAND ERROR: '{command}' failed with: {e}") return f"💥 Error executing command: {str(e)}" def _parse_voice_command(self, command: str) -> tuple[Optional[str], Dict[str, Any]]: """Parse a voice command into action and parameters""" command = command.lower().strip() for action, patterns in self.command_patterns.items(): for pattern in patterns: match = re.search(pattern, command, re.IGNORECASE) if match: if action == 'fill_field_by_name': # Handle different parameter orders for field filling groups = match.groups() if len(groups) >= 2: # Determine which group is field name and which is value group1, group2 = groups[0].strip(), groups[1].strip() # Enhanced heuristics to determine field name vs value # Email pattern: if group contains @, it's likely the value if '@' in group2 and '@' not in group1: params = {'field_name': group1, 'value': group2} elif '@' in group1 and '@' not in group2: params = {'field_name': group2, 'value': group1} # Phone pattern: if group contains phone number pattern, it's the value elif re.match(r'[\d\-\+\(\)\s]{10,}', group2) and not re.match(r'[\d\-\+\(\)\s]{10,}', group1): params = {'field_name': group1, 'value': group2} elif re.match(r'[\d\-\+\(\)\s]{10,}', group1) and not re.match(r'[\d\-\+\(\)\s]{10,}', group2): params = {'field_name': group2, 'value': group1} # Common field names: if one group is a common field name, use it as field_name elif group1 in ['email', 'e-mail', 'password', 'pass', 'phone', 'telephone', 'mobile', 'name', 'username', 'user', 'search', 'query']: params = {'field_name': group1, 'value': group2} elif group2 in ['email', 'e-mail', 'password', 'pass', 'phone', 'telephone', 'mobile', 'name', 'username', 'user', 'search', 'query']: params = {'field_name': group2, 'value': group1} # Pattern-based detection: check if pattern indicates order elif 'with' in pattern or 'to' in pattern: # "fill X with Y" or "set X to Y" patterns params = {'field_name': group1, 'value': group2} elif 'in' in pattern: # "enter X in Y" patterns params = {'field_name': group2, 'value': group1} # Default: assume first group is field name, second is value else: params = {'field_name': group1, 'value': group2} elif len(groups) == 1: # Single group - try to extract field and value text = groups[0].strip() if '@' in text: params = {'field_name': 'email', 'value': text} elif re.match(r'[\d\-\+\(\)\s]{10,}', text): params = {'field_name': 'phone', 'value': text} else: params = {'field_name': 'search', 'value': text} else: params = {'field_name': '', 'value': ''} elif action in ['get_page_content', 'get_form_fields', 'get_interactive_elements']: # Content retrieval commands don't need parameters params = {} else: # For other actions, use the first captured group as text params = {'text': match.group(1).strip() if match.groups() else ''} return action, params return None, {} async def _execute_action(self, action: str, params: Dict[str, Any]) -> str: """Execute a specific action with parameters""" if self.server_type == 'stdio': return await self._execute_action_stdio(action, params) else: return await self._execute_action_http(action, params) async def _execute_action_stdio(self, action: str, params: Dict[str, Any]) -> str: """Execute action via stdio (simplified for now)""" if not self.process: raise Exception("Not connected to MCP server") # For now, return success messages since full MCP protocol is complex try: if action == 'navigate': return f"Would navigate to {params['text']} (stdio mode - not implemented yet)" elif action == 'go_to_google': return "Would open Google (stdio mode - not implemented yet)" elif action == 'go_to_facebook': return "Would open Facebook (stdio mode - not implemented yet)" elif action == 'go_to_twitter': return "Would open Twitter/X (stdio mode - not implemented yet)" elif action == 'click': return f"Would click on {params['text']} (stdio mode - not implemented yet)" elif action == 'type': return f"Would type: {params['text']} (stdio mode - not implemented yet)" elif action == 'scroll': return f"Would scroll {params['text']} (stdio mode - not implemented yet)" elif action == 'screenshot': return "Would take screenshot (stdio mode - not implemented yet)" elif action == 'search': return f"Would search for {params['text']} (stdio mode - not implemented yet)" elif action == 'wait': await asyncio.sleep(int(params['text'])) return f"Waited for {params['text']} seconds" elif action == 'back': return "Would go back (stdio mode - not implemented yet)" elif action == 'forward': return "Would go forward (stdio mode - not implemented yet)" elif action == 'refresh': return "Would refresh page (stdio mode - not implemented yet)" elif action == 'keyboard': return f"Would press key: {params['text']} (stdio mode - not implemented yet)" else: return f"Unknown action: {action}" except Exception as e: self.logger.error(f"Error executing action {action}: {e}") return f"Error executing {action}: {str(e)}" async def _execute_action_http(self, action: str, params: Dict[str, Any]) -> str: """Execute action via HTTP using MCP tools""" if not self.session: raise Exception("Not connected to MCP server") try: if action == 'navigate': return await self._navigate_mcp(params['text']) elif action == 'go_to_google': return await self._go_to_google_mcp() elif action == 'go_to_facebook': return await self._go_to_facebook_mcp() elif action == 'go_to_twitter': return await self._go_to_twitter_mcp() elif action == 'search_google': return await self._search_google_mcp(params['text']) elif action == 'click': # Use the new smart click method with enhanced discovery and fallback return await self.smart_click_with_target_tracking(params['text']) elif action == 'type': return await self._type_text_mcp(params['text']) elif action == 'fill_field_by_name': # Use the new smart fill method with enhanced discovery and fallback return await self.smart_fill_with_target_tracking(params['field_name'], params['value']) elif action == 'type_in_focused': return await self._type_in_focused_element(params['text']) elif action == 'scroll': return await self._scroll_mcp(params['text']) elif action == 'screenshot': return await self._take_screenshot_mcp() elif action == 'get_search_results': return await self._get_search_results_mcp() elif action == 'get_page_content': return await self._get_page_content_mcp() elif action == 'get_form_fields': return await self._get_form_fields_mcp() elif action == 'get_interactive_elements': return await self._get_interactive_elements_mcp() elif action == 'wait': return await self._wait(int(params['text'])) elif action == 'back': return await self._go_back_mcp() elif action == 'forward': return await self._go_forward_mcp() elif action == 'refresh': return await self._refresh_mcp() elif action == 'keyboard': return await self._keyboard_mcp(params['text']) else: return f"Unknown action: {action}" except Exception as e: self.logger.error(f"Error executing action {action}: {e}") return f"Error executing {action}: {str(e)}" async def _call_mcp_tool(self, tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: """Call an MCP tool and return the result with retry logic and enhanced logging""" if not self.session: raise Exception("Not connected to MCP server") payload = { "jsonrpc": "2.0", "id": 1, "method": "tools/call", "params": { "name": tool_name, "arguments": args } } # Enhanced logging for browser actions if tool_name in ["chrome_click_element", "chrome_fill_or_select", "chrome_keyboard"]: self.logger.info(f"🔧 MCP TOOL CALL: {tool_name} with args: {args}") else: self.logger.debug(f"🔧 MCP TOOL CALL: {tool_name} with args: {args}") retry_attempts = 3 retry_delay = 1.0 for attempt in range(retry_attempts): try: self.logger.debug(f"📡 HTTP REQUEST: Calling MCP tool {tool_name} (attempt {attempt + 1})") # Prepare headers with session ID if available headers = {} if self.session_id: headers['mcp-session-id'] = self.session_id async with self.session.post(self.server_url, json=payload, headers=headers) as response: if response.status != 200: error_text = await response.text() self.logger.error(f"❌ HTTP ERROR: {response.status} - {error_text}") raise Exception(f"HTTP {response.status}: {error_text}") # Handle different content types content_type = response.headers.get('content-type', '') if 'application/json' in content_type: result = await response.json() elif 'text/event-stream' in content_type: # For SSE responses, read the stream and parse JSON from events text_response = await response.text() # Look for JSON data in SSE format lines = text_response.strip().split('\n') json_data = None for line in lines: if line.startswith('data: '): try: json_data = json.loads(line[6:]) # Remove 'data: ' prefix break except json.JSONDecodeError: continue if json_data: result = json_data else: self.logger.error(f"❌ SSE PARSE ERROR: No valid JSON in response: {text_response[:200]}") raise Exception(f"No valid JSON found in SSE response: {text_response[:200]}") else: # Try to parse as JSON anyway try: result = await response.json() except: text_response = await response.text() self.logger.error(f"❌ JSON PARSE ERROR: Unexpected content type {content_type}: {text_response[:200]}") raise Exception(f"Unexpected content type {content_type}: {text_response[:200]}") # Enhanced error handling and logging if "error" in result: error_msg = result['error'] if isinstance(error_msg, dict): error_msg = error_msg.get('message', str(error_msg)) self.logger.error(f"❌ MCP TOOL ERROR: {tool_name} failed with error: {error_msg}") raise Exception(f"MCP tool error: {error_msg}") # Log successful results for browser actions tool_result = result.get("result", {}) if tool_name in ["chrome_click_element", "chrome_fill_or_select", "chrome_keyboard"]: self.logger.info(f"✅ MCP TOOL SUCCESS: {tool_name} completed successfully") self.logger.debug(f"📝 MCP RESULT: {tool_result}") # Parse response to extract target element information parsed_response = self.response_handler.parse_mcp_response(tool_result) if parsed_response["success"] and parsed_response["target_element"]: self.last_target_element = parsed_response["target_element"] self.last_optimal_selector = parsed_response["optimal_selector"] self.logger.info(f"🎯 TARGET ELEMENT: {self.last_target_element}") self.logger.info(f"🔍 OPTIMAL SELECTOR: {self.last_optimal_selector}") else: self.logger.debug(f"✅ MCP TOOL SUCCESS: {tool_name} completed") return tool_result except Exception as e: self.logger.warning(f"⚠️ MCP RETRY: Tool call attempt {attempt + 1} failed: {e}") if attempt == retry_attempts - 1: self.logger.error(f"❌ MCP FINAL FAILURE: Tool {tool_name} failed after {retry_attempts} attempts: {str(e)}") raise Exception(f"MCP tool {tool_name} failed after {retry_attempts} attempts: {str(e)}") await asyncio.sleep(retry_delay) return {} async def fill_using_target_element(self, value: str, fallback_selectors: List[str] = None) -> str: """ Fill a field using the last discovered target element information. This method prioritizes the actual target element found by MCP tools. Args: value: Value to fill in the field fallback_selectors: List of fallback selectors if target element is not available Returns: Result message """ try: # First priority: Use the optimal selector from last target element if self.last_optimal_selector: self.logger.info(f"🎯 Using target element selector: {self.last_optimal_selector}") try: result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": self.last_optimal_selector, "value": value }) return f"✅ Filled using target element selector '{self.last_optimal_selector}' with value: '{value}'" except Exception as e: self.logger.warning(f"⚠️ Target element selector failed: {e}") # Second priority: Use fallback selectors if fallback_selectors: for selector in fallback_selectors: try: self.logger.info(f"🔄 Trying fallback selector: {selector}") result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": selector, "value": value }) return f"✅ Filled using fallback selector '{selector}' with value: '{value}'" except Exception as e: self.logger.debug(f"Fallback selector '{selector}' failed: {e}") continue return "❌ No valid selectors available for filling" except Exception as e: self.logger.error(f"Error in fill_using_target_element: {e}") return f"❌ Error filling field: {str(e)}" async def click_using_target_element(self, fallback_selectors: List[str] = None) -> str: """ Click an element using the last discovered target element information. Args: fallback_selectors: List of fallback selectors if target element is not available Returns: Result message """ try: # First priority: Use the optimal selector from last target element if self.last_optimal_selector: self.logger.info(f"🎯 Clicking target element: {self.last_optimal_selector}") try: result = await self._call_mcp_tool("chrome_click_element", { "selector": self.last_optimal_selector }) return f"✅ Clicked target element: {self.last_optimal_selector}" except Exception as e: self.logger.warning(f"⚠️ Target element click failed: {e}") # Second priority: Use fallback selectors if fallback_selectors: for selector in fallback_selectors: try: self.logger.info(f"🔄 Trying fallback click selector: {selector}") result = await self._call_mcp_tool("chrome_click_element", { "selector": selector }) return f"✅ Clicked using fallback selector: {selector}" except Exception as e: self.logger.debug(f"Fallback click selector '{selector}' failed: {e}") continue return "❌ No valid selectors available for clicking" except Exception as e: self.logger.error(f"Error in click_using_target_element: {e}") return f"❌ Error clicking element: {str(e)}" async def _navigate_mcp(self, url: str) -> str: """Navigate to a URL using MCP chrome_navigate tool""" # Add protocol if missing if not url.startswith(('http://', 'https://')): url = f"https://{url}" try: result = await self._call_mcp_tool("chrome_navigate", {"url": url}) self.current_page_url = url # Auto-detect all input fields after navigation if enabled if self.auto_detect_inputs: await asyncio.sleep(2) # Wait for page to load await self._auto_detect_input_fields() return f"Navigated to {url}" except Exception as e: return f"Failed to navigate to {url}: {str(e)}" async def _click_mcp(self, selector: str) -> str: """Click on an element using MCP chrome_click_element tool""" try: result = await self._call_mcp_tool("chrome_click_element", {"selector": selector}) return f"Clicked on {selector}" except Exception as e: return f"Failed to click on {selector}: {str(e)}" async def _type_text_mcp(self, text: str) -> str: """Type text using MCP chrome_fill_or_select tool""" try: # Try to use focused element first, then fallback to common input selectors selectors = [ "input:focus, textarea:focus, [contenteditable]:focus", "input[name='q'], textarea[name='q']", # Google search box "input[type='search'], input[type='text']", # General search/text inputs "input:not([type]), textarea" # Any input without type or textarea ] for selector in selectors: try: result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": selector, "value": text }) return f"Typed: {text}" except Exception: continue return f"Failed to find suitable input field to type: {text}" except Exception as e: return f"Failed to type text: {str(e)}" async def _keyboard_mcp(self, key: str) -> str: """Press a keyboard key using MCP chrome_keyboard tool""" try: # Normalize key names for common variations key_map = { "enter": "Enter", "return": "Enter", "space": " ", "spacebar": " ", "tab": "Tab", "escape": "Escape", "esc": "Escape", "backspace": "Backspace", "delete": "Delete", "up": "ArrowUp", "down": "ArrowDown", "left": "ArrowLeft", "right": "ArrowRight", "page up": "PageUp", "page down": "PageDown", "home": "Home", "end": "End" } # Handle compound keys (like ctrl+a, shift+tab, etc.) if '+' in key: # Split compound key and normalize each part parts = [part.strip() for part in key.split('+')] normalized_parts = [] for part in parts: # Normalize modifier keys if part.lower() in ['ctrl', 'control']: normalized_parts.append('Control') elif part.lower() in ['shift']: normalized_parts.append('Shift') elif part.lower() in ['alt']: normalized_parts.append('Alt') elif part.lower() in ['cmd', 'command', 'meta']: normalized_parts.append('Meta') else: # Use the key map for the actual key normalized_parts.append(key_map.get(part.lower(), part)) normalized_key = '+'.join(normalized_parts) else: # Single key - use the key map normalized_key = key_map.get(key.lower().strip(), key) # Try both "keys" and "key" parameters as different MCP servers may expect different formats try: result = await self._call_mcp_tool("chrome_keyboard", {"keys": normalized_key}) except Exception: # Fallback to "key" parameter result = await self._call_mcp_tool("chrome_keyboard", {"key": normalized_key}) return f"Pressed key: {normalized_key}" except Exception as e: return f"Failed to press key '{key}': {str(e)}" async def _scroll_mcp(self, direction: str) -> str: """Scroll the page using keyboard commands""" try: key_map = { "up": "ArrowUp", "down": "ArrowDown", "left": "ArrowLeft", "right": "ArrowRight" } key = key_map.get(direction.lower(), "ArrowDown") result = await self._call_mcp_tool("chrome_keyboard", {"key": key}) return f"Scrolled {direction}" except Exception as e: return f"Failed to scroll: {str(e)}" async def _take_screenshot_mcp(self) -> str: """Take a screenshot using MCP chrome_screenshot tool""" try: result = await self._call_mcp_tool("chrome_screenshot", {"fullPage": True}) return "Screenshot taken successfully" except Exception as e: return f"Failed to take screenshot: {str(e)}" async def _wait(self, seconds: int) -> str: """Wait for a specified number of seconds""" await asyncio.sleep(seconds) return f"Waited for {seconds} seconds" async def _go_to_google_mcp(self) -> str: """Open Google using MCP chrome_navigate tool""" try: result = await self._call_mcp_tool("chrome_navigate", {"url": "https://www.google.com"}) return "Opened Google" except Exception as e: return f"Failed to open Google: {str(e)}" async def _go_to_facebook_mcp(self) -> str: """Open Facebook using MCP chrome_navigate tool""" try: result = await self._call_mcp_tool("chrome_navigate", {"url": "https://www.facebook.com"}) return "Opened Facebook" except Exception as e: return f"Failed to open Facebook: {str(e)}" async def _go_to_twitter_mcp(self) -> str: """Open Twitter/X using MCP chrome_navigate tool""" try: result = await self._call_mcp_tool("chrome_navigate", {"url": "https://www.x.com"}) return "Opened Twitter (X)" except Exception as e: return f"Failed to open Twitter: {str(e)}" async def _search_google_mcp(self, query: str) -> str: """Search Google for a query and return results using MCP tools""" try: # First, navigate to Google await self._go_to_google_mcp() await asyncio.sleep(3) # Wait for page to load # Try multiple selectors for the search box (Google uses textarea, not input) search_selectors = [ "#APjFqb", # Main Google search box ID "textarea[name='q']", # Google search textarea "[role='combobox']", # Role-based selector ".gLFyf", # Google search box class "textarea[aria-label*='Search']" # Aria-label based ] search_success = False for selector in search_selectors: try: # Click to focus the search box await self._call_mcp_tool("chrome_click_element", {"selector": selector}) await asyncio.sleep(0.5) # Clear any existing text and fill the search box await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.2) await self._call_mcp_tool("chrome_fill_or_select", { "selector": selector, "value": query }) await asyncio.sleep(1) # Click the Google Search button instead of pressing Enter # (Enter just shows autocomplete, doesn't submit search) search_button_selectors = [ "input[value='Google Search']", "button[aria-label*='Google Search']", "input[type='submit'][value*='Google Search']", ".gNO89b", # Google Search button class "center input[type='submit']:first-of-type" # First submit button in center ] button_clicked = False for button_selector in search_button_selectors: try: await self._call_mcp_tool("chrome_click_element", {"selector": button_selector}) button_clicked = True self.logger.info(f"Successfully clicked search button: {button_selector}") break except Exception as e: self.logger.debug(f"Failed to click button {button_selector}: {e}") continue if not button_clicked: # Fallback: try Enter key as last resort await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) self.logger.info("Fallback: used Enter key for search") await asyncio.sleep(5) # Wait longer for search results to load search_success = True self.logger.info(f"Successfully performed search using selector: {selector}") break except Exception as e: self.logger.debug(f"Failed to search with selector {selector}: {e}") continue if not search_success: return f"Failed to find search input field on Google for query: '{query}'" # Get search results return await self._get_search_results_mcp() except Exception as e: self.logger.error(f"Error searching Google: {e}") return f"Error searching Google for '{query}': {str(e)}" async def _get_search_results_mcp(self) -> str: """Extract search results from the current page using MCP tools""" try: # Try multiple selectors for Google search results (Google's structure changes frequently) result_selectors = [ ".tF2Cxc", # Current Google search result container ".g", # Traditional Google search result "#rso .g", # Results container with .g class "[data-ved]", # Elements with data-ved attribute (Google results) ".yuRUbf", # Google result link container "#search .g", # Search container with .g class ".rc", # Another Google result class ".r" # Simple result class ] content = [] successful_selector = None for selector in result_selectors: try: result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) temp_content = result.get("content", []) # Check if we got valid content (not error messages) if temp_content and not any("Error" in str(item) for item in temp_content): content = temp_content successful_selector = selector self.logger.info(f"Successfully extracted results using selector: {selector}") break else: self.logger.debug(f"No valid content found for selector: {selector}") except Exception as e: self.logger.debug(f"Failed to get content with selector {selector}: {e}") continue if not content: # If no results found, try to get any text content from the page try: result = await self._call_mcp_tool("chrome_get_web_content", { "selector": "body", "textOnly": True }) page_content = result.get("content", []) if page_content: page_text = str(page_content[0]).lower() if "no results found" in page_text or "did not match" in page_text: return "No search results found for this query" elif "search" in page_text: return "Search was performed but could not extract structured results. The page may have loaded but results are in an unexpected format." return "No search results found on this page" except Exception: return "No search results found on this page" # Parse the content to extract search results formatted_results = [] for i, item in enumerate(content[:10], 1): # Limit to top 10 results try: # Handle different content formats if isinstance(item, dict): text_content = item.get("text", "") href = item.get("href", "") else: text_content = str(item) href = "" if not text_content.strip(): continue # For Google search results, the text content is often JSON # Try to parse it if it looks like JSON if text_content.startswith('{"success":true'): try: import json data = json.loads(text_content) actual_content = data.get("textContent", "") if actual_content: text_content = actual_content except json.JSONDecodeError: pass # Use original text_content # Try to extract title, URL, and snippet from the text lines = [line.strip() for line in text_content.split('\n') if line.strip()] if not lines: continue # For Google results, often the first line is the title # and subsequent lines are the snippet title = lines[0] if lines else "No title" # Skip very short titles that might be navigation elements if len(title) < 10 and len(lines) > 1: title = lines[1] if len(lines) > 1 else title # Extract URL from the text content (Google shows URLs in the results) extracted_url = "URL not available" # Look for URLs in the text content import re url_patterns = [ r'https?://[^\s\n›]+', # Standard HTTP URLs r'[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(?:/[^\s\n›]*)?', # Domain-based URLs r'[a-zA-Z0-9.-]+\.(?:com|org|net|edu|gov|io|co\.uk|de|fr|jp)(?:\s*›\s*[^\n]*)?' # Common TLDs with › separator ] for pattern in url_patterns: matches = re.findall(pattern, text_content) if matches: # Take the first URL found found_url = matches[0].strip() # Clean up the URL (remove › and trailing text) found_url = found_url.split('›')[0].strip() if not found_url.startswith('http'): found_url = 'https://' + found_url extracted_url = found_url break # Get snippet from remaining lines (skip URL lines) snippet_lines = [] for line in lines[1:]: # Skip lines that are just URLs or domain names if not re.match(r'^https?://', line) and not re.match(r'^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', line): snippet_lines.append(line) snippet = ' '.join(snippet_lines[:3]) if snippet_lines else "No description" # Clean up title and snippet title = title[:100] + "..." if len(title) > 100 else title snippet = snippet[:200] + "..." if len(snippet) > 200 else snippet # Skip results that are too generic or empty if title.lower() in ['no title', 'gmail', 'images'] or len(title.strip()) < 5: continue # Use extracted URL or href if available url = href if href else extracted_url formatted_results.append(f"{i}. {title}\n {snippet}\n {url}") except Exception as e: self.logger.debug(f"Error processing result item {i}: {e}") continue if formatted_results: return f"Search Results (using {successful_selector}):\n\n" + "\n\n".join(formatted_results) else: return f"Found {len(content)} search result elements but could not extract readable content" except Exception as e: return f"Failed to extract search results: {str(e)}" async def _go_back_mcp(self) -> str: """Navigate back in browser history using MCP tools""" try: await self._call_mcp_tool("chrome_keyboard", {"key": "Alt+Left"}) return "Navigated back to previous page" except Exception as e: self.logger.error(f"Error going back: {e}") return f"Error going back: {str(e)}" async def _go_forward_mcp(self) -> str: """Navigate forward in browser history using MCP tools""" try: await self._call_mcp_tool("chrome_keyboard", {"key": "Alt+Right"}) return "Navigated forward to next page" except Exception as e: self.logger.error(f"Error going forward: {e}") return f"Error going forward: {str(e)}" async def _refresh_mcp(self) -> str: """Refresh the current page using MCP tools""" try: await self._call_mcp_tool("chrome_keyboard", {"key": "F5"}) return "Page refreshed successfully" except Exception as e: self.logger.error(f"Error refreshing page: {e}") return f"Error refreshing page: {str(e)}" async def get_form_fields(self) -> str: """Get all form fields on the current page with enhanced detection""" try: # Method 1: Get all interactive elements that are form fields result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select"] }) elements = [] if result: # Parse the nested JSON response from MCP tool try: if "content" in result and result["content"]: content_text = result["content"][0].get("text", "") if content_text: import json parsed_data = json.loads(content_text) elements = parsed_data.get("elements", []) else: # Fallback: try direct access for backward compatibility elements = result.get("elements", []) except (json.JSONDecodeError, KeyError, IndexError) as e: self.logger.error(f"Error parsing MCP response: {e}") elements = result.get("elements", []) # Method 2: If no elements found, try enhanced detection with JavaScript if not elements: self.logger.info("No elements found with standard method, trying enhanced detection...") try: enhanced_result = await self._call_mcp_tool("chrome_execute_script", { "script": """ function findAllFormElements() { const elements = []; // Find all input elements document.querySelectorAll('input, textarea, select').forEach((el, index) => { const rect = el.getBoundingClientRect(); const isVisible = rect.width > 0 && rect.height > 0 && window.getComputedStyle(el).display !== 'none' && window.getComputedStyle(el).visibility !== 'hidden'; elements.push({ tag: el.tagName.toLowerCase(), type: el.type || 'text', name: el.name || '', id: el.id || '', placeholder: el.placeholder || '', value: el.value || '', className: el.className || '', selector: generateSelector(el), visible: isVisible, required: el.required || false, disabled: el.disabled || false }); }); function generateSelector(element) { if (element.id) return '#' + element.id; if (element.name) return `[name="${element.name}"]`; if (element.className) { const classes = element.className.split(' ').filter(c => c.length > 0); if (classes.length > 0) return '.' + classes.join('.'); } return element.tagName.toLowerCase() + ':nth-of-type(' + (Array.from(element.parentNode.children).indexOf(element) + 1) + ')'; } return elements; } return findAllFormElements(); """ }) if enhanced_result and "content" in enhanced_result: content_text = enhanced_result["content"][0].get("text", "") if content_text: elements = json.loads(content_text) self.logger.info(f"Enhanced detection found {len(elements)} elements") except Exception as e: self.logger.error(f"Enhanced detection failed: {e}") if not elements: return "No form fields found on the current page" # Format the form fields information form_fields = [] for i, element in enumerate(elements, 1): field_info = { "index": i, "selector": element.get("selector", ""), "type": element.get("type", ""), "name": element.get("name", ""), "id": element.get("id", ""), "placeholder": element.get("placeholder", ""), "value": element.get("value", ""), "required": element.get("required", False), "label": element.get("label", "") } # Create a readable description description = f"Field {i}: " if field_info["label"]: description += f"'{field_info['label']}' " if field_info["type"]: description += f"({field_info['type']}) " if field_info["name"]: description += f"name='{field_info['name']}' " if field_info["id"]: description += f"id='{field_info['id']}' " if field_info["placeholder"]: description += f"placeholder='{field_info['placeholder']}' " if field_info["required"]: description += "(required) " description += f"selector: {field_info['selector']}" form_fields.append(description) return f"Found {len(form_fields)} form fields:\n\n" + "\n".join(form_fields) except Exception as e: self.logger.error(f"Error getting form fields: {e}") return f"Error getting form fields: {str(e)}" async def fill_form_field(self, field_selector: str, value: str) -> str: """Fill a specific form field with a value""" try: # First click to focus the field await self._call_mcp_tool("chrome_click_element", {"selector": field_selector}) await asyncio.sleep(0.3) # Clear existing content await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.1) # Fill the field result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": field_selector, "value": value }) return f"Successfully filled field '{field_selector}' with value: '{value}'" except Exception as e: self.logger.error(f"Error filling form field: {e}") return f"Error filling form field '{field_selector}': {str(e)}" async def get_form_field_info(self, field_selector: str) -> str: """Get detailed information about a specific form field""" try: # Get element information result = await self._call_mcp_tool("chrome_get_web_content", { "selector": field_selector, "textOnly": False }) if not result or not result.get("content"): return f"Form field '{field_selector}' not found" content = result.get("content", []) if content: field_data = content[0] if isinstance(content, list) else content # Extract field information info = [] info.append(f"Selector: {field_selector}") if isinstance(field_data, dict): for key, value in field_data.items(): if value and key not in ['content', 'textContent']: info.append(f"{key.capitalize()}: {value}") else: info.append(f"Content: {str(field_data)}") return "Form field information:\n" + "\n".join(info) else: return f"No information found for field '{field_selector}'" except Exception as e: self.logger.error(f"Error getting form field info: {e}") return f"Error getting form field info for '{field_selector}': {str(e)}" async def fill_form_step_by_step(self, form_data: str) -> str: """Fill form fields one by one with provided data (JSON format)""" try: import json # Parse the form data try: data = json.loads(form_data) except json.JSONDecodeError: return f"Invalid JSON format in form_data: {form_data}" if not isinstance(data, dict): return "Form data must be a JSON object with field selectors as keys and values as values" results = [] successful_fields = 0 for field_selector, value in data.items(): try: self.logger.info(f"Filling field '{field_selector}' with value '{value}'") # Fill the field result = await self.fill_form_field(field_selector, str(value)) results.append(f"✓ {field_selector}: {result}") successful_fields += 1 # Small delay between fields await asyncio.sleep(0.5) except Exception as e: error_msg = f"✗ {field_selector}: Error - {str(e)}" results.append(error_msg) self.logger.error(f"Error filling field {field_selector}: {e}") summary = f"Form filling completed: {successful_fields}/{len(data)} fields filled successfully" return f"{summary}\n\nDetails:\n" + "\n".join(results) except Exception as e: self.logger.error(f"Error in step-by-step form filling: {e}") return f"Error in step-by-step form filling: {str(e)}" async def fill_qubecare_login(self, email: str, password: str) -> str: """Specialized method to fill QuBeCare login form""" try: self.logger.info("Starting QuBeCare login form filling...") # Wait for page to load completely await asyncio.sleep(2) # Try multiple strategies to find and fill the login form strategies = [ # Strategy 1: Common login selectors { "email_selectors": [ "input[type='email']", "input[name='email']", "input[name='username']", "input[name='login']", "#email", "#username", "#login", ".email", ".username" ], "password_selectors": [ "input[type='password']", "input[name='password']", "#password", ".password" ] }, # Strategy 2: QuBeCare specific selectors (if they use specific patterns) { "email_selectors": [ "input[placeholder*='email']", "input[placeholder*='Email']", "input[aria-label*='email']", "input[aria-label*='Email']" ], "password_selectors": [ "input[placeholder*='password']", "input[placeholder*='Password']", "input[aria-label*='password']", "input[aria-label*='Password']" ] } ] email_filled = False password_filled = False for strategy_num, strategy in enumerate(strategies, 1): self.logger.info(f"Trying strategy {strategy_num}...") # Try to fill email field if not email_filled: for email_selector in strategy["email_selectors"]: try: result = await self.fill_form_field(email_selector, email) if "Successfully filled" in result: self.logger.info(f"Email filled with selector: {email_selector}") email_filled = True break except Exception as e: self.logger.debug(f"Email selector {email_selector} failed: {e}") continue # Try to fill password field if not password_filled: for password_selector in strategy["password_selectors"]: try: result = await self.fill_form_field(password_selector, password) if "Successfully filled" in result: self.logger.info(f"Password filled with selector: {password_selector}") password_filled = True break except Exception as e: self.logger.debug(f"Password selector {password_selector} failed: {e}") continue if email_filled and password_filled: break # Summary results = [] if email_filled: results.append("✓ Email field filled successfully") else: results.append("✗ Could not find or fill email field") if password_filled: results.append("✓ Password field filled successfully") else: results.append("✗ Could not find or fill password field") success_count = sum([email_filled, password_filled]) summary = f"QuBeCare login form filling: {success_count}/2 fields filled successfully" return f"{summary}\n\nDetails:\n" + "\n".join(results) except Exception as e: self.logger.error(f"Error filling QuBeCare login form: {e}") return f"Error filling QuBeCare login form: {str(e)}" async def submit_form(self, form_selector: str = "form") -> str: """Submit a form on the current page""" try: # Try multiple methods to submit the form submit_methods = [ # Method 1: Click submit button { "method": "submit_button", "selectors": [ "input[type='submit']", "button[type='submit']", "button:contains('Submit')", "button:contains('Send')", "button:contains('Save')", "input[value*='Submit']", "input[value*='Send']", ".submit-btn", ".btn-submit" ] }, # Method 2: Press Enter on form { "method": "enter_key", "selector": form_selector } ] for method_info in submit_methods: if method_info["method"] == "submit_button": # Try to find and click submit button for selector in method_info["selectors"]: try: await self._call_mcp_tool("chrome_click_element", {"selector": selector}) return f"Form submitted successfully by clicking submit button: {selector}" except Exception: continue elif method_info["method"] == "enter_key": # Try to submit by pressing Enter on the form try: await self._call_mcp_tool("chrome_click_element", {"selector": form_selector}) await asyncio.sleep(0.2) await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) return f"Form submitted successfully using Enter key on: {form_selector}" except Exception: continue return "Could not find a way to submit the form. Please check if there's a submit button or try manually." except Exception as e: self.logger.error(f"Error submitting form: {e}") return f"Error submitting form: {str(e)}" async def _auto_detect_input_fields(self) -> None: """Automatically detect and cache all input fields on the current page""" try: self.logger.info("Auto-detecting all input fields on current page...") # Get all interactive elements including all input types result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select", "button"] }) if not result: self.logger.debug("No input fields found during auto-detection") return # Parse the nested JSON response from MCP tool elements = [] try: if "content" in result and result["content"]: content_text = result["content"][0].get("text", "") if content_text: import json parsed_data = json.loads(content_text) elements = parsed_data.get("elements", []) self.logger.debug(f"Parsed {len(elements)} elements from MCP response") else: # Fallback: try direct access for backward compatibility elements = result.get("elements", []) except (json.JSONDecodeError, KeyError, IndexError) as e: self.logger.error(f"Error parsing MCP response: {e}") # Fallback: try direct access elements = result.get("elements", []) if not elements: self.logger.debug("No input field elements found during auto-detection") return # Cache all input fields with enhanced metadata self.cached_input_fields = {} for element in elements: field_info = { "selector": element.get("selector", ""), "type": element.get("type", ""), "name": element.get("name", ""), "id": element.get("id", ""), "placeholder": element.get("placeholder", ""), "value": element.get("value", ""), "required": element.get("required", False), "label": element.get("label", ""), "aria_label": element.get("aria-label", ""), "title": element.get("title", "") } # Create multiple lookup keys for flexible field matching lookup_keys = [] # Add name-based keys if field_info["name"]: lookup_keys.extend([ field_info["name"].lower(), field_info["name"].lower().replace("_", " "), field_info["name"].lower().replace("-", " ") ]) # Add ID-based keys if field_info["id"]: lookup_keys.extend([ field_info["id"].lower(), field_info["id"].lower().replace("_", " "), field_info["id"].lower().replace("-", " ") ]) # Add label-based keys if field_info["label"]: lookup_keys.append(field_info["label"].lower()) # Add aria-label keys if field_info["aria_label"]: lookup_keys.append(field_info["aria_label"].lower()) # Add placeholder-based keys if field_info["placeholder"]: lookup_keys.append(field_info["placeholder"].lower()) # Add type-based keys for all input types field_type = field_info["type"].lower() if field_type: lookup_keys.append(field_type) # Add variations of the type if field_type == "email": lookup_keys.extend(["mail", "e-mail"]) elif field_type == "tel": lookup_keys.extend(["phone", "telephone"]) elif field_type == "search": lookup_keys.extend(["find", "query", "q"]) # Add common field name patterns (expanded for all input types) common_patterns = { "email": ["email", "e-mail", "mail", "email address"], "password": ["password", "pass", "pwd"], "phone": ["phone", "telephone", "tel", "mobile", "cell"], "name": ["name", "full name", "username", "user name"], "first name": ["first name", "firstname", "fname"], "last name": ["last name", "lastname", "lname", "surname"], "address": ["address", "street", "location"], "city": ["city", "town"], "zip": ["zip", "postal", "postcode", "zip code"], "country": ["country", "nation"], "state": ["state", "province", "region"], "message": ["message", "comment", "description", "notes"], "subject": ["subject", "title", "topic"], "search": ["search", "find", "query", "q", "lookup"], "text": ["text", "input", "field"], "number": ["number", "num", "amount", "quantity"], "date": ["date", "when", "time"], "url": ["url", "link", "website", "site"], "file": ["file", "upload", "attach", "document"], "checkbox": ["check", "checkbox", "tick", "select"], "radio": ["radio", "option", "choice"], "submit": ["submit", "send", "save", "go", "enter"], "button": ["button", "click", "press"] } # Match field to common patterns for pattern_key, pattern_values in common_patterns.items(): for lookup_key in lookup_keys: if any(pattern in lookup_key for pattern in pattern_values): lookup_keys.append(pattern_key) break # Store field info under all lookup keys for key in lookup_keys: if key and key not in self.cached_input_fields: self.cached_input_fields[key] = field_info self.logger.info(f"Auto-detected {len(elements)} input fields with {len(self.cached_input_fields)} lookup keys") except Exception as e: self.logger.error(f"Error during auto input field detection: {e}") async def fill_field_by_name(self, field_name: str, value: str) -> str: """Fill any input field using ONLY real-time MCP discovery - no cache""" try: field_name_lower = field_name.lower().strip() self.logger.info(f"Starting REAL-TIME form filling for field: '{field_name}' with value: '{value}' (NO CACHE)") # Step 1: Real-time MCP discovery - get fresh interactive elements self.logger.info(f"Getting real-time form elements using MCP tools...") discovery_result = await self._discover_form_fields_dynamically(field_name, value) if discovery_result["success"]: return discovery_result["message"] # Step 2: Enhanced field detection with retry mechanism (real-time only) self.logger.info(f"Real-time discovery failed, trying enhanced detection with retry...") enhanced_result = await self._enhanced_field_detection_with_retry(field_name, value, max_retries=3) if enhanced_result["success"]: return enhanced_result["message"] # Step 3: Content analysis as final fallback (real-time only) self.logger.info(f"Enhanced detection failed, trying real-time content analysis...") content_result = await self._analyze_page_content_for_field(field_name, value) if content_result["success"]: return content_result["message"] # Step 4: Direct MCP element search as last resort self.logger.info(f"All methods failed, trying direct MCP element search...") direct_result = await self._direct_mcp_element_search(field_name, value) if direct_result["success"]: return direct_result["message"] return f"✗ Could not find field '{field_name}' using real-time MCP discovery methods." except Exception as e: self.logger.error(f"Error filling field by name: {e}") return f"Error filling field '{field_name}': {str(e)}" async def fill_input_field(self, field_selector: str, value: str) -> str: """Fill any input field with enhanced typing support and target element tracking""" try: # First click to focus the field - this will capture target element info click_result = await self._call_mcp_tool("chrome_click_element", {"selector": field_selector}) await asyncio.sleep(0.3) # Clear existing content for input fields (not for buttons) try: # Get field type to determine if we should clear content field_info_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": field_selector, "textOnly": False }) field_type = "text" # default if field_info_result and field_info_result.get("content"): content = field_info_result["content"][0] if isinstance(field_info_result["content"], list) else field_info_result["content"] if isinstance(content, dict): field_type = content.get("type", "text").lower() # Only clear content for input fields that accept text if field_type in ["text", "email", "password", "search", "tel", "url", "number", "textarea"]: await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.1) except Exception as e: self.logger.debug(f"Could not determine field type, proceeding with fill: {e}") # Fill the field using target element approach try: # Use target element approach with fallback to original selector result = await self.fill_using_target_element(value, [field_selector]) if "✅" in result: return result else: # If target element approach failed, try original method result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": field_selector, "value": value }) return f"Successfully filled field '{field_selector}' with value: '{value}'" except Exception as e1: self.logger.debug(f"fill_or_select failed, trying keyboard input: {e1}") # Fallback: type character by character try: # Clear any existing content first await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.1) # Type the value character by character for better compatibility for char in value: if char == ' ': await self._call_mcp_tool("chrome_keyboard", {"keys": "Space"}) elif char == '\n': await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) elif char == '\t': await self._call_mcp_tool("chrome_keyboard", {"keys": "Tab"}) else: await self._call_mcp_tool("chrome_keyboard", {"keys": char}) await asyncio.sleep(0.05) # Small delay between characters return f"Successfully typed into field '{field_selector}' with value: '{value}'" except Exception as e2: self.logger.error(f"Both fill methods failed: fill_or_select={e1}, keyboard={e2}") raise e2 except Exception as e: self.logger.error(f"Error filling input field: {e}") return f"Error filling input field '{field_selector}': {str(e)}" async def enhanced_element_discovery_with_fallback(self, element_description: str, action_type: str = "fill", value: str = "") -> Dict[str, Any]: """ Enhanced element discovery with intelligent fallback mechanism. Process: 1. Try chrome_get_interactive_elements first 2. If that fails (isError: True), fall back to chrome_get_web_content 3. Extract original selectors and use them for the action Args: element_description: Description of element to find (e.g., "username", "login button") action_type: Type of action ("fill", "click") value: Value to fill (for fill actions) Returns: Dictionary with success status, selector, and result message """ try: self.logger.info(f"🔍 ENHANCED DISCOVERY: Looking for '{element_description}' for {action_type} action") # Step 1: Try chrome_get_interactive_elements first self.logger.info("📋 Step 1: Trying chrome_get_interactive_elements...") try: interactive_result = await self._call_mcp_tool("chrome_get_interactive_elements", { "textQuery": element_description }) # Check if the result has an error if not interactive_result.get("isError", False): # Parse the interactive elements response elements = [] try: if "content" in interactive_result and interactive_result["content"]: content_text = interactive_result["content"][0].get("text", "") if content_text: parsed_data = json.loads(content_text) elements = parsed_data.get("elements", []) except (json.JSONDecodeError, KeyError, IndexError): elements = interactive_result.get("elements", []) if elements: # Found elements, use the first suitable one for element in elements: selector = element.get("selector", "") if selector: self.logger.info(f"✅ Found element with interactive discovery: {selector}") return { "success": True, "selector": selector, "method": "interactive_elements", "element": element } self.logger.warning("⚠️ chrome_get_interactive_elements failed or returned no elements") except Exception as e: self.logger.warning(f"⚠️ chrome_get_interactive_elements error: {e}") # Step 2: Fallback to chrome_get_web_content self.logger.info("🔄 Step 2: Falling back to chrome_get_web_content...") try: web_content_result = await self._call_mcp_tool("chrome_get_web_content", { "textOnly": False }) if not web_content_result.get("isError", False): # Parse web content to find selectors selector = await self._extract_selector_from_web_content(web_content_result, element_description, action_type) if selector: self.logger.info(f"✅ Found element with web content discovery: {selector}") return { "success": True, "selector": selector, "method": "web_content", "element": {"selector": selector} } self.logger.warning("⚠️ chrome_get_web_content failed or no suitable selector found") except Exception as e: self.logger.warning(f"⚠️ chrome_get_web_content error: {e}") # Step 3: Try intelligent selector generation as last resort self.logger.info("🎯 Step 3: Trying intelligent selector generation...") intelligent_selectors = self._generate_intelligent_selectors(element_description) for selector in intelligent_selectors[:3]: # Try first 3 intelligent selectors try: # Test if selector exists test_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) if test_result and not test_result.get("isError", False) and test_result.get("content"): self.logger.info(f"✅ Found element with intelligent selector: {selector}") return { "success": True, "selector": selector, "method": "intelligent_generation", "element": {"selector": selector} } except Exception as e: self.logger.debug(f"Intelligent selector '{selector}' failed: {e}") continue return { "success": False, "error": f"Could not find element '{element_description}' using any discovery method", "method": "none" } except Exception as e: self.logger.error(f"Error in enhanced_element_discovery_with_fallback: {e}") return { "success": False, "error": str(e), "method": "error" } async def _extract_selector_from_web_content(self, web_content_result: Dict[str, Any], element_description: str, action_type: str) -> Optional[str]: """ Extract a suitable selector from web content based on element description. Args: web_content_result: Result from chrome_get_web_content element_description: Description of element to find action_type: Type of action ("fill", "click") Returns: Suitable CSS selector or None """ try: # Parse web content content_text = "" if "content" in web_content_result and web_content_result["content"]: content_item = web_content_result["content"][0] if isinstance(content_item, dict): content_text = content_item.get("text", "") else: content_text = str(content_item) if not content_text: return None element_description_lower = element_description.lower() # Generate selectors based on element description and action type if action_type == "fill": # For form fields if "username" in element_description_lower or "user" in element_description_lower: return self._find_selector_in_content(content_text, ["input[name*='user']", "input[id*='user']", "input[type='text']"]) elif "email" in element_description_lower or "mail" in element_description_lower: return self._find_selector_in_content(content_text, ["input[type='email']", "input[name*='email']", "input[id*='email']"]) elif "password" in element_description_lower or "pass" in element_description_lower: return self._find_selector_in_content(content_text, ["input[type='password']", "input[name*='password']", "input[id*='pass']"]) elif "search" in element_description_lower: return self._find_selector_in_content(content_text, ["input[type='search']", "input[name='q']", "textarea[name='q']"]) elif "phone" in element_description_lower or "tel" in element_description_lower: return self._find_selector_in_content(content_text, ["input[type='tel']", "input[name*='phone']", "input[name*='tel']"]) else: # Generic input field return self._find_selector_in_content(content_text, ["input[type='text']", "input", "textarea"]) elif action_type == "click": # For clickable elements if "login" in element_description_lower: return self._find_selector_in_content(content_text, ["button[type='submit']", "input[type='submit']", "button", "[role='button']"]) elif "submit" in element_description_lower: return self._find_selector_in_content(content_text, ["button[type='submit']", "input[type='submit']", "button"]) elif "button" in element_description_lower: return self._find_selector_in_content(content_text, ["button", "input[type='button']", "[role='button']"]) elif "link" in element_description_lower: return self._find_selector_in_content(content_text, ["a", "[role='link']"]) else: # Generic clickable element return self._find_selector_in_content(content_text, ["button", "a", "[role='button']", "input[type='submit']"]) return None except Exception as e: self.logger.error(f"Error extracting selector from web content: {e}") return None def _find_selector_in_content(self, content: str, selectors: List[str]) -> Optional[str]: """ Find the first selector that appears to be present in the content. Args: content: Web page content selectors: List of selectors to check Returns: First matching selector or None """ try: # Simple heuristic: check if selector patterns appear in content for selector in selectors: # Extract the key parts of the selector for matching if "input" in selector and "input" in content.lower(): return selector elif "button" in selector and "button" in content.lower(): return selector elif "textarea" in selector and "textarea" in content.lower(): return selector elif selector.startswith("#") or selector.startswith("."): # ID or class selectors - harder to validate from content continue elif "[" in selector: # Attribute selectors - check if attribute name appears attr_match = re.search(r'\[([^=\]]+)', selector) if attr_match: attr_name = attr_match.group(1) if attr_name in content.lower(): return selector # If no specific match, return the first selector as fallback return selectors[0] if selectors else None except Exception as e: self.logger.error(f"Error finding selector in content: {e}") return selectors[0] if selectors else None async def smart_fill_with_target_tracking(self, field_name: str, value: str) -> str: """ Enhanced field filling with intelligent fallback mechanism. Process: 1. Use enhanced discovery (chrome_get_interactive_elements -> chrome_get_web_content fallback) 2. Extract and store actual target element information from MCP response 3. Use specific target element selector for filling 4. Store target element for potential reuse Args: field_name: Name or description of the field to find value: Value to fill in the field Returns: Result message with details about the operation """ try: field_name_lower = field_name.lower().strip() self.logger.info(f"🎯 SMART FILL: Starting enhanced filling for '{field_name}' with '{value}'") # Clear previous target element to start fresh self.last_target_element = None self.last_optimal_selector = None # Step 1: Use enhanced discovery with fallback mechanism self.logger.info("🔍 Step 1: Using enhanced discovery with fallback...") discovery_result = await self.enhanced_element_discovery_with_fallback(field_name, "fill", value) if discovery_result["success"]: selector = discovery_result["selector"] method = discovery_result["method"] self.logger.info(f"✅ Element found using {method}: {selector}") # Step 2: Try to fill the field using the discovered selector try: # First click to focus and capture target element await self._call_mcp_tool("chrome_click_element", {"selector": selector}) await asyncio.sleep(0.3) # Clear existing content await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.1) # Fill the field - this will capture target element info fill_result = await self._call_mcp_tool("chrome_fill_or_select", { "selector": selector, "value": value }) return f"🎯 ENHANCED FILL SUCCESS: Filled '{field_name}' using {method} method\n🔍 Selector: {selector}\n📍 Target Element: {self.last_target_element}" except Exception as e: self.logger.warning(f"⚠️ Direct fill failed: {e}") # Fallback to target element approach if available if self.last_optimal_selector: fallback_selectors = self._generate_fallback_selectors_from_target() fill_result = await self.fill_using_target_element(value, fallback_selectors) if "✅" in fill_result: return f"🔄 FALLBACK SUCCESS: {fill_result}" # Step 3: If enhanced discovery failed, try traditional methods self.logger.info("🔄 Step 2: Enhanced discovery failed, trying traditional methods...") traditional_result = await self.fill_field_by_name(field_name, value) if "✗" not in traditional_result and "Error" not in traditional_result: return f"🔄 TRADITIONAL SUCCESS: {traditional_result}" return f"❌ SMART FILL FAILED: Could not find or fill field '{field_name}' using any method\n🔍 Discovery Error: {discovery_result.get('error', 'Unknown error')}" except Exception as e: self.logger.error(f"Error in smart_fill_with_target_tracking: {e}") return f"❌ Error in smart fill: {str(e)}" def _generate_fallback_selectors_from_target(self) -> List[str]: """ Generate intelligent fallback selectors based on the last target element. Returns: List of fallback selectors """ if not self.last_target_element: return [] fallback_selectors = [] target = self.last_target_element # Add variations of the target element if target.get("id"): fallback_selectors.append(f"#{target['id']}") if target.get("name"): tag = target.get("tagName", "input").lower() fallback_selectors.extend([ f"{tag}[name='{target['name']}']", f"[name='{target['name']}']" ]) if target.get("className"): tag = target.get("tagName", "input").lower() classes = target["className"].split() for cls in classes[:2]: # Use first 2 classes fallback_selectors.append(f"{tag}.{cls}") if target.get("type"): fallback_selectors.append(f"input[type='{target['type']}']") return fallback_selectors async def smart_click_with_target_tracking(self, element_description: str) -> str: """ Enhanced element clicking with intelligent fallback mechanism. Process: 1. Use enhanced discovery (chrome_get_interactive_elements -> chrome_get_web_content fallback) 2. Extract and store actual target element information from MCP response 3. Use specific target element selector for clicking 4. Store target element for potential reuse Args: element_description: Description of element to click (e.g., "login button", "submit") Returns: Result message with details about the operation """ try: self.logger.info(f"🎯 SMART CLICK: Starting enhanced clicking for '{element_description}'") # Clear previous target element to start fresh self.last_target_element = None self.last_optimal_selector = None # Step 1: Use enhanced discovery with fallback mechanism self.logger.info("🔍 Step 1: Using enhanced discovery with fallback...") discovery_result = await self.enhanced_element_discovery_with_fallback(element_description, "click") if discovery_result["success"]: selector = discovery_result["selector"] method = discovery_result["method"] self.logger.info(f"✅ Element found using {method}: {selector}") # Step 2: Try to click the element using the discovered selector try: # Click the element - this will capture target element info click_result = await self._call_mcp_tool("chrome_click_element", {"selector": selector}) return f"🎯 ENHANCED CLICK SUCCESS: Clicked '{element_description}' using {method} method\n🔍 Selector: {selector}\n📍 Target Element: {self.last_target_element}" except Exception as e: self.logger.warning(f"⚠️ Direct click failed: {e}") # Fallback to target element approach if available if self.last_optimal_selector: fallback_selectors = self._generate_fallback_selectors_from_target() click_result = await self.click_using_target_element(fallback_selectors) if "✅" in click_result: return f"🔄 FALLBACK SUCCESS: {click_result}" # Step 3: If enhanced discovery failed, try traditional smart click self.logger.info("🔄 Step 2: Enhanced discovery failed, trying traditional smart click...") traditional_result = await self._smart_click_mcp(element_description) if "❌" not in traditional_result and "Error" not in traditional_result: return f"🔄 TRADITIONAL SUCCESS: {traditional_result}" return f"❌ SMART CLICK FAILED: Could not find or click element '{element_description}' using any method\n🔍 Discovery Error: {discovery_result.get('error', 'Unknown error')}" except Exception as e: self.logger.error(f"Error in smart_click_with_target_tracking: {e}") return f"❌ Error in smart click: {str(e)}" async def get_cached_input_fields(self) -> str: """Get the currently cached input fields""" try: if not self.cached_input_fields: await self._auto_detect_input_fields() if not self.cached_input_fields: return "No input fields found on the current page" # Group fields by their actual input field (to avoid duplicates from multiple lookup keys) unique_fields = {} for key, field_info in self.cached_input_fields.items(): selector = field_info["selector"] if selector not in unique_fields: unique_fields[selector] = field_info # Format the cached input fields information input_fields = [] for i, (selector, field_info) in enumerate(unique_fields.items(), 1): # Create a readable description description = f"Field {i}: " # Add all possible names for this field field_names = [] for cached_key, cached_field in self.cached_input_fields.items(): if cached_field["selector"] == selector: field_names.append(f"'{cached_key}'") description += f"Names: {', '.join(field_names[:5])}{'...' if len(field_names) > 5 else ''} " if field_info["type"]: description += f"({field_info['type']}) " if field_info["required"]: description += "(required) " description += f"selector: {field_info['selector']}" input_fields.append(description) return f"Cached input fields ({len(unique_fields)} fields, {len(self.cached_input_fields)} lookup keys):\n\n" + "\n".join(input_fields) except Exception as e: self.logger.error(f"Error getting cached input fields: {e}") return f"Error getting cached input fields: {str(e)}" async def refresh_input_fields(self) -> str: """Manually refresh the input field cache""" try: self.cached_input_fields = {} await self._auto_detect_input_fields() return await self.get_cached_input_fields() except Exception as e: self.logger.error(f"Error refreshing input fields: {e}") return f"Error refreshing input fields: {str(e)}" async def _enhanced_field_detection_and_fill(self, field_name: str, value: str) -> str: """Enhanced field detection using chrome_get_content when standard methods fail""" try: field_name_lower = field_name.lower().strip() self.logger.info(f"Starting enhanced field detection for '{field_name}'") # Step 1: Get page content to analyze for field-related text page_content_result = await self._call_mcp_tool("chrome_get_web_content", { "textOnly": True }) if not page_content_result or not page_content_result.get("content"): self.logger.debug("Could not get page content for enhanced detection") return None page_text = str(page_content_result["content"][0]).lower() # Step 2: Look for field-related keywords in page content field_keywords = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace("_", " "), field_name_lower.replace("-", " ") ] # Step 3: Get HTML content to analyze form structure html_content_result = await self._call_mcp_tool("chrome_get_web_content", { "textOnly": False, "selector": "form, [role='form'], .form, #form" }) # Step 4: Try intelligent selector generation based on field name intelligent_selectors = self._generate_intelligent_selectors(field_name) for selector in intelligent_selectors: try: # Test if selector exists and is fillable test_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) if test_result and test_result.get("content"): # Try to fill the field fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled field using enhanced detection with selector: {selector}") return f"✓ Filled '{field_name}' field (enhanced detection): {fill_result}" except Exception as e: self.logger.debug(f"Enhanced selector '{selector}' failed: {e}") continue # Step 5: Try to find fields by analyzing labels and surrounding text label_based_result = await self._find_field_by_label_analysis(field_name, value) if label_based_result: return label_based_result self.logger.info(f"Enhanced field detection failed for '{field_name}'") return None except Exception as e: self.logger.error(f"Error in enhanced field detection: {e}") return None def _generate_intelligent_selectors(self, field_name: str) -> list: """Generate intelligent CSS selectors based on field name""" field_name_lower = field_name.lower().strip() field_variations = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace(" ", "_"), field_name_lower.replace(" ", "-"), field_name_lower.replace("_", ""), field_name_lower.replace("-", ""), field_name_lower.replace("_", "-"), field_name_lower.replace("-", "_") ] selectors = [] # Generate selectors for each variation for variation in field_variations: # Direct attribute selectors selectors.extend([ f"input[name='{variation}']", f"input[id='{variation}']", f"input[placeholder*='{variation}']", f"textarea[name='{variation}']", f"textarea[id='{variation}']", f"select[name='{variation}']", f"select[id='{variation}']", f"input[data-testid*='{variation}']", f"input[data-test*='{variation}']", f"input[class*='{variation}']", f"[aria-label*='{variation}']", f"[aria-labelledby*='{variation}']" ]) # Partial match selectors selectors.extend([ f"input[name*='{variation}']", f"input[id*='{variation}']", f"textarea[name*='{variation}']", f"textarea[id*='{variation}']", f"select[name*='{variation}']", f"select[id*='{variation}']" ]) # Common field type patterns if any(keyword in field_name_lower for keyword in ['email', 'mail']): selectors.extend([ "input[type='email']", "input[name*='email']", "input[id*='email']" ]) if any(keyword in field_name_lower for keyword in ['password', 'pass']): selectors.extend([ "input[type='password']", "input[name*='password']", "input[id*='password']" ]) if any(keyword in field_name_lower for keyword in ['username', 'user', 'login']): selectors.extend([ "input[name*='username']", "input[name*='user']", "input[name*='login']", "input[id*='username']", "input[id*='user']", "input[id*='login']" ]) # Remove duplicates while preserving order unique_selectors = [] seen = set() for selector in selectors: if selector not in seen: unique_selectors.append(selector) seen.add(selector) return unique_selectors async def _find_field_by_label_analysis(self, field_name: str, value: str) -> str: """Find fields by analyzing labels and surrounding text""" try: field_name_lower = field_name.lower().strip() self.logger.info(f"Analyzing labels for field '{field_name}'") # Get all interactive elements to analyze their context interactive_result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select"] }) if not interactive_result: return None # Parse the interactive elements response elements = [] try: if "content" in interactive_result and interactive_result["content"]: content_text = interactive_result["content"][0].get("text", "") if content_text: import json parsed_data = json.loads(content_text) elements = parsed_data.get("elements", []) except (json.JSONDecodeError, KeyError, IndexError): elements = interactive_result.get("elements", []) # Analyze each element for potential matches for element in elements: try: # Check element properties element_text = "" if "text" in element: element_text += element["text"].lower() if "placeholder" in element: element_text += " " + element["placeholder"].lower() if "ariaLabel" in element: element_text += " " + element["ariaLabel"].lower() # Check if field name matches element context if any(keyword in element_text for keyword in [field_name_lower, field_name_lower.replace(" ", "")]): selector = element.get("selector") if selector: try: fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled field using label analysis with selector: {selector}") return f"✓ Filled '{field_name}' field (label analysis): {fill_result}" except Exception as e: self.logger.debug(f"Failed to fill field with selector '{selector}': {e}") continue except Exception as e: self.logger.debug(f"Error analyzing element: {e}") continue # Try to find fields by looking for labels that contain the field name label_selectors = [ f"label:contains('{field_name}') + input", f"label:contains('{field_name}') input", f"label[for] input[id]", # Will need to be processed differently ] # Get HTML content to search for labels try: html_result = await self._call_mcp_tool("chrome_get_web_content", { "textOnly": False }) if html_result and html_result.get("content"): html_content = str(html_result["content"][0]) # Simple regex to find label-input associations import re # Look for labels containing the field name label_pattern = rf']*>.*?{re.escape(field_name)}.*?' label_matches = re.findall(label_pattern, html_content, re.IGNORECASE | re.DOTALL) for label_match in label_matches: # Extract 'for' attribute if present for_match = re.search(r'for=["\']([^"\']+)["\']', label_match) if for_match: input_id = for_match.group(1) try: fill_result = await self.fill_input_field(f"#{input_id}", value) self.logger.info(f"Successfully filled field using label 'for' attribute: #{input_id}") return f"✓ Filled '{field_name}' field (label for): {fill_result}" except Exception: continue except Exception as e: self.logger.debug(f"Error in HTML label analysis: {e}") return None except Exception as e: self.logger.error(f"Error in label analysis: {e}") return None async def execute_field_workflow(self, field_name: str, field_value: str, actions: list = None, max_retries: int = 3) -> dict: """ Execute the complete workflow: detect field, fill it, and execute actions. This implements the enhanced workflow for handling missing webpage fields: 1. Use MCP to automatically detect and retrieve the correct CSS selector 2. Use the retrieved selector to locate and fill the field 3. Execute required actions (form submission, button click, navigation) Args: field_name: Name or identifier of the field to find field_value: Value to fill in the field actions: List of actions to execute after successful field filling Format: [{"type": "submit", "selector": "form"}, {"type": "click", "selector": "button"}] max_retries: Maximum number of detection attempts Returns: Dictionary containing workflow results and status """ workflow_start = asyncio.get_event_loop().time() results = { "success": False, "field_filled": False, "actions_executed": [], "detection_method": None, "errors": [], "execution_time": 0.0, "field_selector": None } if actions is None: actions = [] try: self.logger.info(f"Starting enhanced field workflow for '{field_name}'") # Step 1: Attempt to detect and fill the field using multiple strategies detection_result = await self._workflow_detect_and_fill_field(field_name, field_value, max_retries) if not detection_result["success"]: results["errors"].append(f"Field detection failed: {detection_result.get('error', 'Unknown error')}") results["execution_time"] = asyncio.get_event_loop().time() - workflow_start return results results["field_filled"] = True results["detection_method"] = detection_result["method"] results["field_selector"] = detection_result.get("selector") self.logger.info(f"Successfully filled field '{field_name}' using {detection_result['method']}") # Step 2: Execute post-fill actions if actions: action_results = await self._execute_workflow_actions(actions) results["actions_executed"] = action_results # Check if all required actions succeeded required_actions_success = all( result["success"] for result in action_results if result.get("required", True) ) results["success"] = required_actions_success if not required_actions_success: failed_actions = [r for r in action_results if not r["success"]] results["errors"].extend([f"Action failed: {r.get('error', 'Unknown error')}" for r in failed_actions]) else: results["success"] = True except Exception as e: self.logger.error(f"Workflow execution error: {e}") results["errors"].append(f"Workflow error: {str(e)}") finally: results["execution_time"] = asyncio.get_event_loop().time() - workflow_start return results async def _workflow_detect_and_fill_field(self, field_name: str, field_value: str, max_retries: int) -> dict: """ Attempt to detect and fill a field using multiple MCP-based strategies. Detection strategies in order of preference: 1. Cached fields (fastest, most reliable) 2. Enhanced field detection (intelligent selectors) 3. Label analysis (context-based) 4. Content analysis (page text analysis) 5. Fallback patterns (last resort) """ strategies = [ ("cached_fields", self._try_cached_field_detection), ("enhanced_detection", self._try_enhanced_field_detection), ("label_analysis", self._try_label_field_detection), ("content_analysis", self._try_content_field_detection), ("fallback_patterns", self._try_fallback_field_detection) ] for attempt in range(max_retries): self.logger.info(f"Field detection attempt {attempt + 1}/{max_retries} for '{field_name}'") for strategy_name, strategy_func in strategies: try: result = await strategy_func(field_name, field_value) if result["success"]: result["method"] = strategy_name return result except Exception as e: self.logger.debug(f"Strategy {strategy_name} failed: {e}") continue # Wait before retry if attempt < max_retries - 1: await asyncio.sleep(1.0) return { "success": False, "error": f"All detection strategies failed after {max_retries} attempts" } async def _try_cached_field_detection(self, field_name: str, field_value: str) -> dict: """Try using cached field information.""" try: field_name_lower = field_name.lower().strip() # Refresh cache if empty if not self.cached_input_fields: await self._auto_detect_input_fields() if field_name_lower in self.cached_input_fields: field_info = self.cached_input_fields[field_name_lower] selector = field_info["selector"] result = await self.fill_input_field(selector, field_value) return { "success": True, "selector": selector, "result": result, "confidence": 0.9 } else: return {"success": False, "error": "Field not found in cache"} except Exception as e: return {"success": False, "error": str(e)} async def _try_enhanced_field_detection(self, field_name: str, field_value: str) -> dict: """Try using enhanced field detection with intelligent selectors.""" try: enhanced_result = await self._enhanced_field_detection_and_fill(field_name, field_value) if enhanced_result and "✓" in enhanced_result: return { "success": True, "result": enhanced_result, "confidence": 0.8 } else: return {"success": False, "error": "Enhanced detection did not find field"} except Exception as e: return {"success": False, "error": str(e)} async def _try_label_field_detection(self, field_name: str, field_value: str) -> dict: """Try using label analysis to find fields.""" try: label_result = await self._find_field_by_label_analysis(field_name, field_value) if label_result and "✓" in label_result: return { "success": True, "result": label_result, "confidence": 0.7 } else: return {"success": False, "error": "Label analysis did not find field"} except Exception as e: return {"success": False, "error": str(e)} async def _try_content_field_detection(self, field_name: str, field_value: str) -> dict: """Try using page content analysis to find fields.""" try: # Get page content for analysis page_content = await self._call_mcp_tool("chrome_get_web_content", {"textOnly": True}) if not page_content or not page_content.get("content"): return {"success": False, "error": "Could not get page content"} # Analyze content for field-related keywords content_text = str(page_content["content"][0]).lower() field_keywords = [ field_name.lower(), field_name.lower().replace(" ", ""), field_name.lower().replace("_", " "), field_name.lower().replace("-", " ") ] # Look for form elements if keywords are found in content if any(keyword in content_text for keyword in field_keywords): # Get all form elements form_elements = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select"] }) if form_elements and form_elements.get("elements"): # Try to match elements based on proximity to keywords for element in form_elements["elements"]: if isinstance(element, dict): element_text = str(element).lower() if any(keyword in element_text for keyword in field_keywords): selector = element.get("selector") if selector: try: result = await self.fill_input_field(selector, field_value) return { "success": True, "selector": selector, "result": result, "confidence": 0.6 } except Exception: continue return {"success": False, "error": "Content analysis did not find matching field"} except Exception as e: return {"success": False, "error": str(e)} async def _try_fallback_field_detection(self, field_name: str, field_value: str) -> dict: """Try using fallback patterns as last resort.""" try: # Common fallback selectors fallback_selectors = [ "input:not([type='hidden']):not([type='submit']):not([type='button'])", "textarea", "select", "input[type='text']", "input[type='email']", "input[type='password']", "input:first-of-type", "form input:first-child", "[contenteditable='true']" ] for selector in fallback_selectors: try: # Check if element exists and is visible test_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) if test_result and test_result.get("content"): # Try to fill the field result = await self.fill_input_field(selector, field_value) return { "success": True, "selector": selector, "result": result, "confidence": 0.3 } except Exception: continue return {"success": False, "error": "No fallback patterns worked"} except Exception as e: return {"success": False, "error": str(e)} async def _execute_workflow_actions(self, actions: list) -> list: """ Execute a list of actions after successful field filling. Supported action types: - submit: Submit a form - click: Click an element - navigate: Navigate to a URL - wait: Wait for a specified time - keyboard: Send keyboard input """ action_results = [] for i, action in enumerate(actions): action_type = action.get("type", "").lower() target = action.get("target", "") delay = action.get("delay", 0.0) required = action.get("required", True) self.logger.info(f"Executing action {i+1}/{len(actions)}: {action_type}") result = { "action_index": i, "action_type": action_type, "target": target, "success": False, "required": required, "error": None } try: # Add delay before action if specified if delay > 0: await asyncio.sleep(delay) if action_type == "submit": # Submit form if target: await self._call_mcp_tool("chrome_click_element", {"selector": target}) else: # Try common submit methods await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) result["success"] = True elif action_type == "click": # Click element if not target: raise ValueError("Click action requires a target selector") await self._call_mcp_tool("chrome_click_element", {"selector": target}) result["success"] = True elif action_type == "navigate": # Navigate to URL if not target: raise ValueError("Navigate action requires a target URL") await self._navigate_mcp(target) result["success"] = True elif action_type == "wait": # Wait for specified time wait_time = float(target) if target else 1.0 await asyncio.sleep(wait_time) result["success"] = True elif action_type == "keyboard": # Send keyboard input if not target: raise ValueError("Keyboard action requires target keys") await self._call_mcp_tool("chrome_keyboard", {"keys": target}) result["success"] = True else: raise ValueError(f"Unknown action type: {action_type}") except Exception as e: self.logger.error(f"Action {action_type} failed: {e}") result["error"] = str(e) # If this is a required action and it failed, we might want to stop if required: self.logger.warning(f"Required action {action_type} failed, continuing with remaining actions") action_results.append(result) return action_results # Legacy methods for backward compatibility async def get_cached_form_fields(self) -> str: """Legacy method - redirects to get_cached_input_fields""" return await self.get_cached_input_fields() async def refresh_form_fields(self) -> str: """Legacy method - redirects to refresh_input_fields""" return await self.refresh_input_fields() async def _auto_detect_form_fields(self) -> None: """Legacy method - redirects to _auto_detect_input_fields""" await self._auto_detect_input_fields() async def _type_in_focused_element(self, text: str) -> str: """Type text in the currently focused element or find a suitable input field""" try: # First try to type in the currently focused element try: # Try typing directly - this works if an element is already focused for char in text: if char == ' ': await self._call_mcp_tool("chrome_keyboard", {"keys": "Space"}) elif char == '\n': await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) elif char == '\t': await self._call_mcp_tool("chrome_keyboard", {"keys": "Tab"}) else: await self._call_mcp_tool("chrome_keyboard", {"keys": char}) await asyncio.sleep(0.05) # Small delay between characters return f"✓ Typed text: '{text}' in focused element" except Exception as e: self.logger.debug(f"Direct typing failed, trying to find input field: {e}") # If direct typing fails, try to find and focus a suitable input field # Look for common input field selectors input_selectors = [ "input:focus, textarea:focus, [contenteditable]:focus", # Already focused "input[type='text']:visible, input[type='search']:visible, textarea:visible", # Visible text inputs "input:not([type]):visible", # Input without type "input[type='email']:visible, input[type='password']:visible", # Common input types "[contenteditable='true']:visible", # Contenteditable elements "input:visible, textarea:visible" # Any visible input ] for selector in input_selectors: try: # Click to focus the input await self._call_mcp_tool("chrome_click_element", {"selector": selector}) await asyncio.sleep(0.3) # Clear existing content await self._call_mcp_tool("chrome_keyboard", {"keys": "Control+a"}) await asyncio.sleep(0.1) # Type the text for char in text: if char == ' ': await self._call_mcp_tool("chrome_keyboard", {"keys": "Space"}) elif char == '\n': await self._call_mcp_tool("chrome_keyboard", {"keys": "Enter"}) elif char == '\t': await self._call_mcp_tool("chrome_keyboard", {"keys": "Tab"}) else: await self._call_mcp_tool("chrome_keyboard", {"keys": char}) await asyncio.sleep(0.05) return f"✓ Typed text: '{text}' in input field (selector: {selector})" except Exception: continue # Last resort: try the old fill method return await self._type_text_mcp(text) except Exception as e: self.logger.error(f"Error typing in focused element: {e}") return f"Error typing text: {str(e)}" async def _discover_form_fields_dynamically(self, field_name: str, value: str) -> dict: """ Dynamically discover form fields using MCP tools without relying on cached data. This method uses chrome_get_interactive_elements and chrome_get_content_web_form to find form fields in real-time. """ try: field_name_lower = field_name.lower().strip() self.logger.info(f"Starting dynamic discovery for field: '{field_name}'") # Strategy 1: Use chrome_get_interactive_elements to get all form elements try: interactive_result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select"] }) if interactive_result and "elements" in interactive_result: elements = interactive_result["elements"] self.logger.info(f"Found {len(elements)} interactive form elements") # Search for matching field by various attributes for element in elements: if self._is_field_match(element, field_name_lower): selector = self._extract_best_selector(element) if selector: try: fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled field using dynamic discovery: {selector}") return { "success": True, "message": f"✓ Filled '{field_name}' field using dynamic discovery: {fill_result}", "method": "interactive_elements", "selector": selector } except Exception as e: self.logger.debug(f"Failed to fill with selector {selector}: {e}") continue except Exception as e: self.logger.debug(f"chrome_get_interactive_elements failed: {e}") # Strategy 2: Use chrome_get_content_web_form to get form-specific content try: form_result = await self._call_mcp_tool("chrome_get_content_web_form", {}) if form_result and "content" in form_result: form_content = form_result["content"] self.logger.info(f"Retrieved form content for analysis") # Parse form content to find matching fields selector = self._parse_form_content_for_field(form_content, field_name_lower) if selector: try: fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled field using form content analysis: {selector}") return { "success": True, "message": f"✓ Filled '{field_name}' field using form content analysis: {fill_result}", "method": "form_content", "selector": selector } except Exception as e: self.logger.debug(f"Failed to fill with form content selector {selector}: {e}") except Exception as e: self.logger.debug(f"chrome_get_content_web_form failed: {e}") return {"success": False, "message": "Dynamic discovery failed"} except Exception as e: self.logger.error(f"Error in dynamic form field discovery: {e}") return {"success": False, "message": f"Error in dynamic discovery: {str(e)}"} def _is_field_match(self, element: dict, field_name_lower: str) -> bool: """ Check if an element matches the requested field name using various attributes. """ # Get element attributes attrs = element.get("attributes", {}) tag_name = element.get("tagName", "").lower() text_content = element.get("textContent", "").lower() # Extract relevant attributes name = attrs.get("name", "").lower() id_attr = attrs.get("id", "").lower() placeholder = attrs.get("placeholder", "").lower() aria_label = attrs.get("aria-label", "").lower() class_attr = attrs.get("class", "").lower() type_attr = attrs.get("type", "").lower() # Define field name variations field_variations = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace("_", ""), field_name_lower.replace("-", ""), field_name_lower.replace(" ", "_"), field_name_lower.replace(" ", "-") ] # Check for matches in various attributes for variation in field_variations: if (variation in name or variation in id_attr or variation in placeholder or variation in aria_label or variation in class_attr or variation in text_content): return True # Special handling for common field types if variation in ["email", "mail"] and ("email" in name or "mail" in name or type_attr == "email"): return True if variation in ["password", "pass"] and (type_attr == "password" or "password" in name): return True if variation in ["search"] and (type_attr == "search" or "search" in name or "search" in placeholder): return True if variation in ["phone", "tel"] and (type_attr == "tel" or "phone" in name or "tel" in name): return True if variation in ["name", "username", "user"] and ("name" in name or "user" in name): return True return False def _extract_best_selector(self, element: dict) -> str: """ Extract the best CSS selector for an element, prioritizing reliability with enhanced logging. """ attrs = element.get("attributes", {}) tag_name = element.get("tagName", "").lower() self.logger.debug(f"🔧 SELECTOR GENERATION: tag='{tag_name}', attrs={attrs}") # Priority order: id > name > type+name > class > tag+attributes if attrs.get("id"): selector = f"#{attrs['id']}" self.logger.debug(f"🎯 SELECTOR: Using ID selector: {selector}") return selector if attrs.get("name"): selector = f"{tag_name}[name='{attrs['name']}']" self.logger.debug(f"🎯 SELECTOR: Using name selector: {selector}") return selector if attrs.get("type") and attrs.get("name"): selector = f"{tag_name}[type='{attrs['type']}'][name='{attrs['name']}']" self.logger.debug(f"🎯 SELECTOR: Using type+name selector: {selector}") return selector if attrs.get("type"): selector = f"{tag_name}[type='{attrs['type']}']" self.logger.debug(f"🎯 SELECTOR: Using type selector: {selector}") return selector if attrs.get("class"): # Use first class for selector first_class = attrs["class"].split()[0] if attrs["class"].split() else "" if first_class: selector = f"{tag_name}.{first_class}" self.logger.debug(f"🎯 SELECTOR: Using class selector: {selector}") return selector if attrs.get("placeholder"): selector = f"{tag_name}[placeholder='{attrs['placeholder']}']" self.logger.debug(f"🎯 SELECTOR: Using placeholder selector: {selector}") return selector if attrs.get("aria-label"): selector = f"{tag_name}[aria-label='{attrs['aria-label']}']" self.logger.debug(f"🎯 SELECTOR: Using aria-label selector: {selector}") return selector # Fallback to tag name (least reliable) selector = tag_name self.logger.debug(f"⚠️ SELECTOR: Using fallback tag selector: {selector}") return selector def _parse_form_content_for_field(self, form_content: list, field_name_lower: str) -> str: """ Parse form content to find a selector for the requested field. """ try: # Convert form content to string for analysis content_text = "" if isinstance(form_content, list): for item in form_content: if isinstance(item, dict) and "text" in item: content_text += item["text"] + " " elif isinstance(item, str): content_text += item + " " else: content_text = str(form_content) content_lower = content_text.lower() # Look for field patterns in the content field_variations = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace("_", ""), field_name_lower.replace("-", "") ] # Generate potential selectors based on field name potential_selectors = [] for variation in field_variations: potential_selectors.extend([ f"input[name*='{variation}']", f"input[id*='{variation}']", f"input[placeholder*='{variation}']", f"textarea[name*='{variation}']", f"textarea[id*='{variation}']", f"select[name*='{variation}']", f"[aria-label*='{variation}']" ]) # Return the first potential selector (could be enhanced with content analysis) return potential_selectors[0] if potential_selectors else "" except Exception as e: self.logger.debug(f"Error parsing form content: {e}") return "" async def _enhanced_field_detection_with_retry(self, field_name: str, value: str, max_retries: int = 3) -> dict: """ Enhanced field detection with retry mechanism using multiple MCP strategies. """ field_name_lower = field_name.lower().strip() for attempt in range(max_retries): try: self.logger.info(f"Enhanced detection attempt {attempt + 1}/{max_retries} for field: '{field_name}'") # Strategy 1: Get all interactive elements and retry field matching try: interactive_result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select", "button"] }) if interactive_result and "elements" in interactive_result: elements = interactive_result["elements"] # Try more flexible matching on each retry for element in elements: if self._is_flexible_field_match(element, field_name_lower, attempt): selector = self._extract_best_selector(element) if selector: try: fill_result = await self.fill_input_field(selector, value) return { "success": True, "message": f"✓ Filled '{field_name}' field using enhanced detection (attempt {attempt + 1}): {fill_result}", "method": f"enhanced_retry_{attempt + 1}", "selector": selector } except Exception as e: self.logger.debug(f"Failed to fill with enhanced selector {selector}: {e}") continue except Exception as e: self.logger.debug(f"Enhanced detection attempt {attempt + 1} failed: {e}") # Wait before retry if attempt < max_retries - 1: await asyncio.sleep(1) except Exception as e: self.logger.debug(f"Enhanced detection attempt {attempt + 1} error: {e}") return {"success": False, "message": "Enhanced detection with retry failed"} def _is_flexible_field_match(self, element: dict, field_name_lower: str, attempt: int) -> bool: """ Flexible field matching that becomes more permissive with each retry attempt. """ # Get element attributes attrs = element.get("attributes", {}) text_content = element.get("textContent", "").lower() # Extract relevant attributes name = attrs.get("name", "").lower() id_attr = attrs.get("id", "").lower() placeholder = attrs.get("placeholder", "").lower() aria_label = attrs.get("aria-label", "").lower() class_attr = attrs.get("class", "").lower() type_attr = attrs.get("type", "").lower() # Attempt 0: Exact matching if attempt == 0: return (field_name_lower in name or field_name_lower in id_attr or field_name_lower in placeholder or field_name_lower in aria_label) # Attempt 1: Partial matching elif attempt == 1: field_parts = field_name_lower.split() for part in field_parts: if (part in name or part in id_attr or part in placeholder or part in aria_label or part in class_attr or part in text_content): return True # Attempt 2: Very flexible matching elif attempt >= 2: # Remove common words and try matching common_words = ["field", "input", "box", "text", "enter", "type"] field_clean = field_name_lower for word in common_words: field_clean = field_clean.replace(word, "").strip() if field_clean and (field_clean in name or field_clean in id_attr or field_clean in placeholder or field_clean in aria_label or field_clean in class_attr): return True # Type-based matching as last resort if field_name_lower in ["email", "mail"] and type_attr == "email": return True if field_name_lower in ["password", "pass"] and type_attr == "password": return True if field_name_lower in ["search"] and type_attr == "search": return True return False async def _analyze_page_content_for_field(self, field_name: str, value: str) -> dict: """ Analyze page content to find form fields as a final fallback method. """ try: field_name_lower = field_name.lower().strip() self.logger.info(f"Starting content analysis for field: '{field_name}'") # Get page content for analysis try: content_result = await self._call_mcp_tool("chrome_get_web_content", { "textOnly": False }) if not content_result or "content" not in content_result: return {"success": False, "message": "Could not get page content for analysis"} # Generate intelligent selectors based on field name and content analysis intelligent_selectors = self._generate_intelligent_selectors_from_content(field_name_lower) for selector in intelligent_selectors: try: # Test if selector exists test_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) if test_result and test_result.get("content"): # Try to fill the field fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled field using content analysis: {selector}") return { "success": True, "message": f"✓ Filled '{field_name}' field using content analysis: {fill_result}", "method": "content_analysis", "selector": selector } except Exception as e: self.logger.debug(f"Content analysis selector '{selector}' failed: {e}") continue except Exception as e: self.logger.debug(f"Content analysis failed: {e}") return {"success": False, "message": "Content analysis failed to find field"} except Exception as e: self.logger.error(f"Error in content analysis: {e}") return {"success": False, "message": f"Error in content analysis: {str(e)}"} def _generate_intelligent_selectors_from_content(self, field_name_lower: str) -> list: """ Generate intelligent CSS selectors based on field name and common patterns. """ selectors = [] # Field name variations variations = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace("_", ""), field_name_lower.replace("-", ""), field_name_lower.replace(" ", "_"), field_name_lower.replace(" ", "-") ] # Generate selectors for each variation for variation in variations: selectors.extend([ f"input[name*='{variation}']", f"input[id*='{variation}']", f"input[placeholder*='{variation}']", f"textarea[name*='{variation}']", f"textarea[id*='{variation}']", f"select[name*='{variation}']", f"[aria-label*='{variation}']", f".{variation}", f"#{variation}", f"input[class*='{variation}']", f"textarea[class*='{variation}']" ]) # Add type-specific selectors if field_name_lower in ["email", "mail"]: selectors.extend([ "input[type='email']", "input[name*='email']", "input[name*='mail']" ]) elif field_name_lower in ["password", "pass"]: selectors.extend([ "input[type='password']", "input[name*='password']", "input[name*='pass']" ]) elif field_name_lower in ["search"]: selectors.extend([ "input[type='search']", "input[name*='search']", "input[name='q']", "textarea[name='q']" ]) elif field_name_lower in ["phone", "tel"]: selectors.extend([ "input[type='tel']", "input[name*='phone']", "input[name*='tel']" ]) elif field_name_lower in ["name", "username", "user"]: selectors.extend([ "input[name*='name']", "input[name*='user']" ]) return selectors async def _direct_mcp_element_search(self, field_name: str, value: str) -> dict: """ Direct MCP element search as final fallback - uses only real-time MCP tools. This method exhaustively searches for form elements using various MCP approaches. """ try: field_name_lower = field_name.lower().strip() self.logger.info(f"Starting direct MCP element search for field: '{field_name}'") # Strategy 1: Get ALL interactive elements and search exhaustively try: all_elements_result = await self._call_mcp_tool("chrome_get_interactive_elements", {}) if all_elements_result and "elements" in all_elements_result: elements = all_elements_result["elements"] self.logger.info(f"Found {len(elements)} total interactive elements") # Search through ALL elements with very flexible matching for element in elements: if self._is_very_flexible_match(element, field_name_lower): selector = self._extract_best_selector(element) if selector: try: fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled using direct search: {selector}") return { "success": True, "message": f"✓ Filled '{field_name}' using direct MCP search: {fill_result}", "method": "direct_mcp_search", "selector": selector } except Exception as e: self.logger.debug(f"Direct search selector {selector} failed: {e}") continue except Exception as e: self.logger.debug(f"Direct MCP element search failed: {e}") # Strategy 2: Use chrome_get_web_content to find ANY input elements try: input_search_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": "input, textarea, select", "textOnly": False }) if input_search_result and input_search_result.get("content"): self.logger.info("Found input elements via web content search") # Generate and test common selectors common_selectors = self._generate_common_selectors(field_name_lower) for selector in common_selectors: try: # Test if selector exists test_result = await self._call_mcp_tool("chrome_get_web_content", { "selector": selector, "textOnly": False }) if test_result and test_result.get("content"): fill_result = await self.fill_input_field(selector, value) self.logger.info(f"Successfully filled using common selector: {selector}") return { "success": True, "message": f"✓ Filled '{field_name}' using common selector: {fill_result}", "method": "common_selector", "selector": selector } except Exception as e: self.logger.debug(f"Common selector {selector} failed: {e}") continue except Exception as e: self.logger.debug(f"Web content search failed: {e}") return {"success": False, "message": "Direct MCP search failed"} except Exception as e: self.logger.error(f"Error in direct MCP element search: {e}") return {"success": False, "message": f"Error in direct search: {str(e)}"} def _is_very_flexible_match(self, element: dict, field_name_lower: str) -> bool: """ Very flexible matching for direct search - matches almost anything related. """ # Get element attributes attrs = element.get("attributes", {}) tag_name = element.get("tagName", "").lower() text_content = element.get("textContent", "").lower() # Only consider form elements if tag_name not in ["input", "textarea", "select"]: return False # Extract all text-based attributes all_text = " ".join([ attrs.get("name", ""), attrs.get("id", ""), attrs.get("placeholder", ""), attrs.get("aria-label", ""), attrs.get("class", ""), attrs.get("title", ""), text_content ]).lower() # Very flexible matching - any partial match field_parts = field_name_lower.replace("-", " ").replace("_", " ").split() for part in field_parts: if len(part) > 2 and part in all_text: # Only match parts longer than 2 chars return True # Type-based matching for common fields type_attr = attrs.get("type", "").lower() if field_name_lower in ["email", "mail"] and type_attr == "email": return True if field_name_lower in ["password", "pass"] and type_attr == "password": return True if field_name_lower in ["search", "query"] and type_attr == "search": return True if field_name_lower in ["phone", "tel"] and type_attr == "tel": return True return False def _generate_common_selectors(self, field_name_lower: str) -> list: """ Generate common CSS selectors for field names. """ selectors = [] # Clean field name variations variations = [ field_name_lower, field_name_lower.replace(" ", ""), field_name_lower.replace("_", ""), field_name_lower.replace("-", ""), field_name_lower.replace(" ", "_"), field_name_lower.replace(" ", "-") ] # Generate selectors for each variation for variation in variations: if variation: # Only if not empty selectors.extend([ f"input[name='{variation}']", f"input[id='{variation}']", f"textarea[name='{variation}']", f"textarea[id='{variation}']", f"select[name='{variation}']", f"select[id='{variation}']", f"#{variation}", f".{variation}", f"input[name*='{variation}']", f"input[id*='{variation}']", f"input[placeholder*='{variation}']", f"[aria-label*='{variation}']" ]) # Add type-specific selectors if field_name_lower in ["email", "mail"]: selectors.extend([ "input[type='email']", "input[name*='email']", "input[name*='mail']", "input[id*='email']", "input[id*='mail']" ]) elif field_name_lower in ["password", "pass"]: selectors.extend([ "input[type='password']", "input[name*='password']", "input[name*='pass']" ]) elif field_name_lower in ["search", "query"]: selectors.extend([ "input[type='search']", "input[name*='search']", "input[name='q']", "textarea[name='q']", "[role='searchbox']" ]) elif field_name_lower in ["phone", "tel"]: selectors.extend([ "input[type='tel']", "input[name*='phone']", "input[name*='tel']" ]) elif field_name_lower in ["name", "username", "user"]: selectors.extend([ "input[name*='name']", "input[name*='user']", "input[id*='name']", "input[id*='user']" ]) # Remove duplicates while preserving order seen = set() unique_selectors = [] for selector in selectors: if selector not in seen: seen.add(selector) unique_selectors.append(selector) return unique_selectors async def _smart_click_mcp(self, element_description: str) -> str: """Smart click that finds elements by text content, labels, or descriptions with enhanced logging""" try: self.logger.info(f"🔍 SELECTOR SEARCH: Looking for clickable element matching '{element_description}'") # First try to find interactive elements self.logger.debug("📋 Step 1: Getting interactive elements from page") interactive_result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["button", "a", "input", "select"] }) if interactive_result and "elements" in interactive_result: elements = interactive_result["elements"] self.logger.info(f"📊 Found {len(elements)} interactive elements on page") # Log all found elements for debugging for i, element in enumerate(elements): element_info = { "index": i, "tag": element.get("tagName", "unknown"), "text": element.get("textContent", "")[:50], "attributes": {k: v for k, v in element.get("attributes", {}).items() if k in ["id", "class", "name", "type", "aria-label", "title", "value"]} } self.logger.debug(f"🔍 Element {i}: {element_info}") # Look for elements that match the description matching_elements = [] for i, element in enumerate(elements): if self._element_matches_description(element, element_description): selector = self._extract_best_selector(element) if selector: matching_elements.append({ "index": i, "element": element, "selector": selector, "match_reason": self._get_match_reason(element, element_description) }) if matching_elements: self.logger.info(f"✅ Found {len(matching_elements)} matching elements:") for match in matching_elements: self.logger.info(f" 🎯 Match {match['index']}: selector='{match['selector']}', reason='{match['match_reason']}'") # Try the first matching element best_match = matching_elements[0] selector = best_match["selector"] self.logger.info(f"🚀 EXECUTING CLICK: Using selector '{selector}' (reason: {best_match['match_reason']})") try: result = await self._call_mcp_tool("chrome_click_element", {"selector": selector}) self.logger.info(f"✅ CLICK SUCCESS: Clicked on '{element_description}' using selector: {selector}") self.logger.debug(f"📝 MCP Result: {result}") return f"✅ Clicked on '{element_description}' using selector: {selector} (reason: {best_match['match_reason']})" except Exception as click_error: self.logger.error(f"❌ CLICK FAILED: Error clicking selector '{selector}': {click_error}") # Try other matching elements if available for match in matching_elements[1:]: try: alt_selector = match["selector"] self.logger.info(f"🔄 RETRY: Trying alternative selector '{alt_selector}'") result = await self._call_mcp_tool("chrome_click_element", {"selector": alt_selector}) self.logger.info(f"✅ RETRY SUCCESS: Clicked using alternative selector: {alt_selector}") return f"✅ Clicked on '{element_description}' using alternative selector: {alt_selector}" except Exception as retry_error: self.logger.debug(f"❌ Alternative selector '{alt_selector}' also failed: {retry_error}") continue # If all matching elements failed, continue to fallback methods self.logger.warning(f"⚠️ All {len(matching_elements)} matching elements failed to click") else: self.logger.warning(f"⚠️ No elements matched description '{element_description}' in interactive elements") # Fallback to direct selector if description looks like a CSS selector if any(char in element_description for char in ['#', '.', '[', ']']): self.logger.info(f"🔧 FALLBACK 1: Treating '{element_description}' as direct CSS selector") try: result = await self._call_mcp_tool("chrome_click_element", {"selector": element_description}) self.logger.info(f"✅ DIRECT SELECTOR SUCCESS: Clicked using direct selector: {element_description}") return f"✅ Clicked on element with direct selector: {element_description}" except Exception as direct_error: self.logger.error(f"❌ DIRECT SELECTOR FAILED: {direct_error}") # Try common button/link patterns self.logger.info(f"🔧 FALLBACK 2: Trying common selector patterns for '{element_description}'") common_selectors = [ f"button:contains('{element_description}')", f"a:contains('{element_description}')", f"input[value*='{element_description}']", f"[aria-label*='{element_description}']", f"[title*='{element_description}']" ] for i, selector in enumerate(common_selectors): try: self.logger.debug(f"🔍 Trying pattern {i+1}/{len(common_selectors)}: {selector}") result = await self._call_mcp_tool("chrome_click_element", {"selector": selector}) self.logger.info(f"✅ PATTERN SUCCESS: Clicked using pattern: {selector}") return f"✅ Clicked on '{element_description}' using pattern: {selector}" except Exception as pattern_error: self.logger.debug(f"❌ Pattern failed: {pattern_error}") continue self.logger.error(f"❌ ALL METHODS FAILED: Could not find or click element matching: {element_description}") return f"❌ Could not find clickable element matching: {element_description}" except Exception as e: self.logger.error(f"💥 CRITICAL ERROR in smart click: {str(e)}") return f"💥 Error in smart click: {str(e)}" def _element_matches_description(self, element: dict, description: str) -> bool: """Check if an element matches the given description""" description_lower = description.lower() # Check text content text_content = element.get("textContent", "").lower() if description_lower in text_content: return True # Check attributes attrs = element.get("attributes", {}) for attr_name, attr_value in attrs.items(): if isinstance(attr_value, str) and description_lower in attr_value.lower(): return True # Check for common button/link text patterns if element.get("tagName", "").lower() in ["button", "a", "input"]: # Check value attribute for buttons if "value" in attrs and description_lower in attrs["value"].lower(): return True # Check aria-label if "aria-label" in attrs and description_lower in attrs["aria-label"].lower(): return True # Check title if "title" in attrs and description_lower in attrs["title"].lower(): return True return False def _get_match_reason(self, element: dict, description: str) -> str: """Get the reason why an element matches the description (for debugging)""" description_lower = description.lower() reasons = [] # Check text content text_content = element.get("textContent", "").lower() if description_lower in text_content: reasons.append(f"text_content='{text_content[:30]}...'") # Check attributes attrs = element.get("attributes", {}) for attr_name, attr_value in attrs.items(): if isinstance(attr_value, str) and description_lower in attr_value.lower(): reasons.append(f"{attr_name}='{attr_value}'") # Check for common button/link text patterns if element.get("tagName", "").lower() in ["button", "a", "input"]: # Check value attribute for buttons if "value" in attrs and description_lower in attrs["value"].lower(): reasons.append(f"value='{attrs['value']}'") # Check aria-label if "aria-label" in attrs and description_lower in attrs["aria-label"].lower(): reasons.append(f"aria-label='{attrs['aria-label']}'") # Check title if "title" in attrs and description_lower in attrs["title"].lower(): reasons.append(f"title='{attrs['title']}'") return "; ".join(reasons) if reasons else "unknown_match" async def _get_page_content_mcp(self) -> str: """Get page content using MCP chrome_get_web_content tool""" try: result = await self._call_mcp_tool("chrome_get_web_content", { "format": "text" }) if result and "content" in result: content = result["content"] if isinstance(content, list) and len(content) > 0: text_content = content[0].get("text", "") return f"Page content retrieved:\n{text_content[:1000]}..." if len(text_content) > 1000 else f"Page content:\n{text_content}" else: return str(content) else: return "No content found on the page" except Exception as e: return f"Error getting page content: {str(e)}" async def _get_form_fields_mcp(self) -> str: """Get form fields using MCP chrome_get_interactive_elements tool""" try: result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["input", "textarea", "select"] }) if result and "elements" in result: elements = result["elements"] if not elements: return "No form fields found on the page" field_info = [] for element in elements: attrs = element.get("attributes", {}) tag_name = element.get("tagName", "").lower() field_desc = f"- {tag_name}" if "name" in attrs: field_desc += f" (name: {attrs['name']})" if "id" in attrs: field_desc += f" (id: {attrs['id']})" if "type" in attrs: field_desc += f" (type: {attrs['type']})" if "placeholder" in attrs: field_desc += f" (placeholder: {attrs['placeholder']})" field_info.append(field_desc) return f"Found {len(elements)} form fields:\n" + "\n".join(field_info[:10]) else: return "No form fields found" except Exception as e: return f"Error getting form fields: {str(e)}" async def _get_interactive_elements_mcp(self) -> str: """Get interactive elements using MCP chrome_get_interactive_elements tool""" try: result = await self._call_mcp_tool("chrome_get_interactive_elements", { "types": ["button", "a", "input", "select"] }) if result and "elements" in result: elements = result["elements"] if not elements: return "No interactive elements found on the page" element_info = [] for element in elements: attrs = element.get("attributes", {}) tag_name = element.get("tagName", "").lower() text_content = element.get("textContent", "").strip() element_desc = f"- {tag_name}" if text_content: element_desc += f" '{text_content[:50]}'" if "id" in attrs: element_desc += f" (id: {attrs['id']})" if "class" in attrs: element_desc += f" (class: {attrs['class'][:30]})" element_info.append(element_desc) return f"Found {len(elements)} interactive elements:\n" + "\n".join(element_info[:15]) else: return "No interactive elements found" except Exception as e: return f"Error getting interactive elements: {str(e)}" async def process_natural_language_command(self, command: str) -> str: """ Process natural language commands with enhanced real-time capabilities. This is the main entry point for voice commands with intelligent routing. """ try: self.logger.info(f"Processing natural language command: {command}") # Parse the command action, params = self._parse_voice_command(command) if not action: # Try to infer action from command context action, params = self._infer_action_from_context(command) if action: # Execute with real-time feedback result = await self._execute_action(action, params) # Provide contextual response return self._format_response_for_voice(action, result, params) else: return f"I didn't understand the command: {command}. Try saying something like 'fill email with john@example.com' or 'click login button'." except Exception as e: self.logger.error(f"Error processing natural language command: {e}") return f"Error processing command: {str(e)}" def _infer_action_from_context(self, command: str) -> tuple[Optional[str], Dict[str, Any]]: """Infer action from command context when direct parsing fails""" command_lower = command.lower().strip() # Email detection if '@' in command and any(word in command_lower for word in ['email', 'mail']): email_match = re.search(r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', command) if email_match: return 'fill_field_by_name', {'field_name': 'email', 'value': email_match.group(1)} # Phone number detection phone_match = re.search(r'([\d\-\+\(\)\s]{10,})', command) if phone_match and any(word in command_lower for word in ['phone', 'number', 'mobile', 'telephone']): return 'fill_field_by_name', {'field_name': 'phone', 'value': phone_match.group(1)} # Password detection if any(word in command_lower for word in ['password', 'pass']): # Extract potential password (non-space sequence after password keyword) password_match = re.search(r'(?:password|pass)\s+(\S+)', command_lower) if password_match: return 'fill_field_by_name', {'field_name': 'password', 'value': password_match.group(1)} # Button/link click detection if any(word in command_lower for word in ['button', 'link', 'click', 'press', 'tap']): # Extract button/link text for pattern in [r'(?:click|press|tap)\s+(?:on\s+)?(?:the\s+)?(.+)', r'(.+)\s+(?:button|link)']: match = re.search(pattern, command_lower) if match: return 'click', {'text': match.group(1).strip()} # Search detection if any(word in command_lower for word in ['search', 'find', 'look']): search_match = re.search(r'(?:search|find|look)\s+(?:for\s+)?(.+)', command_lower) if search_match: return 'fill_field_by_name', {'field_name': 'search', 'value': search_match.group(1)} return None, {} def _format_response_for_voice(self, action: str, result: str, params: Dict[str, Any]) -> str: """Format response for voice output with context""" try: if action == 'fill_field_by_name': field_name = params.get('field_name', 'field') value = params.get('value', '') if 'success' in result.lower() or 'filled' in result.lower(): return f"Successfully filled {field_name} field with {value[:20]}{'...' if len(value) > 20 else ''}" else: return f"Could not fill {field_name} field. {result}" elif action == 'click': element = params.get('text', 'element') if 'success' in result.lower() or 'clicked' in result.lower(): return f"Successfully clicked {element}" else: return f"Could not click {element}. {result}" elif action in ['get_page_content', 'get_form_fields', 'get_interactive_elements']: return result else: return result except Exception: return result