Major refactor: Multi-user Chrome MCP extension with remote server architecture

This commit is contained in:
nasir@endelospay.com
2025-08-21 20:09:57 +05:00
parent d97cad1736
commit 5d869f6a7c
125 changed files with 16249 additions and 11906 deletions

158
test_info_extraction.py Normal file
View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""
Test script to verify the information extraction functionality
"""
import asyncio
import re
# --- Patterns used by test_extract_search_information ----------------------

# Separators are limited to "-", "." and a plain space so that a phone number
# can never be stitched together from digits on two different lines.
_PHONE_PATTERNS = (
    r'(\+\d{1,3}[-. ]?\d{1,4}[-. ]?\d{1,4}[-. ]?\d{1,9})',  # International format
    r'(\(?[0-9]{3}\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4})',  # US format
    r'(\d{2,4}[-. ]?\d{6,8})',  # General format
)
_EMAIL_PATTERN = r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})'
_URL_PATTERN = r'(https?://[^\s<>"]+|www\.[^\s<>"]+)'

# A clock time: "8:00", "8:00 AM" or "8 AM" (a bare number is not a time).
_TIME = r'(?:\d{1,2}:\d{2}(?:\s*(?:AM|PM)\b)?|\d{1,2}\s*(?:AM|PM)\b)'
_TIME_SEP = r'\s*(?:-|–|—|to)\s*'
# A time optionally followed by the end of a range ("8:00 AM - 5:00 PM").
_TIME_SPAN = _TIME + r'(?:' + _TIME_SEP + _TIME + r')?'
# Whole-word day names only, so "money" or "Sunset" are not taken for days.
_DAY = (
    r'\b(?:Mon(?:day)?|Tue(?:sday|s)?|Wed(?:nesday)?|Thu(?:rsday|rs|r)?'
    r'|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)s?\b'
)
# Tried in order; group 1 of the first pattern that matches is the result.
_HOURS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "Mon-Fri 9:00 AM - 6:00 PM, Sat 9:00 AM - 2:00 PM"
        r'(' + _DAY + r'[^.]*?' + _TIME_SPAN
        + r'(?:\s*[,;]\s*' + _DAY + r'[^.,;\n]*?' + _TIME_SPAN + r')*)',
        # "Hours: 9 AM - 5 PM" (the label itself is not captured)
        r'\bHours?:?\s*([^.]*?' + _TIME_SPAN + r')',
        r'\bOpen:?\s*([^.]*?' + _TIME_SPAN + r')',
        # A bare time range with no label at all.
        r'(' + _TIME + _TIME_SEP + _TIME + r')',
    )
)

_STREET_SUFFIX = (
    r'(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Drive|Dr|Lane|Ln|Way'
    r'|Circle|Cir|Court|Ct|Place|Pl)'
)
# Tried in order; the first pattern that yields any match wins. Every pattern
# is confined to a single line of the input.
_ADDRESS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "123 Main Street, Anytown, NY 12345"
        r'\d+[ \t]+[A-Za-z \t]+?\b' + _STREET_SUFFIX + r'\b\.?[^,\n]*'
        r'(?:,[ \t]*[A-Za-z \t]+)*(?:\d{5}(?:-\d{4})?)?',
        # City, State ZIP
        r'[A-Za-z][A-Za-z \t]*,[ \t]*[A-Z]{2}[ \t]+\d{5}(?:-\d{4})?',
        # Pakistan addresses
        r'\d+[ \t]+[A-Za-z0-9 \t,.-]+?\b(?:Pakistan|PK)\b',
    )
)

_BUSINESS_KEYWORDS = (
    'post office', 'bank', 'hospital', 'school', 'office', 'center', 'department',
)


def _unique(items) -> list:
    """Return the stripped, non-empty strings of *items* in first-seen order."""
    stripped = (item.strip() for item in items)
    # dict.fromkeys keeps insertion order; list(set(...)) would reorder the
    # values differently from run to run because of string hash randomization.
    return list(dict.fromkeys(item for item in stripped if item))


def _extract_phones(text: str) -> list:
    """Return the phone numbers found in *text*, without partial duplicates."""
    candidates = _unique(
        match for pattern in _PHONE_PATTERNS for match in re.findall(pattern, text)
    )
    # The patterns overlap: the general pattern also matches the tail of an
    # international number ("68-5555123" inside "+92-68-5555123"). Keep only
    # candidates that are not contained in another, longer candidate.
    return [
        phone
        for phone in candidates
        if not any(phone != other and phone in other for other in candidates)
    ]


def _extract_hours(text: str) -> str:
    """Return the first opening-hours phrase found in *text*, or ''."""
    for pattern in _HOURS_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1).strip()
    return ''


def _extract_addresses(text: str) -> list:
    """Return the matches of the first address pattern that finds anything."""
    for pattern in _ADDRESS_PATTERNS:
        found = _unique(match.group(0) for match in pattern.finditer(text))
        if found:
            return found
    return []


def _find_business_name(text: str, query: str) -> str:
    """Return a short line near the top of *text* naming the business, or ''."""
    query_lower = query.lower()
    first_lines = [line.strip() for line in text.split('\n')[:5]]
    for keyword in _BUSINESS_KEYWORDS:
        if keyword not in query_lower:
            continue
        for line in first_lines:
            if keyword in line.lower() and len(line) < 100:
                return line
        # No line mentions this keyword: fall through to the next keyword
        # present in the query instead of giving up.
    return ''


async def test_extract_search_information(search_results: str, query: str) -> str:
    """Test version of the extract search information function.

    Scans *search_results* for phone numbers, e-mail addresses, URLs, opening
    hours, postal addresses and a business name, and formats whatever was
    found as a Markdown-style reply that quotes *query*.

    The coroutine awaits nothing itself; it is declared ``async`` and must be
    awaited (or run with ``asyncio.run``) by the caller.

    Args:
        search_results: Raw text of the search results.
        query: The search query the results belong to.

    Returns:
        The formatted reply, or *search_results* unchanged when no contact
        detail could be extracted or when extraction fails.
    """
    try:
        phones = _extract_phones(search_results)
        emails = _unique(re.findall(_EMAIL_PATTERN, search_results))
        # Drop sentence punctuation that the URL pattern swallows at the end.
        websites = _unique(
            url.rstrip('.,;:!?') for url in re.findall(_URL_PATTERN, search_results)
        )
        hours = _extract_hours(search_results)
        addresses = _extract_addresses(search_results)
        business_name = _find_business_name(search_results, query)
    except Exception as e:
        # Deliberate best effort: report the problem and fall back to the raw
        # results (e.g. when a non-string argument is passed in).
        print(f"Error extracting search information: {e}")
        return search_results

    # If no specific information was extracted, return the original results.
    if not any((phones, emails, websites, hours, addresses)):
        return search_results

    parts = [f"I found information for your search '{query}':\n\n"]
    if business_name:
        parts.append(f"🏢 **{business_name}**\n\n")
    if phones:
        parts.append(f"📞 **Phone**: {', '.join(phones)}\n")
    if emails:
        parts.append(f"📧 **Email**: {', '.join(emails)}\n")
    if addresses:
        parts.append(f"📍 **Address**: {', '.join(addresses[:2])}\n")  # Limit to 2 addresses
    if websites:
        parts.append(f"🌐 **Website**: {', '.join(websites[:2])}\n")  # Limit to 2 URLs
    if hours:
        parts.append(f"🕒 **Hours**: {hours}\n")

    # Add a summary: the first line that is long enough and is not a bare URL.
    stripped_lines = (line.strip() for line in search_results.split('\n'))
    summary = next(
        (line for line in stripped_lines
         if len(line) > 20 and not line.startswith('http')),
        None,
    )
    if summary:
        parts.append(f"\n **Additional Info**: {summary[:200]}...\n")

    parts.append(
        "\nWould you like me to help you with anything specific, "
        "like getting directions or finding more details?"
    )
    return ''.join(parts)


# The name starts with "test_" but this is a helper that takes plain
# arguments, not fixtures; keep pytest from collecting it as a test case.
test_extract_search_information.__test__ = False
# Test with sample search results
async def main():
    """Run the extractor on two canned search results and print each reply."""
    separator = "\n" + "=" * 50 + "\n"

    # Scenario 1: a post office listing with an international phone number.
    post_office_results = """
Post Office Fortabbas - Pakistan Post
Contact Information
Phone: +92-68-5555123
Email: fortabbas@pakistanpost.gov.pk
Address: Main Bazaar Road, Fortabbas, Punjab, Pakistan
Hours: Monday to Friday 8:00 AM - 5:00 PM
Services: Mail delivery, postal services, money orders
Website: www.pakistanpost.gov.pk
"""
    post_office_reply = await test_extract_search_information(
        post_office_results,
        "phone number post office Fortabbas",
    )
    print("Test 1 - Post Office Search:")
    print(post_office_reply)
    print(separator)

    # Scenario 2: a bank branch listing with a US-style phone number and address.
    bank_results = """
ABC Bank Branch
Contact: (555) 123-4567
Location: 123 Main Street, Anytown, NY 12345
Business Hours: Mon-Fri 9:00 AM - 6:00 PM, Sat 9:00 AM - 2:00 PM
Email: info@abcbank.com
Website: https://www.abcbank.com
Services: Banking, loans, investments
"""
    bank_reply = await test_extract_search_information(
        bank_results,
        "ABC Bank contact information",
    )
    print("Test 2 - Bank Search:")
    print(bank_reply)
# Script entry point: run the demo coroutine only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())