Major refactor: Multi-user Chrome MCP extension with remote server architecture
This commit is contained in:
158
test_info_extraction.py
Normal file
158
test_info_extraction.py
Normal file
@@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify the information extraction functionality
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
# Phone-number shapes, ordered from most to least specific.  The order matters:
# a looser pattern may only contribute matches that do not overlap a match
# already claimed by a stricter one (see _find_phones).
_PHONE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'(\+\d{1,3}[-\.\s]?\d{1,4}[-\.\s]?\d{1,4}[-\.\s]?\d{1,9})',  # International format
        r'(\(?[0-9]{3}\)?[-\.\s]?[0-9]{3}[-\.\s]?[0-9]{4})',  # US format
        r'(\d{2,4}[-\.\s]?\d{6,8})',  # General format
    )
)

_EMAIL_PATTERN = re.compile(r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})')

_URL_PATTERN = re.compile(r'(https?://[^\s<>"]+|www\.[^\s<>"]+)')
# Sentence punctuation that the URL pattern swallows when a link ends a clause.
_URL_TRAILING_PUNCTUATION = '.,;:!?'

# A clock time such as "8:00", "8:00 AM" or "8 pm" (patterns are case-insensitive).
_TIME = r'(?:\d{1,2}:\d{2}(?:\s*(?:AM|PM))?|\d{1,2}\s*(?:AM|PM))'
# Optional closing time, so "8:00 AM - 5:00 PM" is captured as a whole range
# instead of being cut off after the opening time.
_RANGE_TAIL = r'(?:\s*(?:-|–|to)\s*' + _TIME + r')?'

# Tried in order; the first pattern that matches wins.
_HOURS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'((?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)[^.]*?' + _TIME + _RANGE_TAIL + r')',
        r'(Hours?:?\s*[^.]*?' + _TIME + _RANGE_TAIL + r')',
        r'(Open:?\s*[^.]*?' + _TIME + _RANGE_TAIL + r')',
        r'(\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)\s*-\s*\d{1,2}:\d{2}\s*(?:AM|PM|am|pm))',
        r'(\d{1,2}\s*(?:AM|PM|am|pm)\s*-\s*\d{1,2}\s*(?:AM|PM|am|pm))',
    )
)

# Tried in order; only the first pattern that yields any match is used.
_ADDRESS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # Street address; the optional tail keeps a trailing 5-digit ZIP (or ZIP+4).
        r'(\d+\s+[A-Za-z\s]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Drive|Dr|Lane|Ln|Way|Circle|Cir|Court|Ct|Place|Pl)[^,]*(?:,\s*[A-Za-z\s]+)*(?:\s*\d{5}(?:-\d{4})?\b)?)',
        r'([A-Za-z\s]+,\s*[A-Z]{2}\s+\d{5})',  # City, State ZIP
        r'(\d+\s+[A-Za-z0-9\s,.-]+(?:Pakistan|PK))',  # Pakistan addresses
    )
)

_BUSINESS_KEYWORDS = ('post office', 'bank', 'hospital', 'school', 'office', 'center', 'department')


def _unique(items):
    """Return *items* as a list without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _find_phones(text: str) -> list:
    """Return phone numbers found in *text*, in pattern-priority then text order.

    A match is skipped when it overlaps a span already claimed by an earlier
    match, so a looser pattern cannot report a fragment of a number that a
    stricter pattern already captured in full.
    """
    claimed = []
    phones = []
    for pattern in _PHONE_PATTERNS:
        for match in pattern.finditer(text):
            start, end = match.span()
            if any(start < taken_end and taken_start < end for taken_start, taken_end in claimed):
                continue
            claimed.append((start, end))
            phones.append(match.group(1))
    return _unique(phones)


def _find_hours(text: str) -> str:
    """Return the first opening-hours snippet found in *text*, or ''."""
    for pattern in _HOURS_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1).strip()
    return ''


def _find_addresses(text: str) -> list:
    """Return address candidates from the first address pattern that matches."""
    for pattern in _ADDRESS_PATTERNS:
        # The patterns allow any whitespace inside a match; collapse it so a
        # candidate never carries line breaks into the one-line output.
        found = _unique(' '.join(hit.split()) for hit in pattern.findall(text) if hit.strip())
        if found:
            return found
    return []


def _guess_business_name(text: str, query) -> str:
    """Return a line among the first five of *text* that names the business.

    Only the first keyword from _BUSINESS_KEYWORDS that occurs in *query* is
    considered; '' is returned when no keyword or no suitable line is found.
    """
    query_lower = (query or '').lower()
    keyword = next((word for word in _BUSINESS_KEYWORDS if word in query_lower), None)
    if keyword is None:
        return ''
    for line in text.split('\n')[:5]:  # Check first few lines
        candidate = line.strip()
        if keyword in candidate.lower() and len(candidate) < 100:
            return candidate
    return ''


async def test_extract_search_information(search_results: str, query: str) -> str:
    """Summarise contact details found in raw search-result text.

    Phone numbers, e-mail addresses, URLs, opening hours and postal addresses
    are pulled out of *search_results* with regular expressions and rendered
    as a short Markdown-style message. Extracted items appear in a stable
    order (first occurrence first), so repeated calls give identical output.

    Args:
        search_results: Raw text of the search results.
        query: The user's search query; echoed in the reply and used to guess
            the business name.

    Returns:
        The formatted summary when at least one phone, e-mail, website, hours
        or address item was found; otherwise *search_results* unchanged (this
        includes empty input and any unexpected extraction error).
    """
    if not search_results:
        return search_results

    try:
        phones = _find_phones(search_results)
        emails = _unique(_EMAIL_PATTERN.findall(search_results))
        websites = _unique(
            url.rstrip(_URL_TRAILING_PUNCTUATION)
            for url in _URL_PATTERN.findall(search_results)
        )
        hours = _find_hours(search_results)
        addresses = _find_addresses(search_results)

        # If no specific information extracted, return original results
        if not any((phones, emails, websites, hours, addresses)):
            return search_results

        business_name = _guess_business_name(search_results, query)

        parts = [f"I found information for your search '{query}':\n\n"]
        if business_name:
            parts.append(f"🏢 **{business_name}**\n\n")
        if phones:
            parts.append(f"📞 **Phone**: {', '.join(phones)}\n")
        if emails:
            parts.append(f"📧 **Email**: {', '.join(emails)}\n")
        if addresses:
            parts.append(f"📍 **Address**: {', '.join(addresses[:2])}\n")  # Limit to 2 addresses
        if websites:
            parts.append(f"🌐 **Website**: {', '.join(websites[:2])}\n")  # Limit to 2 URLs
        if hours:
            parts.append(f"🕒 **Hours**: {hours}\n")

        # Add a summary from the first sufficiently long line that is not a bare URL
        stripped_lines = (line.strip() for line in search_results.split('\n'))
        first_meaningful = next(
            (line for line in stripped_lines if len(line) > 20 and not line.startswith('http')),
            None,
        )
        if first_meaningful:
            parts.append(f"\nℹ️ **Additional Info**: {first_meaningful[:200]}...\n")

        parts.append("\nWould you like me to help you with anything specific, like getting directions or finding more details?")
        return ''.join(parts)

    except Exception as e:
        # Best-effort helper: never fail the caller, fall back to the raw text.
        print(f"Error extracting search information: {e}")
        return search_results


# The "test_" prefix would make pytest collect this helper as a test case and
# then fail on its required parameters; opt it out of collection explicitly.
test_extract_search_information.__test__ = False
|
||||
|
||||
# Test with sample search results
|
||||
async def main():
    """Run the extractor over two canned search results and print each reply."""
    # Case 1: post office listing with an international phone number.
    post_office_text = "\n".join((
        "",
        "Post Office Fortabbas - Pakistan Post",
        "Contact Information",
        "Phone: +92-68-5555123",
        "Email: fortabbas@pakistanpost.gov.pk",
        "Address: Main Bazaar Road, Fortabbas, Punjab, Pakistan",
        "Hours: Monday to Friday 8:00 AM - 5:00 PM",
        "Services: Mail delivery, postal services, money orders",
        "Website: www.pakistanpost.gov.pk",
        "",
    ))

    # Case 2: bank branch listing with a US-style phone number and address.
    bank_text = "\n".join((
        "",
        "ABC Bank Branch",
        "Contact: (555) 123-4567",
        "Location: 123 Main Street, Anytown, NY 12345",
        "Business Hours: Mon-Fri 9:00 AM - 6:00 PM, Sat 9:00 AM - 2:00 PM",
        "Email: info@abcbank.com",
        "Website: https://www.abcbank.com",
        "Services: Banking, loans, investments",
        "",
    ))

    cases = (
        ("Test 1 - Post Office Search:", post_office_text, "phone number post office Fortabbas"),
        ("Test 2 - Bank Search:", bank_text, "ABC Bank contact information"),
    )
    divider = "\n" + "=" * 50 + "\n"

    for position, (heading, text, query) in enumerate(cases):
        # The divider goes between cases only, never before the first one.
        if position:
            print(divider)
        outcome = await test_extract_search_information(text, query)
        print(heading)
        print(outcome)
|
||||
|
||||
# Script entry point: run the demo cases on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
Reference in New Issue
Block a user