mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add alphabet navigation to scraper - now collects ALL 233 countries
- Implemented alphabet navigation (A-Z) for the NJC international rates page
- Added request delays (2s) and retry logic with exponential backoff to avoid server timeouts
- Added error handling for pages without tables
- Installed html5lib for better HTML parsing
- Now scrapes 233 countries (up from 15) with 104 unique currencies
- Total of 11,628 international rate entries collected
- Added verification scripts to check all countries and their currencies
- Fixed currency extraction, which now works correctly for EUR, USD, CAD, AUD, ARS, and 99+ other currencies
This commit is contained in:
37
scripts/final_stats.py
Normal file
37
scripts/final_stats.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Print summary statistics for the scraped international travel rates.

Reads the ``rate_entries`` table of the scraped SQLite database and reports
how many distinct countries, rows, and currencies have
``source = 'international'``, followed by the currencies used by the most
countries.
"""
import sqlite3
from contextlib import closing

DB_PATH = 'data/travel_rates_scraped.sqlite3'
TOP_CURRENCY_LIMIT = 20


def collect_stats(conn, limit=TOP_CURRENCY_LIMIT):
    """Query *conn* for the international-rate summary figures.

    Args:
        conn: An open ``sqlite3`` connection to a database that has a
            ``rate_entries`` table with ``source``, ``country`` and
            ``currency`` columns.
        limit: Maximum number of currencies returned in the breakdown.

    Returns:
        A dict with the keys ``total_countries``, ``total_entries``,
        ``total_currencies``, ``limit`` and ``breakdown``; ``breakdown`` is a
        list of ``(currency, country_count, entries)`` tuples.
    """
    cursor = conn.cursor()

    # String literals use single quotes: in SQL, double quotes denote
    # identifiers, and SQLite only treats "international" as a string through
    # a legacy fallback that can be disabled at build time.
    cursor.execute(
        "SELECT COUNT(DISTINCT country) FROM rate_entries "
        "WHERE source = 'international' AND country IS NOT NULL"
    )
    total_countries = cursor.fetchone()[0]

    cursor.execute(
        "SELECT COUNT(*) FROM rate_entries WHERE source = 'international'"
    )
    total_entries = cursor.fetchone()[0]

    cursor.execute(
        "SELECT COUNT(DISTINCT currency) FROM rate_entries "
        "WHERE source = 'international'"
    )
    total_currencies = cursor.fetchone()[0]

    # The extra ORDER BY terms only break ties between currencies with the
    # same country count, so repeated runs print rows in the same order.
    cursor.execute(
        """
        SELECT currency,
               COUNT(DISTINCT country) AS country_count,
               COUNT(*) AS entries
        FROM rate_entries
        WHERE source = 'international'
        GROUP BY currency
        ORDER BY country_count DESC, entries DESC, currency
        LIMIT ?
        """,
        (limit,),
    )
    breakdown = cursor.fetchall()

    return {
        "total_countries": total_countries,
        "total_entries": total_entries,
        "total_currencies": total_currencies,
        "limit": limit,
        "breakdown": breakdown,
    }


def format_report(stats):
    """Render the dict returned by :func:`collect_stats` as printable text."""
    lines = [
        "✅ COMPLETE SCRAPER RESULTS:",
        f" Total Countries: {stats['total_countries']}",
        f" Total Entries: {stats['total_entries']:,}",
        f" Unique Currencies: {stats['total_currencies']}",
        f"\nTop {stats['limit']} Currencies:",
    ]
    for currency, country_count, entries in stats["breakdown"]:
        # Rows with a NULL currency form their own group; None rejects the
        # ":5" format spec with a TypeError, so substitute a visible label.
        label = "N/A" if currency is None else str(currency)
        lines.append(
            f" {label:5} - {country_count:3} countries, {entries:,} entries"
        )
    return "\n".join(lines)


def main(db_path=DB_PATH):
    """Open the database at *db_path*, gather the statistics and print them."""
    # closing() guarantees the connection is released even if a query raises;
    # a bare "with conn:" would only manage the transaction, not close it.
    with closing(sqlite3.connect(db_path)) as conn:
        stats = collect_stats(conn)
    print(format_report(stats))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user