mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website - Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.) - Added country extraction from table titles for international rates - Flatten pandas MultiIndex columns for cleaner data structure - Default to CAD for domestic Canadian sources (accommodations and domestic tables) - Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations) - Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities - Added migration script to convert scraped data to Node.js database format - Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3) - Fixed city validation and currency conversion in main app - Added comprehensive debug and verification scripts This replaces manual JSON maintenance with automated data collection from official government source.
This commit is contained in:
44
scripts/query_scraped_db.py
Normal file
44
scripts/query_scraped_db.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
|
||||
|
||||
print('\n=== SCRAPED DATABASE ANALYSIS ===\n')
|
||||
|
||||
# Tables
|
||||
print('📊 Tables:')
|
||||
tables = [row[0] for row in conn.execute('SELECT name FROM sqlite_master WHERE type="table"').fetchall()]
|
||||
for table in tables:
|
||||
count = conn.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
|
||||
print(f' - {table}: {count} rows')
|
||||
|
||||
# Unique currencies
|
||||
print('\n💰 Unique Currencies in rate_entries:')
|
||||
currencies = [row[0] for row in conn.execute('SELECT DISTINCT currency FROM rate_entries WHERE currency IS NOT NULL ORDER BY currency').fetchall()]
|
||||
print(f' {", ".join(currencies)}')
|
||||
|
||||
# Argentina data
|
||||
print('\n🇦🇷 Argentina entries:')
|
||||
for row in conn.execute('SELECT country, city, currency, rate_type, rate_amount FROM rate_entries WHERE country="Argentina" LIMIT 10').fetchall():
|
||||
print(f' {row[0]} - {row[1]} - Currency: {row[2]} - {row[3]}: ${row[4]:.2f}')
|
||||
|
||||
# Sample rate entries by country
|
||||
print('\n🌍 Sample entries by country (first 3 countries):')
|
||||
for row in conn.execute('SELECT DISTINCT country FROM rate_entries WHERE country IS NOT NULL LIMIT 3').fetchall():
|
||||
country = row[0]
|
||||
print(f'\n {country}:')
|
||||
for entry in conn.execute('SELECT city, currency, rate_type, rate_amount FROM rate_entries WHERE country=? LIMIT 3', (country,)).fetchall():
|
||||
print(f' {entry[0]} - {entry[1]} - {entry[2]}: ${entry[3]:.2f}')
|
||||
|
||||
# Exchange rates
|
||||
print('\n💱 Exchange rates:')
|
||||
for row in conn.execute('SELECT currency, rate_to_cad, effective_date FROM exchange_rates WHERE currency IS NOT NULL LIMIT 10').fetchall():
|
||||
print(f' {row[0]}: {row[1]:.4f} CAD (effective: {row[2]})')
|
||||
|
||||
# Accommodations
|
||||
print('\n🏨 Accommodation entries (sample):')
|
||||
for row in conn.execute('SELECT property_name, city, province, rate_amount, currency FROM accommodations WHERE rate_amount IS NOT NULL LIMIT 10').fetchall():
|
||||
print(f' {row[0]} - {row[1]}, {row[2]} - ${row[3]:.2f} {row[4]}')
|
||||
|
||||
conn.close()
|
||||
print('\n✅ Done!')
|
||||
Reference in New Issue
Block a user