Mirror of https://github.com/mblanke/Gov_Travel_App.git (synced 2026-03-01 14:10:22 -05:00)
- Added openFlightsService.js to fetch and cache OpenFlights airport/airline/routes data
- Validates airport codes exist in OpenFlights database (6072+ airports)
- Generates realistic flights using major international airlines
- Creates varied routing options: direct, 1-stop, 2-stop flights
- Updated flightService.js to use OpenFlights as primary source before Amadeus
- OpenFlights as fallback if Amadeus unavailable or returns no results
- No API keys or authentication required
- Cached locally to avoid repeated network requests
- Realistic pricing, times, and stop locations

Docker container rebuilt with OpenFlights integration.
92 lines
2.8 KiB
Python
from __future__ import annotations

import argparse
import time
from pathlib import Path

from gov_travel import db
from gov_travel.scrapers import (
    SOURCES,
    extract_accommodations,
    extract_exchange_rates,
    extract_rate_entries,
    scrape_tables_from_source,
)


def parse_args() -> argparse.Namespace:
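    """Parse command-line arguments; the only option is the SQLite database path."""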
    parser = argparse.ArgumentParser(description="Scrape travel rates into SQLite")
    parser.add_argument(
        "--db",
        type=Path,
        default=Path("data/travel_rates.sqlite3"),
        help="Path to the SQLite database",
    )
    return parser.parse_args()


def main() -> None:
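    """Scrape every configured source and write the results into the SQLite database."""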
    args = parse_args()
    start_time = time.time()

    print("=" * 80)
    print("🌐 Government Travel Rate Scraper")
    print("=" * 80)
    print(f"📁 Database: {args.db}")
    print()

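    # Connect to the SQLite file; db.init_db is expected to create any missing schema.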
    connection = db.connect(args.db)
    db.init_db(connection)

    total_tables = 0
    total_rate_entries = 0
    total_accommodations = 0

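    # Scrape each configured source: save its raw tables, then pull out any structured rows.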
    for idx, source in enumerate(SOURCES, 1):
        source_start = time.time()
        print(f"[{idx}/{len(SOURCES)}] 📥 Scraping: {source.name.upper()}")
        print(f" 🔗 {source.url}")

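        # Every scraped table is stored verbatim before any structured extraction is attempted.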
        tables = scrape_tables_from_source(source)
        db.insert_raw_tables(connection, source.name, source.url, tables)
        total_tables += len(tables)
        print(f" ✓ {len(tables)} tables collected")

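        # Per-diem rate rows extracted from this source's tables (may be empty).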
        rate_entries = extract_rate_entries(source, tables)
        db.insert_rate_entries(connection, rate_entries)
        total_rate_entries += len(rate_entries)
        if rate_entries:
            print(f" ✓ {len(rate_entries)} per-diem entries extracted")

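        # Exchange rates are handled the same way; many sources likely yield none.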
        exchange_rates = extract_exchange_rates(source, tables)
        db.insert_exchange_rates(connection, exchange_rates)
        if exchange_rates:
            print(f" ✓ {len(exchange_rates)} exchange rates extracted")

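        # The accommodations source carries listing data that gets its own extraction pass.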
        if source.name == "accommodations":
            accommodations = extract_accommodations(source, tables)
            db.insert_accommodations(connection, accommodations)
            total_accommodations = len(accommodations)
            print(f" ✓ {len(accommodations)} accommodation listings extracted")

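        # Per-source timing, reported for progress feedback only.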
        elapsed = time.time() - source_start
        print(f" ⏱️ Completed in {elapsed:.1f}s")
        print()

    connection.close()

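    # Final summary of everything written during this run.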
    total_time = time.time() - start_time
    print("=" * 80)
    print("✅ SCRAPING COMPLETE")
    print("=" * 80)
    print("📊 Summary:")
    print(f" • Total tables: {total_tables:,}")
    print(f" • Per-diem entries: {total_rate_entries:,}")
    print(f" • Accommodation listings: {total_accommodations:,}")
    print(f" • Total time: {total_time:.1f}s")
    print(f" • Database: {args.db}")
    print("=" * 80)


if __name__ == "__main__":
    main()