mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website - Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.) - Added country extraction from table titles for international rates - Flatten pandas MultiIndex columns for cleaner data structure - Default to CAD for domestic Canadian sources (accommodations and domestic tables) - Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations) - Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities - Added migration script to convert scraped data to Node.js database format - Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3) - Fixed city validation and currency conversion in main app - Added comprehensive debug and verification scripts This replaces manual JSON maintenance with automated data collection from official government source.
This commit is contained in:
183
database/schema.sql
Normal file
183
database/schema.sql
Normal file
@@ -0,0 +1,183 @@
|
||||
-- Accommodation Rates Table
|
||||
CREATE TABLE IF NOT EXISTS accommodation_rates (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
city_key TEXT UNIQUE NOT NULL,
|
||||
city_name TEXT NOT NULL,
|
||||
province TEXT,
|
||||
country TEXT,
|
||||
region TEXT NOT NULL,
|
||||
currency TEXT NOT NULL,
|
||||
jan_rate REAL NOT NULL,
|
||||
feb_rate REAL NOT NULL,
|
||||
mar_rate REAL NOT NULL,
|
||||
apr_rate REAL NOT NULL,
|
||||
may_rate REAL NOT NULL,
|
||||
jun_rate REAL NOT NULL,
|
||||
jul_rate REAL NOT NULL,
|
||||
aug_rate REAL NOT NULL,
|
||||
sep_rate REAL NOT NULL,
|
||||
oct_rate REAL NOT NULL,
|
||||
nov_rate REAL NOT NULL,
|
||||
dec_rate REAL NOT NULL,
|
||||
standard_rate REAL,
|
||||
is_international BOOLEAN DEFAULT 0,
|
||||
effective_date DATE DEFAULT '2025-01-01',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Meal Rates Table
|
||||
CREATE TABLE IF NOT EXISTS meal_rates (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
city_key TEXT UNIQUE NOT NULL,
|
||||
city_name TEXT NOT NULL,
|
||||
country TEXT,
|
||||
region TEXT NOT NULL,
|
||||
currency TEXT NOT NULL,
|
||||
breakfast_rate REAL NOT NULL,
|
||||
lunch_rate REAL NOT NULL,
|
||||
dinner_rate REAL NOT NULL,
|
||||
incidentals_rate REAL NOT NULL,
|
||||
total_daily_rate REAL NOT NULL,
|
||||
effective_date DATE DEFAULT '2025-10-01',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Full-Text Search Index for Accommodation
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
|
||||
city_key,
|
||||
city_name,
|
||||
province,
|
||||
country,
|
||||
region,
|
||||
content='accommodation_rates'
|
||||
);
|
||||
|
||||
-- Full-Text Search Index for Meals
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS meal_search USING fts5(
|
||||
city_key,
|
||||
city_name,
|
||||
country,
|
||||
region,
|
||||
content='meal_rates'
|
||||
);
|
||||
|
||||
-- Indexes for fast lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);
|
||||
CREATE INDEX IF NOT EXISTS idx_meal_city ON meal_rates(city_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_meal_country ON meal_rates(country);
|
||||
|
||||
-- Trigger to keep search index updated
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_ai AFTER INSERT ON accommodation_rates BEGIN
|
||||
INSERT INTO accommodation_search(rowid, city_key, city_name, province, country, region)
|
||||
VALUES (new.id, new.city_key, new.city_name, new.province, new.country, new.region);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_au AFTER UPDATE ON accommodation_rates BEGIN
|
||||
UPDATE accommodation_search SET
|
||||
city_key = new.city_key,
|
||||
city_name = new.city_name,
|
||||
province = new.province,
|
||||
country = new.country,
|
||||
region = new.region
|
||||
WHERE rowid = new.id;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_ad AFTER DELETE ON accommodation_rates BEGIN
|
||||
DELETE FROM accommodation_search WHERE rowid = old.id;
|
||||
END;-- Accommodation Rates Table
|
||||
CREATE TABLE IF NOT EXISTS accommodation_rates (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
city_key TEXT UNIQUE NOT NULL,
|
||||
city_name TEXT NOT NULL,
|
||||
province TEXT,
|
||||
country TEXT,
|
||||
region TEXT NOT NULL,
|
||||
currency TEXT NOT NULL,
|
||||
jan_rate REAL NOT NULL,
|
||||
feb_rate REAL NOT NULL,
|
||||
mar_rate REAL NOT NULL,
|
||||
apr_rate REAL NOT NULL,
|
||||
may_rate REAL NOT NULL,
|
||||
jun_rate REAL NOT NULL,
|
||||
jul_rate REAL NOT NULL,
|
||||
aug_rate REAL NOT NULL,
|
||||
sep_rate REAL NOT NULL,
|
||||
oct_rate REAL NOT NULL,
|
||||
nov_rate REAL NOT NULL,
|
||||
dec_rate REAL NOT NULL,
|
||||
standard_rate REAL,
|
||||
is_international BOOLEAN DEFAULT 0,
|
||||
effective_date DATE DEFAULT '2025-01-01',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Meal Rates Table
|
||||
CREATE TABLE IF NOT EXISTS meal_rates (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
city_key TEXT UNIQUE NOT NULL,
|
||||
city_name TEXT NOT NULL,
|
||||
country TEXT,
|
||||
region TEXT NOT NULL,
|
||||
currency TEXT NOT NULL,
|
||||
breakfast_rate REAL NOT NULL,
|
||||
lunch_rate REAL NOT NULL,
|
||||
dinner_rate REAL NOT NULL,
|
||||
incidentals_rate REAL NOT NULL,
|
||||
total_daily_rate REAL NOT NULL,
|
||||
effective_date DATE DEFAULT '2025-10-01',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Full-Text Search Index for Accommodation
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
|
||||
city_key,
|
||||
city_name,
|
||||
province,
|
||||
country,
|
||||
region,
|
||||
content='accommodation_rates'
|
||||
);
|
||||
|
||||
-- Full-Text Search Index for Meals
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS meal_search USING fts5(
|
||||
city_key,
|
||||
city_name,
|
||||
country,
|
||||
region,
|
||||
content='meal_rates'
|
||||
);
|
||||
|
||||
-- Indexes for fast lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
|
||||
CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);
|
||||
CREATE INDEX IF NOT EXISTS idx_meal_city ON meal_rates(city_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_meal_country ON meal_rates(country);
|
||||
|
||||
-- Trigger to keep search index updated
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_ai AFTER INSERT ON accommodation_rates BEGIN
|
||||
INSERT INTO accommodation_search(rowid, city_key, city_name, province, country, region)
|
||||
VALUES (new.id, new.city_key, new.city_name, new.province, new.country, new.region);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_au AFTER UPDATE ON accommodation_rates BEGIN
|
||||
UPDATE accommodation_search SET
|
||||
city_key = new.city_key,
|
||||
city_name = new.city_name,
|
||||
province = new.province,
|
||||
country = new.country,
|
||||
region = new.region
|
||||
WHERE rowid = new.id;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS accommodation_ad AFTER DELETE ON accommodation_rates BEGIN
|
||||
DELETE FROM accommodation_search WHERE rowid = old.id;
|
||||
END;
|
||||
BIN
database/travel_rates.db
Normal file
BIN
database/travel_rates.db
Normal file
Binary file not shown.
Reference in New Issue
Block a user