mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented a Python scraper using BeautifulSoup and pandas to automatically collect travel rates from the official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Defaulted to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created the SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added a migration script to convert scraped data to the Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in the main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
322
scripts/migrateToDatabase.js
Normal file
322
scripts/migrateToDatabase.js
Normal file
@@ -0,0 +1,322 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Migration that loads accommodation rates from a JSON file into the
 * `accommodation_rates` table of a SQLite database.
 *
 * Steps performed by {@link DatabaseMigration#migrate}: ensure the database
 * directory exists, open the database, create the schema, import Canadian and
 * international cities from JSON, upsert a hard-coded Canberra row, and print
 * row-count statistics.
 */
class DatabaseMigration {
    /**
     * @param {string} [dbPath] - SQLite database file. Defaults to
     *   `../database/travel_rates.db` relative to this script.
     * @param {string} [jsonPath] - Source JSON file. Defaults to
     *   `../data/accommodationRates.json` relative to this script.
     */
    constructor(
        dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db'),
        jsonPath = path.join(__dirname, '..', 'data', 'accommodationRates.json')
    ) {
        this.dbPath = dbPath;
        this.jsonPath = jsonPath;
        this.db = null;
    }

    /**
     * Run the whole migration. The database handle is closed (and the close
     * awaited) whether the migration succeeds or fails.
     *
     * @returns {Promise<void>}
     * @throws Re-throws any error raised by a migration step after logging it.
     */
    async migrate() {
        console.log('🚀 Starting database migration...\n');

        try {
            // Ensure database directory exists
            const dbDir = path.dirname(this.dbPath);
            if (!fs.existsSync(dbDir)) {
                fs.mkdirSync(dbDir, { recursive: true });
                console.log('✅ Created database directory');
            }

            // Open database connection
            await this.openDatabase();

            // Create tables (inline schema - no external file needed)
            await this.createTables();

            // Import accommodation rates
            await this.importAccommodationRates();

            // Add Canberra
            await this.addCanberra();

            // Build search indexes
            await this.buildSearchIndexes();

            // Display statistics
            await this.displayStats();

            console.log('\n✅ Migration complete!');
            console.log(`📊 Database: ${this.dbPath}`);
        } catch (error) {
            console.error('❌ Migration failed:', error);
            throw error;
        } finally {
            // Wait for the close so pending writes are flushed before callers
            // (e.g. a process.exit in the CLI wrapper) continue.
            await this.closeDatabase();
        }
    }

    /**
     * Open the SQLite database at `this.dbPath` and store the handle on
     * `this.db`. On failure `this.db` is reset to `null` so no unusable
     * handle is left behind.
     *
     * @returns {Promise<void>}
     */
    openDatabase() {
        return new Promise((resolve, reject) => {
            this.db = new sqlite3.Database(this.dbPath, (err) => {
                if (err) {
                    this.db = null;
                    reject(err);
                    return;
                }
                console.log('✅ Database connection opened');
                resolve();
            });
        });
    }

    /**
     * Close the database handle, if any, and clear `this.db`.
     * Close errors are logged rather than thrown so they cannot mask the
     * error that caused the migration to fail. Safe to call repeatedly.
     *
     * @returns {Promise<void>}
     */
    closeDatabase() {
        const handle = this.db;
        if (!handle) {
            return Promise.resolve();
        }
        this.db = null;
        return new Promise((resolve) => {
            handle.close((err) => {
                if (err) {
                    console.error('⚠️ Failed to close database:', err.message);
                }
                resolve();
            });
        });
    }

    /**
     * Create the `accommodation_rates` table, its indexes and the
     * `accommodation_search` FTS5 virtual table if they do not exist yet.
     *
     * @returns {Promise<void>}
     */
    async createTables() {
        console.log('📋 Creating tables...');

        // Inline schema - no external file dependency
        const schema = `
            CREATE TABLE IF NOT EXISTS accommodation_rates (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                city_key TEXT UNIQUE NOT NULL,
                city_name TEXT NOT NULL,
                province TEXT,
                country TEXT,
                region TEXT NOT NULL,
                currency TEXT NOT NULL,
                jan_rate REAL NOT NULL,
                feb_rate REAL NOT NULL,
                mar_rate REAL NOT NULL,
                apr_rate REAL NOT NULL,
                may_rate REAL NOT NULL,
                jun_rate REAL NOT NULL,
                jul_rate REAL NOT NULL,
                aug_rate REAL NOT NULL,
                sep_rate REAL NOT NULL,
                oct_rate REAL NOT NULL,
                nov_rate REAL NOT NULL,
                dec_rate REAL NOT NULL,
                standard_rate REAL,
                is_international BOOLEAN DEFAULT 0,
                effective_date DATE DEFAULT '2025-01-01',
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );

            CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
            CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
            CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
            CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);

            CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
                city_key,
                city_name,
                province,
                country,
                region,
                content='accommodation_rates'
            );
        `;

        return new Promise((resolve, reject) => {
            this.db.exec(schema, (err) => {
                if (err) {
                    reject(err);
                    return;
                }
                console.log('✅ Tables created');
                resolve();
            });
        });
    }

    /**
     * Read the JSON file at `this.jsonPath` and upsert every entry under its
     * `cities` (Canadian) and `internationalCities` keys. A city that fails
     * to insert is logged and skipped; the import continues with the next.
     *
     * @returns {Promise<void>}
     * @throws {Error} If the JSON file is missing, or (from JSON.parse) if it
     *   is not valid JSON.
     */
    async importAccommodationRates() {
        console.log('📥 Importing accommodation rates...');

        const jsonPath = this.jsonPath;
        console.log(` 📂 Looking for JSON at: ${jsonPath}`);

        if (!fs.existsSync(jsonPath)) {
            console.error('❌ accommodationRates.json not found!');
            throw new Error('Missing accommodationRates.json file');
        }

        console.log(' ✅ JSON file found, reading...');
        const rawData = fs.readFileSync(jsonPath, 'utf8');
        // String length counts UTF-16 units, not bytes; report real bytes.
        console.log(` 📄 File size: ${Buffer.byteLength(rawData, 'utf8')} bytes`);

        const data = JSON.parse(rawData);
        console.log(` ✅ JSON parsed successfully`);
        console.log(` 📊 Data keys: ${Object.keys(data).join(', ')}`);

        let imported = 0;

        // Import Canadian cities
        if (data.cities) {
            const cityCount = Object.keys(data.cities).length;
            console.log(` - Importing ${cityCount} Canadian cities...`);

            for (const [key, city] of Object.entries(data.cities)) {
                try {
                    await this.insertAccommodationRate({
                        city_key: key,
                        city_name: city.name,
                        province: city.province,
                        country: 'Canada',
                        region: city.region,
                        currency: city.currency,
                        rates: city.monthlyRates,
                        is_international: 0
                    });
                    imported++;
                    if (imported % 50 === 0) {
                        console.log(` ... ${imported} cities imported so far`);
                    }
                } catch (err) {
                    // Fall back to the key so a malformed entry cannot make
                    // the error handler itself throw.
                    const label = (city && city.name) || key;
                    console.error(` ⚠️ Failed to import ${label}:`, err.message);
                }
            }
            console.log(` ✅ Imported ${imported} Canadian cities`);
        } else {
            console.log(' ⚠️ No "cities" key found in JSON');
        }

        // Import international cities
        if (data.internationalCities) {
            const intlCityCount = Object.keys(data.internationalCities).length;
            console.log(` - Importing ${intlCityCount} international cities...`);

            let intlCount = 0;
            for (const [key, city] of Object.entries(data.internationalCities)) {
                try {
                    // Entries without per-month rates reuse the standard rate
                    // for all twelve months.
                    const rates = city.monthlyRates || Array(12).fill(city.standardRate);

                    await this.insertAccommodationRate({
                        city_key: key,
                        city_name: city.name,
                        province: null,
                        country: city.country,
                        region: city.region,
                        currency: city.currency,
                        rates: rates,
                        standard_rate: city.standardRate || rates[0],
                        is_international: 1
                    });
                    intlCount++;
                    if (intlCount % 20 === 0) {
                        console.log(` ... ${intlCount} international cities imported so far`);
                    }
                } catch (err) {
                    const label = (city && city.name) || key;
                    console.error(` ⚠️ Failed to import ${label}:`, err.message);
                }
            }
            console.log(` ✅ Imported ${intlCount} international cities`);
            imported += intlCount;
        } else {
            console.log(' ⚠️ No "internationalCities" key found in JSON');
        }

        console.log(`✅ Total imported: ${imported} cities`);
    }

    /**
     * Upsert a hard-coded Canberra row (184 USD for every month).
     *
     * @returns {Promise<void>}
     */
    async addCanberra() {
        console.log('🇦🇺 Adding Canberra...');

        try {
            await this.insertAccommodationRate({
                city_key: 'canberra',
                city_name: 'Canberra',
                province: null,
                country: 'Australia',
                region: 'Oceania',
                currency: 'USD',
                rates: [184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184],
                standard_rate: 184,
                is_international: 1
            });
            console.log('✅ Canberra added: $184 USD/night');
        } catch (err) {
            // NOTE(review): insertAccommodationRate uses INSERT OR REPLACE, so
            // a UNIQUE conflict replaces the row instead of raising; this
            // branch only matters if the insert becomes a plain INSERT.
            if (err.message.includes('UNIQUE')) {
                console.log('ℹ️ Canberra already exists');
            } else {
                throw err;
            }
        }
    }

    /**
     * Insert or replace one row in `accommodation_rates`.
     *
     * @param {object} city
     * @param {string} city.city_key - Unique key of the row.
     * @param {string} city.city_name
     * @param {?string} city.province
     * @param {?string} city.country
     * @param {string} city.region
     * @param {string} city.currency
     * @param {Array} city.rates - Exactly 12 monthly rates, January first.
     * @param {number} [city.standard_rate] - Falls back to `rates[0]` when falsy.
     * @param {number} city.is_international - 0 or 1.
     * @returns {Promise<void>}
     * @throws {Error} If `city.rates` is not an array of exactly 12 entries.
     */
    async insertAccommodationRate(city) {
        const rates = city.rates;
        // The statement has 20 positional placeholders; a rates list of any
        // other length would shift standard_rate / is_international into the
        // wrong columns, so refuse it up front.
        if (!Array.isArray(rates) || rates.length !== 12) {
            const received = Array.isArray(rates) ? String(rates.length) : 'none';
            throw new Error(
                `Expected 12 monthly rates for city "${city.city_key}" but received ${received}`
            );
        }

        const sql = `
            INSERT OR REPLACE INTO accommodation_rates (
                city_key, city_name, province, country, region, currency,
                jan_rate, feb_rate, mar_rate, apr_rate, may_rate, jun_rate,
                jul_rate, aug_rate, sep_rate, oct_rate, nov_rate, dec_rate,
                standard_rate, is_international
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `;

        const params = [
            city.city_key,
            city.city_name,
            city.province,
            city.country,
            city.region,
            city.currency,
            ...rates,
            city.standard_rate || rates[0],
            city.is_international
        ];

        return new Promise((resolve, reject) => {
            this.db.run(sql, params, (err) => {
                if (err) reject(err);
                else resolve();
            });
        });
    }

    /**
     * Placeholder step: only logs. The FTS5 table created by createTables is
     * not populated here.
     *
     * @returns {Promise<void>}
     */
    async buildSearchIndexes() {
        console.log('🔍 Building search indexes...');
        console.log(' ℹ️ Skipping FTS5 index population (can be done later if needed)');
        console.log(' ✅ Standard indexes already created with tables');
    }

    /**
     * Print row counts (total, Canadian, international) and whether the
     * Canberra row is present, plus its January rate when it is.
     *
     * @returns {Promise<void>}
     */
    async displayStats() {
        console.log('\n📊 Database Statistics:');

        const total = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates');
        console.log(` Total cities: ${total}`);

        const canadian = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 0');
        console.log(` Canadian: ${canadian}`);

        const international = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 1');
        console.log(` International: ${international}`);

        // Bound parameter instead of a double-quoted literal: SQLite treats
        // double quotes as identifier quoting.
        const canberra = await this.getCount(
            'SELECT COUNT(*) as count FROM accommodation_rates WHERE city_key = ?',
            ['canberra']
        );
        console.log(` Canberra found: ${canberra > 0 ? '✅ YES' : '❌ NO'}`);

        if (canberra > 0) {
            const rate = await this.getCanberraRate();
            console.log(` Canberra rate: $${rate} USD/night`);
        }
    }

    /**
     * Run a query that yields a single row with a `count` column.
     *
     * @param {string} sql - Query selecting `... AS count`.
     * @param {Array} [params=[]] - Values bound to `?` placeholders in `sql`.
     * @returns {Promise<number>} The `count` value, or 0 if no row came back.
     */
    getCount(sql, params = []) {
        return new Promise((resolve, reject) => {
            this.db.get(sql, params, (err, row) => {
                if (err) reject(err);
                else resolve(row ? row.count : 0);
            });
        });
    }

    /**
     * Fetch the January rate stored for the `canberra` row.
     *
     * @returns {Promise<?number>} `jan_rate`, or `null` if the row is absent.
     */
    getCanberraRate() {
        return new Promise((resolve, reject) => {
            this.db.get(
                'SELECT jan_rate FROM accommodation_rates WHERE city_key = ?',
                ['canberra'],
                (err, row) => {
                    if (err) reject(err);
                    else resolve(row ? row.jan_rate : null);
                }
            );
        });
    }
}
|
||||
|
||||
// Execute the migration only when this file is the process entry point
// (run directly with node), not when it is loaded through require().
const isEntryPoint = require.main === module;

if (isEntryPoint) {
    new DatabaseMigration().migrate().catch((fatal) => {
        console.error('Fatal error:', fatal);
        process.exit(1);
    });
}

module.exports = DatabaseMigration;
|
||||
Reference in New Issue
Block a user