Files
Gov_Travel_App/scripts/migrateToDatabase.js
mblanke 15094ac94b Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flatten pandas MultiIndex columns for cleaner data structure
- Default to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from official government source.
2026-01-13 09:21:43 -05:00

322 lines
12 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const sqlite3 = require('sqlite3').verbose();
const fs = require('fs');
const path = require('path');
class DatabaseMigration {
constructor() {
this.dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
this.db = null;
}
async migrate() {
console.log('🚀 Starting database migration...\n');
try {
// Ensure database directory exists
const dbDir = path.join(__dirname, '..', 'database');
if (!fs.existsSync(dbDir)) {
fs.mkdirSync(dbDir, { recursive: true });
console.log('✅ Created database directory');
}
// Open database connection
await this.openDatabase();
// Create tables (inline schema - no external file needed)
await this.createTables();
// Import accommodation rates
await this.importAccommodationRates();
// Add Canberra
await this.addCanberra();
// Build search indexes
await this.buildSearchIndexes();
// Display statistics
await this.displayStats();
console.log('\n✅ Migration complete!');
console.log(`📊 Database: ${this.dbPath}`);
} catch (error) {
console.error('❌ Migration failed:', error);
throw error;
} finally {
if (this.db) {
this.db.close();
}
}
}
openDatabase() {
return new Promise((resolve, reject) => {
this.db = new sqlite3.Database(this.dbPath, (err) => {
if (err) {
reject(err);
} else {
console.log('✅ Database connection opened');
resolve();
}
});
});
}
async createTables() {
console.log('📋 Creating tables...');
// Inline schema - no external file dependency
const schema = `
CREATE TABLE IF NOT EXISTS accommodation_rates (
id INTEGER PRIMARY KEY AUTOINCREMENT,
city_key TEXT UNIQUE NOT NULL,
city_name TEXT NOT NULL,
province TEXT,
country TEXT,
region TEXT NOT NULL,
currency TEXT NOT NULL,
jan_rate REAL NOT NULL,
feb_rate REAL NOT NULL,
mar_rate REAL NOT NULL,
apr_rate REAL NOT NULL,
may_rate REAL NOT NULL,
jun_rate REAL NOT NULL,
jul_rate REAL NOT NULL,
aug_rate REAL NOT NULL,
sep_rate REAL NOT NULL,
oct_rate REAL NOT NULL,
nov_rate REAL NOT NULL,
dec_rate REAL NOT NULL,
standard_rate REAL,
is_international BOOLEAN DEFAULT 0,
effective_date DATE DEFAULT '2025-01-01',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);
CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
city_key,
city_name,
province,
country,
region,
content='accommodation_rates'
);
`;
return new Promise((resolve, reject) => {
this.db.exec(schema, (err) => {
if (err) {
reject(err);
} else {
console.log('✅ Tables created');
resolve();
}
});
});
}
async importAccommodationRates() {
console.log('📥 Importing accommodation rates...');
const jsonPath = path.join(__dirname, '..', 'data', 'accommodationRates.json');
console.log(` 📂 Looking for JSON at: ${jsonPath}`);
if (!fs.existsSync(jsonPath)) {
console.error('❌ accommodationRates.json not found!');
throw new Error('Missing accommodationRates.json file');
}
console.log(' ✅ JSON file found, reading...');
const rawData = fs.readFileSync(jsonPath, 'utf8');
console.log(` 📄 File size: ${rawData.length} bytes`);
const data = JSON.parse(rawData);
console.log(` ✅ JSON parsed successfully`);
console.log(` 📊 Data keys: ${Object.keys(data).join(', ')}`);
let imported = 0;
// Import Canadian cities
if (data.cities) {
const cityCount = Object.keys(data.cities).length;
console.log(` - Importing ${cityCount} Canadian cities...`);
for (const [key, city] of Object.entries(data.cities)) {
try {
await this.insertAccommodationRate({
city_key: key,
city_name: city.name,
province: city.province,
country: 'Canada',
region: city.region,
currency: city.currency,
rates: city.monthlyRates,
is_international: 0
});
imported++;
if (imported % 50 === 0) {
console.log(` ... ${imported} cities imported so far`);
}
} catch (err) {
console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
}
}
console.log(` ✅ Imported ${imported} Canadian cities`);
} else {
console.log(' ⚠️ No "cities" key found in JSON');
}
// Import international cities
if (data.internationalCities) {
const intlCityCount = Object.keys(data.internationalCities).length;
console.log(` - Importing ${intlCityCount} international cities...`);
let intlCount = 0;
for (const [key, city] of Object.entries(data.internationalCities)) {
try {
const rates = city.monthlyRates || Array(12).fill(city.standardRate);
await this.insertAccommodationRate({
city_key: key,
city_name: city.name,
province: null,
country: city.country,
region: city.region,
currency: city.currency,
rates: rates,
standard_rate: city.standardRate || rates[0],
is_international: 1
});
intlCount++;
if (intlCount % 20 === 0) {
console.log(` ... ${intlCount} international cities imported so far`);
}
} catch (err) {
console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
}
}
console.log(` ✅ Imported ${intlCount} international cities`);
imported += intlCount;
} else {
console.log(' ⚠️ No "internationalCities" key found in JSON');
}
console.log(`✅ Total imported: ${imported} cities`);
}
async addCanberra() {
console.log('🇦🇺 Adding Canberra...');
try {
await this.insertAccommodationRate({
city_key: 'canberra',
city_name: 'Canberra',
province: null,
country: 'Australia',
region: 'Oceania',
currency: 'USD',
rates: [184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184],
standard_rate: 184,
is_international: 1
});
console.log('✅ Canberra added: $184 USD/night');
} catch (err) {
if (err.message.includes('UNIQUE')) {
console.log(' Canberra already exists');
} else {
throw err;
}
}
}
async insertAccommodationRate(city) {
return new Promise((resolve, reject) => {
const sql = `
INSERT OR REPLACE INTO accommodation_rates (
city_key, city_name, province, country, region, currency,
jan_rate, feb_rate, mar_rate, apr_rate, may_rate, jun_rate,
jul_rate, aug_rate, sep_rate, oct_rate, nov_rate, dec_rate,
standard_rate, is_international
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`;
this.db.run(sql, [
city.city_key,
city.city_name,
city.province,
city.country,
city.region,
city.currency,
...city.rates,
city.standard_rate || city.rates[0],
city.is_international
], (err) => {
if (err) reject(err);
else resolve();
});
});
}
async buildSearchIndexes() {
console.log('🔍 Building search indexes...');
console.log(' Skipping FTS5 index population (can be done later if needed)');
console.log(' ✅ Standard indexes already created with tables');
return Promise.resolve();
}
async displayStats() {
console.log('\n📊 Database Statistics:');
const total = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates');
console.log(` Total cities: ${total}`);
const canadian = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 0');
console.log(` Canadian: ${canadian}`);
const international = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 1');
console.log(` International: ${international}`);
const canberra = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE city_key = "canberra"');
console.log(` Canberra found: ${canberra > 0 ? '✅ YES' : '❌ NO'}`);
if (canberra > 0) {
const rate = await this.getCanberraRate();
console.log(` Canberra rate: $${rate} USD/night`);
}
}
getCount(sql) {
return new Promise((resolve, reject) => {
this.db.get(sql, [], (err, row) => {
if (err) reject(err);
else resolve(row.count);
});
});
}
getCanberraRate() {
return new Promise((resolve, reject) => {
this.db.get('SELECT jan_rate FROM accommodation_rates WHERE city_key = "canberra"', [], (err, row) => {
if (err) reject(err);
else resolve(row ? row.jan_rate : null);
});
});
}
}
// Run migration
if (require.main === module) {
const migration = new DatabaseMigration();
migration.migrate().catch(err => {
console.error('Fatal error:', err);
process.exit(1);
});
}
module.exports = DatabaseMigration;