mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from the official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Default to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to the Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in the main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
277
services/databaseService.js
Normal file
277
services/databaseService.js
Normal file
@@ -0,0 +1,277 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
|
||||
// Column-name prefixes for the twelve per-month rate columns, in calendar order.
const MONTH_PREFIXES = [
  'jan', 'feb', 'mar', 'apr', 'may', 'jun',
  'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
];

/**
 * Promise-based access layer over the `travel_rates` SQLite table.
 *
 * Call `connect()` once before using any query method; every query method
 * returns a Promise that rejects with the underlying sqlite3 error on failure.
 */
class DatabaseService {
  constructor() {
    this.dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
    // Populated by connect(); null while disconnected.
    this.db = null;
  }

  /**
   * Open the SQLite database at `this.dbPath`.
   *
   * @returns {Promise<void>} Resolves once the connection is open; rejects
   *   with the sqlite3 error if it cannot be opened.
   */
  connect() {
    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          console.error('❌ Database connection failed:', err);
          reject(err);
        } else {
          console.log('✅ Database connected');
          resolve();
        }
      });
    });
  }

  /**
   * Internal: run a statement and resolve with all result rows.
   *
   * @param {string} sql - SQL text with `?` placeholders.
   * @param {Array} params - Values bound to the placeholders, in order.
   * @returns {Promise<Object[]>} Result rows (empty array when there are none).
   */
  _queryAll(sql, params) {
    return new Promise((resolve, reject) => {
      if (!this.db) {
        reject(new Error('Database not connected; call connect() first'));
        return;
      }
      this.db.all(sql, params, (err, rows) => {
        if (err) reject(err);
        else resolve(rows || []);
      });
    });
  }

  /**
   * Internal: run a statement and resolve with the first result row.
   *
   * @param {string} sql - SQL text with `?` placeholders.
   * @param {Array} params - Values bound to the placeholders, in order.
   * @returns {Promise<Object|null>} The first row, or null when nothing matched.
   */
  _queryOne(sql, params) {
    return new Promise((resolve, reject) => {
      if (!this.db) {
        reject(new Error('Database not connected; call connect() first'));
        return;
      }
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row || null);
      });
    });
  }

  /**
   * Search for cities (complete travel rates) by substring.
   *
   * Matches `searchTerm` case-insensitively as a substring of the city name,
   * city key, country or province. Results are ranked: exact city-name match,
   * then exact city-key match, then city-name prefix match, then the rest.
   * At most 10 rows are returned.
   *
   * @param {string} searchTerm - Text to look for.
   * @returns {Promise<Object[]>} Rows formatted by `formatTravelRate`.
   */
  async searchCity(searchTerm) {
    const query = `
      SELECT * FROM travel_rates
      WHERE LOWER(city_name) LIKE LOWER(?)
         OR LOWER(city_key) LIKE LOWER(?)
         OR LOWER(country) LIKE LOWER(?)
         OR LOWER(province) LIKE LOWER(?)
      ORDER BY
        CASE
          WHEN LOWER(city_name) = LOWER(?) THEN 1
          WHEN LOWER(city_key) = LOWER(?) THEN 2
          WHEN LOWER(city_name) LIKE LOWER(?) THEN 3
          ELSE 4
        END
      LIMIT 10
    `;

    const term = `%${searchTerm}%`;
    const exactTerm = searchTerm.toLowerCase();
    const likeTerm = `${exactTerm}%`;

    const rows = await this._queryAll(
      query,
      [term, term, term, term, exactTerm, exactTerm, likeTerm],
    );
    return rows.map((row) => this.formatTravelRate(row));
  }

  /**
   * Get the complete travel rate for an exact (case-insensitive) city key.
   *
   * @param {string} cityKey - Value compared against the `city_key` column.
   * @returns {Promise<Object|null>} The row formatted by `formatTravelRate`,
   *   or null when no city has that key.
   */
  async getAccommodationRate(cityKey) {
    const query = `SELECT * FROM travel_rates WHERE LOWER(city_key) = LOWER(?) LIMIT 1`;
    const row = await this._queryOne(query, [cityKey]);
    return row ? this.formatTravelRate(row) : null;
  }

  /**
   * Get the accommodation rate of a city for a specific month.
   *
   * @param {string} cityKey - Value compared against the `city_key` column.
   * @param {number|string} month - Calendar month, 1 (January) to 12 (December).
   * @returns {Promise<{city: string, month: (number|string), rate: *, currency: string}|null>}
   *   The rate for that month, or null when the city is unknown.
   * @throws {RangeError} When `month` is not an integer between 1 and 12.
   */
  async getMonthlyRate(cityKey, month) {
    const monthNumber = Number(month);
    if (!Number.isInteger(monthNumber) || monthNumber < 1 || monthNumber > 12) {
      throw new RangeError(`Invalid month "${String(month)}": expected an integer from 1 to 12`);
    }

    const rate = await this.getAccommodationRate(cityKey);
    if (!rate) return null;

    // getAccommodationRate() returns the formatTravelRate() shape, whose
    // monthly figures live under accommodation.monthly (0-based index).
    return {
      city: rate.name,
      month: month,
      rate: rate.accommodation.monthly[monthNumber - 1],
      currency: rate.currency,
    };
  }

  /**
   * Full-text search across all cities.
   *
   * `searchTerm` is passed to a MATCH against the `travel_search` table
   * (presumably an FTS virtual table whose rowid equals `travel_rates.id`),
   * so MATCH query syntax applies to it. Exact city-name matches are listed
   * first; at most 20 rows are returned.
   *
   * @param {string} searchTerm - MATCH expression to search for.
   * @returns {Promise<Object[]>} Rows formatted by `formatTravelRate`.
   */
  async fullTextSearch(searchTerm) {
    const query = `
      SELECT a.* FROM travel_rates a
      WHERE a.id IN (
        SELECT rowid FROM travel_search
        WHERE travel_search MATCH ?
      )
      ORDER BY
        CASE
          WHEN LOWER(a.city_name) = LOWER(?) THEN 1
          ELSE 2
        END
      LIMIT 20
    `;

    const rows = await this._queryAll(query, [searchTerm, searchTerm]);
    return rows.map((row) => this.formatTravelRate(row));
  }

  /**
   * Format a `travel_rates` row as the complete travel-rate API object.
   *
   * @param {Object} row - Raw database row.
   * @returns {Object} Nested object with accommodation, meals, incidentals
   *   and totals. `fullDayCost` is the standard accommodation rate (or the
   *   January rate when that is falsy) plus the total daily allowance; it is
   *   NaN when either operand cannot be parsed as a number.
   */
  formatTravelRate(row) {
    return {
      cityKey: row.city_key,
      name: row.city_name,
      province: row.province,
      country: row.country,
      region: row.region,
      currency: row.currency,
      accommodation: {
        monthly: MONTH_PREFIXES.map((prefix) => row[`${prefix}_accommodation`]),
        standard: row.standard_accommodation,
      },
      meals: {
        breakfast: row.breakfast,
        lunch: row.lunch,
        dinner: row.dinner,
        total: row.total_meals,
      },
      incidentals: row.incidentals,
      totalDailyAllowance: row.total_daily_allowance,
      fullDayCost:
        parseFloat(row.standard_accommodation || row.jan_accommodation) +
        parseFloat(row.total_daily_allowance),
      isInternational: row.is_international === 1,
    };
  }

  /**
   * Legacy flat format, kept for backward compatibility.
   *
   * Each rate prefers the `*_accommodation` column and falls back to the
   * older `*_rate` column when the former is falsy.
   *
   * @param {Object} row - Raw database row.
   * @returns {Object} Flat object with `monthlyRates` and `standardRate`.
   */
  formatAccommodationRate(row) {
    return {
      cityKey: row.city_key,
      name: row.city_name,
      province: row.province,
      country: row.country,
      region: row.region,
      currency: row.currency,
      monthlyRates: MONTH_PREFIXES.map(
        (prefix) => row[`${prefix}_accommodation`] || row[`${prefix}_rate`],
      ),
      standardRate: row.standard_accommodation || row.standard_rate,
      isInternational: row.is_international === 1,
      effectiveDate: row.effective_date,
    };
  }

  /**
   * List all cities in a region (exact, case-sensitive match), by city name.
   *
   * @param {string} region - Value compared against the `region` column.
   * @returns {Promise<Object[]>} Rows formatted by `formatAccommodationRate`.
   */
  async getCitiesByRegion(region) {
    const query = `
      SELECT * FROM travel_rates
      WHERE region = ?
      ORDER BY city_name
    `;

    const rows = await this._queryAll(query, [region]);
    return rows.map((row) => this.formatAccommodationRate(row));
  }

  /**
   * List all cities in a country (case-insensitive match), by city name.
   *
   * @param {string} country - Value compared against the `country` column.
   * @returns {Promise<Object[]>} Rows formatted by `formatAccommodationRate`.
   */
  async getCitiesByCountry(country) {
    const query = `
      SELECT * FROM travel_rates
      WHERE LOWER(country) = LOWER(?)
      ORDER BY city_name
    `;

    const rows = await this._queryAll(query, [country]);
    return rows.map((row) => this.formatAccommodationRate(row));
  }

  /**
   * Get all distinct regions, sorted.
   *
   * @returns {Promise<Array>} Values of the `region` column.
   */
  async getAllRegions() {
    const query = `SELECT DISTINCT region FROM travel_rates ORDER BY region`;
    const rows = await this._queryAll(query, []);
    return rows.map((row) => row.region);
  }

  /**
   * Get all distinct countries, sorted.
   *
   * @returns {Promise<Array>} Values of the `country` column.
   */
  async getAllCountries() {
    const query = `SELECT DISTINCT country FROM travel_rates ORDER BY country`;
    const rows = await this._queryAll(query, []);
    return rows.map((row) => row.country);
  }

  /**
   * Autocomplete for city search.
   *
   * Returns cities whose name starts with `prefix` (case-insensitive); a city
   * whose whole name equals the prefix sorts first, then alphabetical order.
   *
   * @param {string} prefix - Leading characters typed by the user.
   * @param {number} [limit=10] - Maximum number of suggestions.
   * @returns {Promise<Object[]>} Raw rows with `city_name`, `country`, `region`.
   */
  async autocomplete(prefix, limit = 10) {
    const query = `
      SELECT city_name, country, region FROM travel_rates
      WHERE LOWER(city_name) LIKE LOWER(?)
      ORDER BY
        CASE
          WHEN LOWER(city_name) LIKE LOWER(?) THEN 1
          ELSE 2
        END,
        city_name
      LIMIT ?
    `;

    const term = `${prefix}%`;
    // No wildcard: LIKE then acts as a case-insensitive whole-name comparison.
    const exactTerm = `${prefix}`;

    return this._queryAll(query, [term, exactTerm, limit]);
  }

  /**
   * Close the database connection, if one is open.
   *
   * The handle is detached immediately so later queries fail with a clear
   * "not connected" error; the outcome of the close itself is logged once
   * sqlite3 reports it.
   */
  close() {
    if (!this.db) return;

    const handle = this.db;
    this.db = null;
    handle.close((err) => {
      if (err) console.error('❌ Failed to close database:', err);
      else console.log('✅ Database connection closed');
    });
  }
}
|
||||
|
||||
// Export one DatabaseService instance created at module load. The constructor
// only records the database path; connect() must still be called before querying.
module.exports = new DatabaseService();
|
||||
Reference in New Issue
Block a user