Add Python web scraper for NJC travel rates with currency extraction

- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Defaulted to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
2026-01-13 09:21:43 -05:00
commit 15094ac94b
84 changed files with 19859 additions and 0 deletions

185
utils/cache.js Normal file
View File

@@ -0,0 +1,185 @@
const NodeCache = require('node-cache');
const logger = require('./logger');
/**
 * Cache Service
 * Provides in-memory caching for API responses.
 *
 * Three independent NodeCache instances are kept so that each kind of data
 * can have its own TTL:
 *   - flightCache: flight search results (1 hour)
 *   - rateCache:   accommodation rates (24 hours)
 *   - dbCache:     database query results (5 minutes)
 *
 * All caches are created with `useClones: false`.
 * NOTE(review): per the node-cache documentation this means `get` hands back
 * the stored reference rather than a copy, so callers should treat cached
 * values as read-only — confirm against the callers.
 */
class CacheService {
  constructor() {
    // Flight cache: 1 hour TTL
    this.flightCache = new NodeCache({
      stdTTL: 3600,
      checkperiod: 600,
      useClones: false
    });

    // Rate cache: 24 hours TTL (rates don't change often)
    this.rateCache = new NodeCache({
      stdTTL: 86400,
      checkperiod: 3600,
      useClones: false
    });

    // Database query cache: 5 minutes TTL
    this.dbCache = new NodeCache({
      stdTTL: 300,
      checkperiod: 60,
      useClones: false
    });

    // Set up event listeners
    this.setupEventListeners();
  }

  /**
   * Attach debug logging to cache events.
   * Only the key is logged; the cached value is intentionally not read.
   */
  setupEventListeners() {
    // Flight cache events
    this.flightCache.on('set', (key) => {
      logger.debug(`Flight cache SET: ${key}`);
    });
    this.flightCache.on('expired', (key) => {
      logger.debug(`Flight cache EXPIRED: ${key}`);
    });

    // Rate cache events
    this.rateCache.on('set', (key) => {
      logger.debug(`Rate cache SET: ${key}`);
    });

    // DB cache events
    this.dbCache.on('set', (key) => {
      logger.debug(`DB cache SET: ${key}`);
    });
  }

  /**
   * Generate cache key for flight searches.
   * The whole key is lower-cased, so lookups are case-insensitive.
   *
   * @param {*} origin
   * @param {*} destination
   * @param {*} departureDate
   * @param {*} returnDate
   * @param {*} [adults=1]
   * @returns {string} Key of the form `flight:<origin>:<destination>:<departure>:<return>:<adults>`.
   */
  generateFlightKey(origin, destination, departureDate, returnDate, adults = 1) {
    return `flight:${origin}:${destination}:${departureDate}:${returnDate}:${adults}`.toLowerCase();
  }

  /**
   * Generate cache key for accommodation searches (case-insensitive).
   *
   * @param {*} city
   * @returns {string} Key of the form `accommodation:<city>`.
   */
  generateAccommodationKey(city) {
    return `accommodation:${city}`.toLowerCase();
  }

  /**
   * Generate cache key for database queries.
   *
   * The query text is lower-cased so the same statement written with
   * different keyword casing shares one entry. The parameters are NOT
   * lower-cased: they are data, and two lookups whose parameters differ only
   * by case must not be served from the same cache entry.
   *
   * @param {*} query - Query text.
   * @param {*} [params] - Query parameters; serialized with JSON.stringify when truthy.
   * @returns {string} Key of the form `db:<query>:<json params>`.
   */
  generateDbKey(query, params) {
    const paramStr = params ? JSON.stringify(params) : '';
    const normalizedQuery = String(query).toLowerCase();
    return `db:${normalizedQuery}:${paramStr}`;
  }

  /**
   * Get flight from cache.
   *
   * @returns {*} The cached value, or `null` when nothing is cached for the key.
   */
  getFlight(origin, destination, departureDate, returnDate, adults) {
    const key = this.generateFlightKey(origin, destination, departureDate, returnDate, adults);
    const cached = this.flightCache.get(key);

    // Compare against undefined rather than relying on truthiness so that
    // falsy cached values (0, '', false) still count as hits.
    if (cached !== undefined) {
      logger.info(`Flight cache HIT: ${key}`);
      return cached;
    }

    logger.debug(`Flight cache MISS: ${key}`);
    return null;
  }

  /**
   * Set flight in cache.
   *
   * @param {*} data - Value to store under the generated flight key.
   */
  setFlight(origin, destination, departureDate, returnDate, adults, data) {
    const key = this.generateFlightKey(origin, destination, departureDate, returnDate, adults);
    this.flightCache.set(key, data);
    logger.info(`Flight cached: ${key}`);
  }

  /**
   * Get accommodation rate from cache.
   *
   * @returns {*} The cached value, or `null` when nothing is cached for the city.
   */
  getAccommodation(city) {
    const key = this.generateAccommodationKey(city);
    const cached = this.rateCache.get(key);

    // A cached rate of 0 is a legitimate hit, so test for undefined explicitly.
    if (cached !== undefined) {
      logger.debug(`Accommodation cache HIT: ${key}`);
      return cached;
    }

    logger.debug(`Accommodation cache MISS: ${key}`);
    return null;
  }

  /**
   * Set accommodation rate in cache.
   *
   * @param {*} city
   * @param {*} data - Value to store under the generated accommodation key.
   */
  setAccommodation(city, data) {
    const key = this.generateAccommodationKey(city);
    this.rateCache.set(key, data);
    logger.debug(`Accommodation cached: ${key}`);
  }

  /**
   * Get database query result from cache.
   *
   * Unlike getFlight/getAccommodation, this returns whatever the underlying
   * cache's `get` returns for a miss (it is not mapped to `null`).
   */
  getDbQuery(query, params) {
    const key = this.generateDbKey(query, params);
    return this.dbCache.get(key);
  }

  /**
   * Set database query result in cache.
   */
  setDbQuery(query, params, data) {
    const key = this.generateDbKey(query, params);
    this.dbCache.set(key, data);
  }

  /**
   * Clear the flight cache.
   */
  clearFlightCache() {
    this.flightCache.flushAll();
    logger.info('Flight cache cleared');
  }

  /**
   * Clear the accommodation rate cache.
   */
  clearRateCache() {
    this.rateCache.flushAll();
    logger.info('Rate cache cleared');
  }

  /**
   * Clear the database query cache.
   */
  clearDbCache() {
    this.dbCache.flushAll();
    logger.info('DB cache cleared');
  }

  /**
   * Clear all caches.
   */
  clearAll() {
    this.clearFlightCache();
    this.clearRateCache();
    this.clearDbCache();
    logger.info('All caches cleared');
  }

  /**
   * Get cache statistics.
   *
   * @returns {{flights: *, rates: *, database: *}} The `getStats()` result of each cache.
   */
  getStats() {
    return {
      flights: this.flightCache.getStats(),
      rates: this.rateCache.getStats(),
      database: this.dbCache.getStats()
    };
  }
}
// Export singleton instance: the service is constructed once, here, and that
// one object (with its three caches) is what this module exports.
module.exports = new CacheService();

78
utils/logger.js Normal file
View File

@@ -0,0 +1,78 @@
const winston = require('winston');
const DailyRotateFile = require('winston-daily-rotate-file');
const path = require('path');
// Format factories used below, pulled off winston.format once.
const { colorize, combine, errors, json, printf, splat, timestamp } = winston.format;

// Timestamp layout shared by the structured and the console format.
const TIMESTAMP_LAYOUT = 'YYYY-MM-DD HH:mm:ss';

// Structured format: timestamp, error stacks, splat interpolation, JSON output.
const logFormat = combine(
  timestamp({ format: TIMESTAMP_LAYOUT }),
  errors({ stack: true }),
  splat(),
  json()
);

// Console format (more readable): colorized "<timestamp> [<level>]: <message>",
// followed by any remaining metadata serialized as JSON.
const consoleFormat = combine(
  colorize(),
  timestamp({ format: TIMESTAMP_LAYOUT }),
  printf(({ timestamp, level, message, ...meta }) => {
    const line = `${timestamp} [${level}]: ${message}`;
    // Only append the metadata blob when there is something to show.
    return Object.keys(meta).length > 0 ? `${line} ${JSON.stringify(meta)}` : line;
  })
);
// Target directory for log files: a `logs` folder one level above this file's
// directory. Nothing here creates the directory.
// NOTE(review): presumably the rotate-file transport creates it on first
// write — confirm.
const logsDir = path.join(__dirname, '..', 'logs');

// Builds a daily-rotating file transport that writes `<prefix>-%DATE%.log`
// inside logsDir; `options` carries the per-transport settings.
const createRotatingFile = (prefix, options) =>
  new DailyRotateFile({
    filename: path.join(logsDir, `${prefix}-%DATE%.log`),
    datePattern: 'YYYY-MM-DD',
    ...options
  });

// Logger configuration
const logger = winston.createLogger({
  // Level comes from LOG_LEVEL, falling back to 'info' when unset or empty.
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: { service: 'govt-travel-estimator' },
  transports: [
    // Error logs
    createRotatingFile('error', { level: 'error', maxFiles: '30d', maxSize: '20m' }),
    // Combined logs
    createRotatingFile('combined', { maxFiles: '14d', maxSize: '20m' }),
    // Console output
    new winston.transports.Console({ format: consoleFormat })
  ],
  // Uncaught exceptions and unhandled rejections each get their own file.
  exceptionHandlers: [createRotatingFile('exceptions', { maxFiles: '30d' })],
  rejectionHandlers: [createRotatingFile('rejections', { maxFiles: '30d' })]
});
// Stream adapter for Morgan (HTTP request logging): each message written to
// it is trimmed and logged at info level.
logger.stream = {
  write(message) {
    const line = message.trim();
    logger.info(line);
  }
};
// Export the configured logger (including the `stream` adapter attached above).
module.exports = logger;

129
utils/validation.js Normal file
View File

@@ -0,0 +1,129 @@
const Joi = require('joi');
// Flight search validation
//
// Fields:
//   origin, destination - required strings, trimmed, 2-100 characters
//   departureDate       - required ISO date, not earlier than "now"
//   returnDate          - optional ISO date (null / '' allowed), not earlier
//                         than departureDate
//   adults              - integer 1-9, defaults to 1
//
// Every field that is `.required()` carries an 'any.required' message so an
// omitted field produces the same custom wording as an empty one.
const flightSearchSchema = Joi.object({
  origin: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'Origin city is required',
      'any.required': 'Origin city is required',
      'string.min': 'Origin city must be at least 2 characters',
      'string.max': 'Origin city cannot exceed 100 characters'
    }),
  destination: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'Destination city is required',
      'any.required': 'Destination city is required',
      'string.min': 'Destination city must be at least 2 characters',
      'string.max': 'Destination city cannot exceed 100 characters'
    }),
  // NOTE(review): min('now') compares against the current instant, so a
  // date-only value for today may be rejected as "in the past" — confirm
  // whether same-day departures are meant to be accepted.
  departureDate: Joi.date()
    .iso()
    .min('now')
    .required()
    .messages({
      'date.base': 'Departure date must be a valid date',
      'date.min': 'Departure date cannot be in the past',
      'any.required': 'Departure date is required'
    }),
  // min() is an inclusive lower bound, so a return on the departure date
  // itself passes; the message is worded to match.
  returnDate: Joi.date()
    .iso()
    .min(Joi.ref('departureDate'))
    .optional()
    .allow(null, '')
    .messages({
      'date.base': 'Return date must be a valid date',
      'date.min': 'Return date must be on or after departure date'
    }),
  adults: Joi.number()
    .integer()
    .min(1)
    .max(9)
    .default(1)
    .messages({
      'number.base': 'Number of adults must be a number',
      'number.integer': 'Number of adults must be a whole number',
      'number.min': 'At least 1 adult is required',
      'number.max': 'Maximum 9 adults allowed'
    })
});
// Accommodation search validation
//
// `city` is a required string, trimmed, 2-100 characters. Both an empty
// value ('string.empty') and an omitted one ('any.required') report the same
// custom "required" message.
const accommodationSearchSchema = Joi.object({
  city: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'City name is required',
      'any.required': 'City name is required',
      'string.min': 'City name must be at least 2 characters',
      'string.max': 'City name cannot exceed 100 characters'
    })
});
// City key validation
//
// `cityKey` is a required string, trimmed, 2-100 characters. Both an empty
// value ('string.empty') and an omitted one ('any.required') report the same
// custom "required" message.
const cityKeySchema = Joi.object({
  cityKey: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'City key is required',
      'any.required': 'City key is required'
    })
});
// Month validation
//
// Shared wording for both ends of the allowed month range.
const MONTH_RANGE_MESSAGE = 'Month must be between 1 and 12';

// `cityKey` is a required string; `month` is a required integer from 1 to 12.
const monthSchema = Joi.object({
  cityKey: Joi.string().required(),
  month: Joi.number()
    .integer()
    .min(1)
    .max(12)
    .required()
    .messages({
      'number.min': MONTH_RANGE_MESSAGE,
      'number.max': MONTH_RANGE_MESSAGE,
      'any.required': 'Month is required'
    })
});
/**
 * Validation middleware factory.
 *
 * Returns an Express-style middleware that validates one property of the
 * request against `schema` (all errors collected, unknown keys stripped).
 *
 * - On failure: responds with HTTP 400 and
 *   `{ success: false, message: 'Validation failed', errors: [{ field, message }] }`;
 *   `next` is not called and the request is left untouched.
 * - On success: replaces the validated request property with the validated
 *   and sanitized value, then calls `next()`.
 *
 * @param {object} schema - Object exposing `validate(value, options)` that
 *   returns `{ error, value }` (e.g. a Joi schema).
 * @param {string} [source='query'] - Name of the request property to
 *   validate, e.g. 'query', 'body' or 'params'. Defaults to 'query', which
 *   is the original behavior.
 * @returns {function} Middleware `(req, res, next)`.
 */
const validate = (schema, source = 'query') => {
  return (req, res, next) => {
    const { error, value } = schema.validate(req[source], {
      abortEarly: false,
      stripUnknown: true
    });

    if (error) {
      // Flatten each validation detail to the field path and its message.
      const errors = error.details.map(({ path, message }) => ({
        field: path.join('.'),
        message
      }));

      return res.status(400).json({
        success: false,
        message: 'Validation failed',
        errors
      });
    }

    // Replace the validated request property with validated and sanitized values
    req[source] = value;
    next();
  };
};
// Public API: the middleware factory plus the four schemas defined above.
module.exports = {
validate,
flightSearchSchema,
accommodationSearchSchema,
cityKeySchema,
monthSchema
};