mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented a Python scraper using BeautifulSoup and pandas to automatically collect travel rates from the official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Defaulted to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created the SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added a migration script to convert scraped data to the Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in the main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
185
utils/cache.js
Normal file
185
utils/cache.js
Normal file
@@ -0,0 +1,185 @@
|
||||
const NodeCache = require('node-cache');
|
||||
const logger = require('./logger');
|
||||
|
||||
/**
 * Cache Service
 *
 * Provides in-memory caching for API responses, backed by three independent
 * node-cache instances so that each kind of data gets its own lifetime:
 *   - flightCache: flight search results (1 hour TTL)
 *   - rateCache:   accommodation rates (24 hour TTL)
 *   - dbCache:     database query results (5 minute TTL)
 *
 * Every cache is created with `useClones: false`. Per the node-cache
 * documentation this returns the stored reference itself on reads, so callers
 * should treat cached values as read-only.
 */
class CacheService {
  constructor() {
    // Flight cache: 1 hour TTL, expired keys swept every 10 minutes
    this.flightCache = new NodeCache({
      stdTTL: 3600,
      checkperiod: 600,
      useClones: false,
    });

    // Rate cache: 24 hours TTL (rates don't change often), swept hourly
    this.rateCache = new NodeCache({
      stdTTL: 86400,
      checkperiod: 3600,
      useClones: false,
    });

    // Database query cache: 5 minutes TTL, swept every minute
    this.dbCache = new NodeCache({
      stdTTL: 300,
      checkperiod: 60,
      useClones: false,
    });

    // Set up event listeners
    this.setupEventListeners();
  }

  /**
   * Attach debug-level logging to the cache events of interest.
   * Only keys are logged; cached payloads are never written to the log.
   */
  setupEventListeners() {
    // Flight cache events
    this.flightCache.on('set', (key) => {
      logger.debug(`Flight cache SET: ${key}`);
    });

    this.flightCache.on('expired', (key) => {
      logger.debug(`Flight cache EXPIRED: ${key}`);
    });

    // Rate cache events
    this.rateCache.on('set', (key) => {
      logger.debug(`Rate cache SET: ${key}`);
    });

    // DB cache events
    this.dbCache.on('set', (key) => {
      logger.debug(`DB cache SET: ${key}`);
    });
  }

  /**
   * Generate cache key for flight searches.
   *
   * The key is lowercased so lookups are case-insensitive.
   *
   * @param {string} origin - Origin city or airport.
   * @param {string} destination - Destination city or airport.
   * @param {*} departureDate - Interpolated into the key as-is.
   * @param {*} returnDate - Interpolated into the key as-is (may be absent).
   * @param {number} [adults=1] - Number of adult travellers.
   * @returns {string} Key of the form `flight:<origin>:<destination>:<departure>:<return>:<adults>`.
   */
  generateFlightKey(origin, destination, departureDate, returnDate, adults = 1) {
    return `flight:${origin}:${destination}:${departureDate}:${returnDate}:${adults}`.toLowerCase();
  }

  /**
   * Generate cache key for accommodation searches.
   *
   * @param {string} city - City name; matched case-insensitively.
   * @returns {string} Key of the form `accommodation:<city>`, lowercased.
   */
  generateAccommodationKey(city) {
    return `accommodation:${city}`.toLowerCase();
  }

  /**
   * Generate cache key for database queries.
   *
   * The key is deliberately NOT lowercased: query text and parameter values
   * can be case-sensitive, so folding case would let two different lookups
   * (e.g. params ['Ottawa'] and ['ottawa']) share one cached result.
   *
   * @param {string} query - Query text.
   * @param {*} [params] - Query parameters; JSON-encoded into the key when truthy.
   * @returns {string} Key of the form `db:<query>:<json params>`.
   */
  generateDbKey(query, params) {
    const paramStr = params ? JSON.stringify(params) : '';
    return `db:${query}:${paramStr}`;
  }

  /**
   * Get flight from cache.
   *
   * @returns {*} The cached value, or `null` when nothing is cached for the key.
   */
  getFlight(origin, destination, departureDate, returnDate, adults) {
    const key = this.generateFlightKey(origin, destination, departureDate, returnDate, adults);
    const cached = this.flightCache.get(key);

    // node-cache reports a miss as `undefined`; test for that explicitly so
    // falsy payloads (0, '', false) are still treated as hits.
    if (cached !== undefined) {
      logger.info(`Flight cache HIT: ${key}`);
      return cached;
    }

    logger.debug(`Flight cache MISS: ${key}`);
    return null;
  }

  /**
   * Set flight in cache under the key derived from the search parameters.
   *
   * @param {*} data - Value to store; kept by reference (`useClones: false`).
   */
  setFlight(origin, destination, departureDate, returnDate, adults, data) {
    const key = this.generateFlightKey(origin, destination, departureDate, returnDate, adults);
    this.flightCache.set(key, data);
    logger.info(`Flight cached: ${key}`);
  }

  /**
   * Get accommodation rate from cache.
   *
   * @param {string} city - City name; matched case-insensitively.
   * @returns {*} The cached value, or `null` when nothing is cached for the city.
   */
  getAccommodation(city) {
    const key = this.generateAccommodationKey(city);
    const cached = this.rateCache.get(key);

    // Same miss test as getFlight: only `undefined` means "not cached".
    if (cached !== undefined) {
      logger.debug(`Accommodation cache HIT: ${key}`);
      return cached;
    }

    logger.debug(`Accommodation cache MISS: ${key}`);
    return null;
  }

  /**
   * Set accommodation rate in cache.
   *
   * @param {string} city - City name; matched case-insensitively.
   * @param {*} data - Value to store; kept by reference (`useClones: false`).
   */
  setAccommodation(city, data) {
    const key = this.generateAccommodationKey(city);
    this.rateCache.set(key, data);
    logger.debug(`Accommodation cached: ${key}`);
  }

  /**
   * Get database query result from cache.
   *
   * Unlike getFlight/getAccommodation this returns node-cache's own result,
   * i.e. `undefined` (not `null`) on a miss.
   *
   * @param {string} query - Query text.
   * @param {*} [params] - Query parameters.
   * @returns {*} The cached value, or `undefined` when nothing is cached.
   */
  getDbQuery(query, params) {
    const key = this.generateDbKey(query, params);
    return this.dbCache.get(key);
  }

  /**
   * Set database query result in cache.
   *
   * @param {string} query - Query text.
   * @param {*} params - Query parameters.
   * @param {*} data - Value to store; kept by reference (`useClones: false`).
   */
  setDbQuery(query, params, data) {
    const key = this.generateDbKey(query, params);
    this.dbCache.set(key, data);
  }

  /**
   * Clear the flight cache.
   */
  clearFlightCache() {
    this.flightCache.flushAll();
    logger.info('Flight cache cleared');
  }

  /**
   * Clear the accommodation rate cache.
   */
  clearRateCache() {
    this.rateCache.flushAll();
    logger.info('Rate cache cleared');
  }

  /**
   * Clear the database query cache.
   */
  clearDbCache() {
    this.dbCache.flushAll();
    logger.info('DB cache cleared');
  }

  /**
   * Clear all caches.
   */
  clearAll() {
    this.clearFlightCache();
    this.clearRateCache();
    this.clearDbCache();
    logger.info('All caches cleared');
  }

  /**
   * Get cache statistics.
   *
   * @returns {{flights: object, rates: object, database: object}} The
   *   `getStats()` result of each underlying cache.
   */
  getStats() {
    return {
      flights: this.flightCache.getStats(),
      rates: this.rateCache.getStats(),
      database: this.dbCache.getStats(),
    };
  }
}
|
||||
|
||||
// Export singleton instance: the module exposes one CacheService object
// rather than the class itself.
const cacheService = new CacheService();

module.exports = cacheService;
|
||||
78
utils/logger.js
Normal file
78
utils/logger.js
Normal file
@@ -0,0 +1,78 @@
|
||||
const winston = require('winston');
|
||||
const DailyRotateFile = require('winston-daily-rotate-file');
|
||||
const path = require('path');
|
||||
|
||||
// Log format used as the logger-wide default: the steps below are applied in
// order and the entry is finally serialized as JSON.
const logFormatSteps = [
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
  winston.format.errors({ stack: true }),
  winston.format.splat(),
  winston.format.json(),
];

const logFormat = winston.format.combine(...logFormatSteps);
|
||||
|
||||
/**
 * Renders one log entry as a single console line:
 * `<timestamp> [<level>]: <message>`, followed by the JSON of any remaining
 * enumerable fields on the entry when there are some.
 */
const renderConsoleLine = ({ timestamp, level, message, ...meta }) => {
  const line = `${timestamp} [${level}]: ${message}`;
  return Object.keys(meta).length > 0 ? `${line} ${JSON.stringify(meta)}` : line;
};

// Console format (more readable than the JSON format)
const consoleFormat = winston.format.combine(
  winston.format.colorize(),
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
  winston.format.printf(renderConsoleLine)
);
|
||||
|
||||
// Path of the directory that receives the log files: `logs`, one level above
// this file's directory. This statement only computes the path; it does not
// create the directory.
const logsDir = path.join(__dirname, '../logs');
|
||||
|
||||
/**
 * Builds a DailyRotateFile transport writing `fileName` inside `logsDir`,
 * using the shared 'YYYY-MM-DD' date pattern plus the given extra options.
 */
const rotatingFileTransport = (fileName, extraOptions) =>
  new DailyRotateFile({
    filename: path.join(logsDir, fileName),
    datePattern: 'YYYY-MM-DD',
    ...extraOptions,
  });

// Logger configuration
const logger = winston.createLogger({
  // Minimum level comes from LOG_LEVEL, falling back to 'info' when unset/empty
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: { service: 'govt-travel-estimator' },
  transports: [
    // Error logs
    rotatingFileTransport('error-%DATE%.log', {
      level: 'error',
      maxFiles: '30d',
      maxSize: '20m',
    }),
    // Combined logs
    rotatingFileTransport('combined-%DATE%.log', {
      maxFiles: '14d',
      maxSize: '20m',
    }),
    // Console output (uses its own, more readable format)
    new winston.transports.Console({
      format: consoleFormat,
    }),
  ],
  // Uncaught exceptions get their own file
  exceptionHandlers: [
    rotatingFileTransport('exceptions-%DATE%.log', { maxFiles: '30d' }),
  ],
  // Unhandled promise rejections get their own file
  rejectionHandlers: [
    rotatingFileTransport('rejections-%DATE%.log', { maxFiles: '30d' }),
  ],
});
|
||||
|
||||
// Stream object for Morgan (HTTP request logging): each message written to it
// is trimmed of surrounding whitespace and logged at info level.
logger.stream = {
  write(message) {
    logger.info(message.trim());
  },
};

module.exports = logger;
|
||||
129
utils/validation.js
Normal file
129
utils/validation.js
Normal file
@@ -0,0 +1,129 @@
|
||||
const Joi = require('joi');
|
||||
|
||||
/**
 * Flight search validation.
 *
 * Fields:
 *   - origin, destination: required, trimmed strings of 2-100 characters.
 *   - departureDate: required ISO date, not earlier than today.
 *   - returnDate: optional ISO date on or after departureDate; null and ''
 *     are accepted as "no return date".
 *   - adults: integer 1-9, defaults to 1.
 */
const flightSearchSchema = Joi.object({
  origin: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'Origin city is required',
      // Missing key (as opposed to an empty string) raises any.required
      'any.required': 'Origin city is required',
      'string.min': 'Origin city must be at least 2 characters',
      'string.max': 'Origin city cannot exceed 100 characters',
    }),
  destination: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'Destination city is required',
      'any.required': 'Destination city is required',
      'string.min': 'Destination city must be at least 2 characters',
      'string.max': 'Destination city cannot exceed 100 characters',
    }),
  departureDate: Joi.date()
    .iso()
    .required()
    // `.min('now')` compares against the current instant, which rejects a
    // date-only value for today (it parses as 00:00 UTC, already in the past).
    // Compare against the start of the server's current calendar day instead,
    // expressed as UTC midnight so it lines up with how date-only ISO strings
    // are parsed. NOTE(review): values carrying a time component are compared
    // against that same day boundary, so times earlier today are accepted.
    .custom((value, helpers) => {
      const now = new Date();
      const startOfToday = new Date(
        Date.UTC(now.getFullYear(), now.getMonth(), now.getDate())
      );

      if (value < startOfToday) {
        return helpers.error('date.min', { limit: startOfToday });
      }

      return value;
    })
    .messages({
      'date.base': 'Departure date must be a valid date',
      'date.min': 'Departure date cannot be in the past',
      'any.required': 'Departure date is required',
    }),
  returnDate: Joi.date()
    .iso()
    .min(Joi.ref('departureDate'))
    .optional()
    .allow(null, '')
    .messages({
      'date.base': 'Return date must be a valid date',
      // min() is inclusive, so a same-day return is valid
      'date.min': 'Return date must be on or after departure date',
    }),
  adults: Joi.number()
    .integer()
    .min(1)
    .max(9)
    .default(1)
    .messages({
      'number.base': 'Number of adults must be a number',
      'number.integer': 'Number of adults must be a whole number',
      'number.min': 'At least 1 adult is required',
      'number.max': 'Maximum 9 adults allowed',
    }),
});
|
||||
|
||||
/**
 * Accommodation search validation.
 *
 * Requires `city`: a trimmed string of 2-100 characters.
 */
const accommodationSearchSchema = Joi.object({
  city: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'City name is required',
      // Missing key (as opposed to an empty string) raises any.required
      'any.required': 'City name is required',
      'string.min': 'City name must be at least 2 characters',
      'string.max': 'City name cannot exceed 100 characters',
    }),
});
|
||||
|
||||
/**
 * City key validation.
 *
 * Requires `cityKey`: a trimmed string of 2-100 characters.
 */
const cityKeySchema = Joi.object({
  cityKey: Joi.string()
    .min(2)
    .max(100)
    .required()
    .trim()
    .messages({
      'string.empty': 'City key is required',
      // Missing key (as opposed to an empty string) raises any.required
      'any.required': 'City key is required',
      'string.min': 'City key must be at least 2 characters',
      'string.max': 'City key cannot exceed 100 characters',
    }),
});
|
||||
|
||||
/**
 * Month validation.
 *
 * Requires `cityKey` (any non-empty string) and `month` (integer 1-12).
 */
const monthSchema = Joi.object({
  cityKey: Joi.string()
    .required()
    .messages({
      'string.empty': 'City key is required',
      'any.required': 'City key is required',
    }),
  month: Joi.number()
    .integer()
    .min(1)
    .max(12)
    .required()
    .messages({
      'number.base': 'Month must be a number',
      'number.integer': 'Month must be a whole number',
      'number.min': 'Month must be between 1 and 12',
      'number.max': 'Month must be between 1 and 12',
      'any.required': 'Month is required',
    }),
});
|
||||
|
||||
/**
 * Validation middleware factory.
 *
 * Builds an Express-style middleware that validates one property of the
 * request against `schema`. All violations are collected (`abortEarly: false`)
 * and keys not described by the schema are dropped (`stripUnknown: true`).
 *
 * On failure the middleware responds with HTTP 400 and
 * `{ success: false, message: 'Validation failed', errors: [{ field, message }] }`
 * and does not call `next`. On success the request property is replaced with
 * the validated, sanitized value and `next()` is called.
 *
 * @param {object} schema - Object exposing `validate(value, options)` that
 *   returns `{ error, value }` (e.g. a Joi schema).
 * @param {string} [property='query'] - Name of the request property to
 *   validate, such as 'query', 'params' or 'body'.
 * @returns {Function} Middleware with the signature `(req, res, next)`.
 */
const validate = (schema, property = 'query') => {
  return (req, res, next) => {
    const { error, value } = schema.validate(req[property], {
      abortEarly: false,
      stripUnknown: true,
    });

    if (error) {
      // Flatten each violation into a field path plus its message
      const errors = error.details.map(({ path, message }) => ({
        field: path.join('.'),
        message,
      }));

      return res.status(400).json({
        success: false,
        message: 'Validation failed',
        errors,
      });
    }

    // Replace the request property with validated and sanitized values
    req[property] = value;
    next();
  };
};
|
||||
|
||||
module.exports = {
|
||||
validate,
|
||||
flightSearchSchema,
|
||||
accommodationSearchSchema,
|
||||
cityKeySchema,
|
||||
monthSchema
|
||||
};
|
||||
Reference in New Issue
Block a user