Add Python web scraper for NJC travel rates with currency extraction

- Implemented a Python scraper using BeautifulSoup and pandas to automatically collect travel rates from the official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Defaulted to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
2026-01-13 09:21:43 -05:00
commit 15094ac94b
84 changed files with 19859 additions and 0 deletions

78
utils/logger.js Normal file
View File

@@ -0,0 +1,78 @@
const winston = require('winston');
const DailyRotateFile = require('winston-daily-rotate-file');
const path = require('path');
// Structured format shared by every transport that does not override it:
// timestamp -> error stack capture -> printf-style splat interpolation -> JSON.
// The order of the stages is significant, so it is kept exactly as listed.
const logFormat = (() => {
  const { combine, timestamp, errors, splat, json } = winston.format;

  return combine(
    timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
    errors({ stack: true }),
    splat(),
    json()
  );
})();
// Human-readable format for the console transport:
// "<timestamp> [<colorized level>]: <message>", followed by any remaining
// fields of the log entry serialized as JSON when there are some.
const consoleFormat = winston.format.combine(
  winston.format.colorize(),
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
  winston.format.printf((info) => {
    // Everything that is not part of the line prefix is treated as metadata.
    const { timestamp, level, message, ...extra } = info;
    const prefix = `${timestamp} [${level}]: ${message}`;
    const hasExtra = Object.keys(extra).length > 0;

    return hasExtra ? `${prefix} ${JSON.stringify(extra)}` : prefix;
  })
);
// Path of the logs directory: a `logs` folder next to this file's parent directory.
// NOTE(review): this line only computes the path; nothing in this file creates the
// directory. Presumably the rotating file transports create it on first write — confirm.
const logsDir = path.join(__dirname, '..', 'logs');
/**
 * Builds a daily-rotating file transport that writes to
 * `<logsDir>/<prefix>-%DATE%.log` with a `YYYY-MM-DD` date pattern.
 *
 * @param {string} prefix - File name prefix placed before `-%DATE%.log`.
 * @param {object} options - Extra transport options (e.g. `level`, `maxFiles`, `maxSize`).
 * @returns {DailyRotateFile} The configured transport instance.
 */
const createRotatingTransport = (prefix, options) =>
  new DailyRotateFile({
    filename: path.join(logsDir, `${prefix}-%DATE%.log`),
    datePattern: 'YYYY-MM-DD',
    ...options
  });

// Application-wide logger. The minimum level comes from LOG_LEVEL and falls
// back to 'info'; every entry is tagged with the service name.
const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: { service: 'govt-travel-estimator' },
  transports: [
    // Error-level entries only
    createRotatingTransport('error', { level: 'error', maxFiles: '30d', maxSize: '20m' }),
    // Every entry at or above the logger level
    createRotatingTransport('combined', { maxFiles: '14d', maxSize: '20m' }),
    // Console output uses the readable format instead of JSON
    new winston.transports.Console({ format: consoleFormat })
  ],
  // Uncaught exceptions and unhandled promise rejections get their own files
  exceptionHandlers: [createRotatingTransport('exceptions', { maxFiles: '30d' })],
  rejectionHandlers: [createRotatingTransport('rejections', { maxFiles: '30d' })]
});
// Writable-stream-like adapter for Morgan (HTTP request logging): each line
// handed to `write` is logged at info level with surrounding whitespace
// (such as a trailing newline) removed.
logger.stream = {
  write(message) {
    const line = message.trim();
    logger.info(line);
  }
};

module.exports = logger;