mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website - Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.) - Added country extraction from table titles for international rates - Flatten pandas MultiIndex columns for cleaner data structure - Default to CAD for domestic Canadian sources (accommodations and domestic tables) - Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations) - Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities - Added migration script to convert scraped data to Node.js database format - Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3) - Fixed city validation and currency conversion in main app - Added comprehensive debug and verification scripts This replaces manual JSON maintenance with automated data collection from official government source.
This commit is contained in:
504
server.js
Normal file
504
server.js
Normal file
@@ -0,0 +1,504 @@
|
||||
require("dotenv").config();
|
||||
const express = require("express");
|
||||
const path = require("path");
|
||||
const helmet = require("helmet");
|
||||
const compression = require("compression");
|
||||
const cors = require("cors");
|
||||
const rateLimit = require("express-rate-limit");
|
||||
const { searchFlights, getAirportCode } = require("./flightService");
|
||||
const dbService = require("./services/databaseService");
|
||||
const logger = require("./utils/logger");
|
||||
const cache = require("./utils/cache");
|
||||
const {
|
||||
validate,
|
||||
flightSearchSchema,
|
||||
accommodationSearchSchema,
|
||||
} = require("./utils/validation");
|
||||
|
||||
const app = express();
|
||||
const PORT = process.env.PORT || 5001;
|
||||
|
||||
// Security middleware
|
||||
app.use(
|
||||
helmet({
|
||||
contentSecurityPolicy: {
|
||||
directives: {
|
||||
defaultSrc: ["'self'"],
|
||||
styleSrc: ["'self'", "'unsafe-inline'"],
|
||||
scriptSrc: ["'self'", "'unsafe-inline'"],
|
||||
imgSrc: ["'self'", "data:", "https:"],
|
||||
},
|
||||
},
|
||||
})
|
||||
);
|
||||
|
||||
// Compression middleware
|
||||
app.use(compression());
|
||||
|
||||
// CORS configuration
|
||||
app.use(
|
||||
cors({
|
||||
origin: process.env.CORS_ORIGIN || "*",
|
||||
methods: ["GET", "POST"],
|
||||
allowedHeaders: ["Content-Type", "Authorization"],
|
||||
})
|
||||
);
|
||||
|
||||
// Rate limiting
|
||||
const apiLimiter = rateLimit({
|
||||
windowMs: 15 * 60 * 1000, // 15 minutes
|
||||
max: 100, // Limit each IP to 100 requests per windowMs
|
||||
message: { error: "Too many requests, please try again later." },
|
||||
standardHeaders: true,
|
||||
legacyHeaders: false,
|
||||
});
|
||||
|
||||
const flightLimiter = rateLimit({
|
||||
windowMs: 5 * 60 * 1000, // 5 minutes
|
||||
max: 20, // Limit flight searches
|
||||
message: { error: "Too many flight searches, please try again later." },
|
||||
});
|
||||
|
||||
// Apply rate limiters
|
||||
app.use("/api/", apiLimiter);
|
||||
app.use("/api/flights/", flightLimiter);
|
||||
|
||||
// Body parsing middleware
|
||||
app.use(express.json({ limit: "10mb" }));
|
||||
app.use(express.urlencoded({ extended: true, limit: "10mb" }));
|
||||
|
||||
// Body parsing middleware
|
||||
app.use(express.json({ limit: "10mb" }));
|
||||
app.use(express.urlencoded({ extended: true, limit: "10mb" }));
|
||||
|
||||
// Request logging
|
||||
app.use((req, res, next) => {
|
||||
logger.info(`${req.method} ${req.url}`, {
|
||||
ip: req.ip,
|
||||
userAgent: req.get("user-agent"),
|
||||
});
|
||||
next();
|
||||
});
|
||||
|
||||
// Serve static files from the current directory
|
||||
app.use(
|
||||
express.static(__dirname, {
|
||||
maxAge: "1d",
|
||||
etag: true,
|
||||
})
|
||||
);
|
||||
|
||||
// Disable caching for HTML and JS files
|
||||
app.use((req, res, next) => {
|
||||
if (req.path.endsWith(".html") || req.path.endsWith(".js")) {
|
||||
res.set(
|
||||
"Cache-Control",
|
||||
"no-store, no-cache, must-revalidate, proxy-revalidate"
|
||||
);
|
||||
res.set("Pragma", "no-cache");
|
||||
res.set("Expires", "0");
|
||||
}
|
||||
next();
|
||||
});
|
||||
|
||||
// Serve data directory explicitly
|
||||
app.use("/data", express.static(path.join(__dirname, "data")));
|
||||
|
||||
// Route for root
|
||||
app.get("/", (req, res) => {
|
||||
res.sendFile(path.join(__dirname, "index.html"));
|
||||
});
|
||||
|
||||
// Route for validation page
|
||||
app.get("/validation", (req, res) => {
|
||||
res.sendFile(path.join(__dirname, "validation.html"));
|
||||
});
|
||||
|
||||
// API endpoint to search flights with caching and validation
|
||||
app.get(
|
||||
"/api/flights/search",
|
||||
validate(flightSearchSchema),
|
||||
async (req, res) => {
|
||||
try {
|
||||
const {
|
||||
origin,
|
||||
destination,
|
||||
departureDate,
|
||||
returnDate,
|
||||
adults = 1,
|
||||
} = req.query;
|
||||
|
||||
// Check cache first
|
||||
const cached = cache.getFlight(
|
||||
origin,
|
||||
destination,
|
||||
departureDate,
|
||||
returnDate,
|
||||
adults
|
||||
);
|
||||
if (cached) {
|
||||
logger.info("Returning cached flight data");
|
||||
return res.json({ ...cached, cached: true });
|
||||
}
|
||||
|
||||
// Get airport codes from city names
|
||||
const originCode = getAirportCode(origin);
|
||||
const destinationCode = getAirportCode(destination);
|
||||
|
||||
if (!originCode || !destinationCode) {
|
||||
logger.warn(`Airport codes not found: ${origin} -> ${destination}`);
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
message: `Could not find airport codes for: ${
|
||||
!originCode ? origin : ""
|
||||
} ${!destinationCode ? destination : ""}`.trim(),
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`Searching flights: ${originCode} -> ${destinationCode}`);
|
||||
|
||||
// Search flights
|
||||
const result = await searchFlights(
|
||||
originCode,
|
||||
destinationCode,
|
||||
departureDate,
|
||||
returnDate,
|
||||
adults
|
||||
);
|
||||
|
||||
// Cache successful results
|
||||
if (result.success) {
|
||||
cache.setFlight(
|
||||
origin,
|
||||
destination,
|
||||
departureDate,
|
||||
returnDate,
|
||||
adults,
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
logger.error("Flight search error:", error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
message: "Internal server error",
|
||||
error:
|
||||
process.env.NODE_ENV === "development"
|
||||
? error.message
|
||||
: "An error occurred",
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Initialize database connection on startup
|
||||
(async () => {
|
||||
try {
|
||||
await dbService.connect();
|
||||
logger.info("✅ Database ready for queries");
|
||||
} catch (err) {
|
||||
logger.error("❌ Failed to connect to database:", err);
|
||||
logger.warn("⚠️ Falling back to JSON files");
|
||||
}
|
||||
})();
|
||||
|
||||
// ============ DATABASE SEARCH ENDPOINTS ============
|
||||
|
||||
/**
|
||||
* Search for a city with caching
|
||||
* GET /api/accommodation/search?city=canberra
|
||||
*/
|
||||
app.get(
|
||||
"/api/accommodation/search",
|
||||
validate(accommodationSearchSchema),
|
||||
async (req, res) => {
|
||||
try {
|
||||
const { city } = req.query;
|
||||
|
||||
// Check cache
|
||||
const cached = cache.getAccommodation(city);
|
||||
if (cached) {
|
||||
logger.debug(`Returning cached accommodation data for ${city}`);
|
||||
return res.json({ ...cached, cached: true });
|
||||
}
|
||||
|
||||
const results = await dbService.searchCity(city);
|
||||
|
||||
if (results.length === 0) {
|
||||
logger.warn(`City not found: ${city}`);
|
||||
return res.status(404).json({
|
||||
error: "City not found",
|
||||
message: `No accommodation rates found for: ${city}`,
|
||||
suggestion: "Try searching for a nearby major city",
|
||||
});
|
||||
}
|
||||
|
||||
const response = {
|
||||
query: city,
|
||||
results: results,
|
||||
count: results.length,
|
||||
};
|
||||
|
||||
// Cache the results
|
||||
cache.setAccommodation(city, response);
|
||||
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
logger.error("Accommodation search error:", error);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
/**
|
||||
* Get exact rate by city key
|
||||
* GET /api/accommodation/rate?city=canberra
|
||||
*/
|
||||
app.get("/api/accommodation/rate", async (req, res) => {
|
||||
try {
|
||||
const { city, month } = req.query;
|
||||
|
||||
if (!city) {
|
||||
return res.status(400).json({ error: "Missing city parameter" });
|
||||
}
|
||||
|
||||
if (month) {
|
||||
const monthNum = parseInt(month);
|
||||
if (monthNum < 1 || monthNum > 12) {
|
||||
return res
|
||||
.status(400)
|
||||
.json({ error: "Month must be between 1 and 12" });
|
||||
}
|
||||
const rate = await dbService.getMonthlyRate(city, monthNum);
|
||||
res.json(rate || { error: "City not found" });
|
||||
} else {
|
||||
const rate = await dbService.getAccommodationRate(city);
|
||||
res.json(rate || { error: "City not found" });
|
||||
}
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Full-text search
|
||||
* GET /api/search?q=australia
|
||||
*/
|
||||
app.get("/api/search", async (req, res) => {
|
||||
try {
|
||||
const { q } = req.query;
|
||||
|
||||
if (!q) {
|
||||
return res
|
||||
.status(400)
|
||||
.json({ error: "Missing search query (q parameter)" });
|
||||
}
|
||||
|
||||
const results = await dbService.fullTextSearch(q);
|
||||
res.json({
|
||||
query: q,
|
||||
results: results,
|
||||
count: results.length,
|
||||
});
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Autocomplete endpoint
|
||||
* GET /api/autocomplete?q=can
|
||||
*/
|
||||
app.get("/api/autocomplete", async (req, res) => {
|
||||
try {
|
||||
const { q } = req.query;
|
||||
|
||||
if (!q || q.length < 2) {
|
||||
return res.json({ suggestions: [] });
|
||||
}
|
||||
|
||||
const suggestions = await dbService.autocomplete(q);
|
||||
res.json({
|
||||
query: q,
|
||||
suggestions: suggestions,
|
||||
});
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Get cities by region
|
||||
* GET /api/cities/region?region=Oceania
|
||||
*/
|
||||
app.get("/api/cities/region", async (req, res) => {
|
||||
try {
|
||||
const { region } = req.query;
|
||||
|
||||
if (!region) {
|
||||
return res.status(400).json({ error: "Missing region parameter" });
|
||||
}
|
||||
|
||||
const cities = await dbService.getCitiesByRegion(region);
|
||||
res.json({
|
||||
region: region,
|
||||
cities: cities,
|
||||
count: cities.length,
|
||||
});
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Get cities by country
|
||||
* GET /api/cities/country?country=Australia
|
||||
*/
|
||||
app.get("/api/cities/country", async (req, res) => {
|
||||
try {
|
||||
const { country } = req.query;
|
||||
|
||||
if (!country) {
|
||||
return res.status(400).json({ error: "Missing country parameter" });
|
||||
}
|
||||
|
||||
const cities = await dbService.getCitiesByCountry(country);
|
||||
res.json({
|
||||
country: country,
|
||||
cities: cities,
|
||||
count: cities.length,
|
||||
});
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* List all regions
|
||||
* GET /api/regions
|
||||
*/
|
||||
app.get("/api/regions", async (req, res) => {
|
||||
try {
|
||||
const regions = await dbService.getAllRegions();
|
||||
res.json({ regions: regions });
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* List all countries
|
||||
* GET /api/countries
|
||||
*/
|
||||
app.get("/api/countries", async (req, res) => {
|
||||
try {
|
||||
const countries = await dbService.getAllCountries();
|
||||
res.json({ countries: countries });
|
||||
} catch (error) {
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// Update health check to include database status
|
||||
app.get("/api/health", async (req, res) => {
|
||||
let dbStatus = "inactive";
|
||||
try {
|
||||
const regions = await dbService.getAllRegions();
|
||||
dbStatus = regions.length > 0 ? "active" : "empty";
|
||||
} catch (err) {
|
||||
dbStatus = "error";
|
||||
}
|
||||
|
||||
res.json({
|
||||
status: "healthy",
|
||||
timestamp: new Date().toISOString(),
|
||||
uptime: process.uptime(),
|
||||
database: dbStatus,
|
||||
cache: cache.getStats(),
|
||||
version: "1.2.0",
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Cache management endpoint (development only)
|
||||
* GET /api/cache/clear
|
||||
*/
|
||||
if (process.env.NODE_ENV === "development") {
|
||||
app.get("/api/cache/clear", (req, res) => {
|
||||
cache.clearAll();
|
||||
logger.info("All caches cleared via API");
|
||||
res.json({ message: "All caches cleared" });
|
||||
});
|
||||
|
||||
app.get("/api/cache/stats", (req, res) => {
|
||||
res.json(cache.getStats());
|
||||
});
|
||||
}
|
||||
|
||||
// Global error handler
|
||||
app.use((err, req, res, next) => {
|
||||
logger.error("Unhandled error:", err);
|
||||
res.status(500).json({
|
||||
error: "Internal server error",
|
||||
message:
|
||||
process.env.NODE_ENV === "development"
|
||||
? err.message
|
||||
: "An unexpected error occurred",
|
||||
});
|
||||
});
|
||||
|
||||
// 404 handler
|
||||
app.use((req, res) => {
|
||||
logger.warn(`404 Not Found: ${req.url}`);
|
||||
res.status(404).json({ error: "Endpoint not found" });
|
||||
});
|
||||
|
||||
// Start server
|
||||
app.listen(PORT, () => {
|
||||
logger.info("==========================================");
|
||||
logger.info("Government Travel Cost Estimator v1.2.0");
|
||||
logger.info("==========================================");
|
||||
logger.info(`🚀 Server running on port ${PORT}`);
|
||||
logger.info(`📱 Main App: http://localhost:${PORT}`);
|
||||
logger.info(`🔍 Validation: http://localhost:${PORT}/validation.html`);
|
||||
logger.info(`✈️ Flight API: http://localhost:${PORT}/api/flights/search`);
|
||||
logger.info(
|
||||
`🏨 Accommodation API: http://localhost:${PORT}/api/accommodation/search`
|
||||
);
|
||||
logger.info(`❤️ Health Check: http://localhost:${PORT}/api/health`);
|
||||
logger.info("==========================================");
|
||||
|
||||
if (!process.env.AMADEUS_API_KEY || !process.env.AMADEUS_API_SECRET) {
|
||||
logger.warn("⚠️ WARNING: Amadeus API credentials not configured!");
|
||||
logger.warn(
|
||||
" Flight search will use sample data until credentials are added"
|
||||
);
|
||||
logger.warn(
|
||||
" Get free API key at: https://developers.amadeus.com/register"
|
||||
);
|
||||
} else {
|
||||
logger.info("✅ Amadeus API configured");
|
||||
}
|
||||
|
||||
logger.info(`📝 Log files: ${path.join(__dirname, "logs")}`);
|
||||
logger.info(`💾 Cache enabled: Flights (1h), Rates (24h), DB (5m)`);
|
||||
logger.info("==========================================");
|
||||
logger.info("Press Ctrl+C to stop the server");
|
||||
});
|
||||
|
||||
// Graceful shutdown
|
||||
process.on("SIGTERM", () => {
|
||||
logger.info("SIGTERM signal received: closing HTTP server");
|
||||
server.close(() => {
|
||||
logger.info("HTTP server closed");
|
||||
cache.clearAll();
|
||||
process.exit(0);
|
||||
});
|
||||
});
|
||||
|
||||
process.on("SIGINT", () => {
|
||||
logger.info("SIGINT signal received: closing HTTP server");
|
||||
process.exit(0);
|
||||
});
|
||||
Reference in New Issue
Block a user