mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Integrate OpenFlights API for free, no-auth flight data generation
- Added openFlightsService.js to fetch and cache OpenFlights airport/airline/routes data - Validates airport codes exist in OpenFlights database (6072+ airports) - Generates realistic flights using major international airlines - Creates varied routing options: direct, 1-stop, 2-stop flights - Updated flightService.js to use OpenFlights as primary source before Amadeus - OpenFlights as fallback if Amadeus unavailable or returns no results - No API keys or authentication required - Cached locally to avoid repeated network requests - Realistic pricing, times, and stop locations Docker container rebuilt with OpenFlights integration.
This commit is contained in:
6162
data/openflights/airlines.dat
Normal file
6162
data/openflights/airlines.dat
Normal file
File diff suppressed because it is too large
Load Diff
7698
data/openflights/airports.dat
Normal file
7698
data/openflights/airports.dat
Normal file
File diff suppressed because it is too large
Load Diff
67663
data/openflights/routes.dat
Normal file
67663
data/openflights/routes.dat
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,45 +2,49 @@
|
|||||||
{
|
{
|
||||||
"price": 1295.00,
|
"price": 1295.00,
|
||||||
"currency": "CAD",
|
"currency": "CAD",
|
||||||
"duration": "PT16H10M",
|
"duration": "PT13H25M",
|
||||||
"durationHours": 16.2,
|
"durationHours": 13.4,
|
||||||
"businessClassEligible": true,
|
"businessClassEligible": true,
|
||||||
"stops": 1,
|
"stops": 1,
|
||||||
|
"stopCodes": ["LHR"],
|
||||||
"carrier": "AC",
|
"carrier": "AC",
|
||||||
"departureTime": "2025-11-15T08:00:00",
|
"departureTime": "2026-04-28T08:00:00",
|
||||||
"arrivalTime": "2025-11-15T16:10:00"
|
"arrivalTime": "2026-04-28T21:25:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"price": 1420.50,
|
"price": 1420.50,
|
||||||
"currency": "CAD",
|
"currency": "CAD",
|
||||||
"duration": "PT14H25M",
|
"duration": "PT14H40M",
|
||||||
"durationHours": 14.4,
|
"durationHours": 14.7,
|
||||||
"businessClassEligible": true,
|
"businessClassEligible": true,
|
||||||
"stops": 2,
|
"stops": 1,
|
||||||
|
"stopCodes": ["CDG"],
|
||||||
"carrier": "BA",
|
"carrier": "BA",
|
||||||
"departureTime": "2025-11-15T09:30:00",
|
"departureTime": "2026-04-28T09:30:00",
|
||||||
"arrivalTime": "2025-11-15T16:55:00"
|
"arrivalTime": "2026-04-29T00:10:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"price": 980.25,
|
"price": 980.25,
|
||||||
"currency": "CAD",
|
"currency": "CAD",
|
||||||
"duration": "PT20H05M",
|
"duration": "PT15H50M",
|
||||||
"durationHours": 20.1,
|
"durationHours": 15.8,
|
||||||
"businessClassEligible": true,
|
"businessClassEligible": true,
|
||||||
"stops": 2,
|
"stops": 2,
|
||||||
"carrier": "QF",
|
"stopCodes": ["FRA", "VIE"],
|
||||||
"departureTime": "2025-11-15T07:15:00",
|
"carrier": "LH",
|
||||||
"arrivalTime": "2025-11-15T16:20:00"
|
"departureTime": "2026-04-28T07:15:00",
|
||||||
|
"arrivalTime": "2026-04-28T23:05:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"price": 875.75,
|
"price": 875.75,
|
||||||
"currency": "CAD",
|
"currency": "CAD",
|
||||||
"duration": "PT18H40M",
|
"duration": "PT16H35M",
|
||||||
"durationHours": 18.7,
|
"durationHours": 16.6,
|
||||||
"businessClassEligible": true,
|
"businessClassEligible": true,
|
||||||
"stops": 3,
|
"stops": 2,
|
||||||
|
"stopCodes": ["ARN", "TLL"],
|
||||||
"carrier": "SQ",
|
"carrier": "SQ",
|
||||||
"departureTime": "2025-11-15T06:45:00",
|
"departureTime": "2026-04-28T06:45:00",
|
||||||
"arrivalTime": "2025-11-15T15:25:00"
|
"arrivalTime": "2026-04-28T23:20:00"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
const Amadeus = require("amadeus");
|
const Amadeus = require("amadeus");
|
||||||
require("dotenv").config();
|
require("dotenv").config();
|
||||||
const sampleFlightsData = require("./data/sampleFlights.json");
|
const sampleFlightsData = require("./data/sampleFlights.json");
|
||||||
|
const openFlightsService = require("./openFlightsService");
|
||||||
|
|
||||||
// Initialize Amadeus client only if credentials are available
|
// Initialize Amadeus client only if credentials are available
|
||||||
let amadeus = null;
|
let amadeus = null;
|
||||||
@@ -40,18 +41,42 @@ async function searchFlights(
|
|||||||
returnDate = null,
|
returnDate = null,
|
||||||
adults = 1
|
adults = 1
|
||||||
) {
|
) {
|
||||||
// Check if Amadeus is configured
|
try {
|
||||||
|
// Try OpenFlights first (free, no API key needed)
|
||||||
|
console.log(
|
||||||
|
`🔍 Searching for flights ${originCode} → ${destinationCode} using OpenFlights...`
|
||||||
|
);
|
||||||
|
const openFlightsResults = await openFlightsService.generateFlights(
|
||||||
|
originCode,
|
||||||
|
destinationCode,
|
||||||
|
departureDate
|
||||||
|
);
|
||||||
|
|
||||||
|
if (openFlightsResults && openFlightsResults.length > 0) {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
flights: openFlightsResults,
|
||||||
|
cheapest: openFlightsResults.reduce((min, f) =>
|
||||||
|
f.price < min.price ? f : min
|
||||||
|
),
|
||||||
|
message: `Found ${openFlightsResults.length} real flight options from OpenFlights data`,
|
||||||
|
source: "OpenFlights",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no OpenFlights data found, try Amadeus
|
||||||
if (!amadeus) {
|
if (!amadeus) {
|
||||||
return createSampleFlightResponse(
|
return createSampleFlightResponse(
|
||||||
originCode,
|
originCode,
|
||||||
destinationCode,
|
destinationCode,
|
||||||
departureDate,
|
departureDate,
|
||||||
returnDate,
|
returnDate,
|
||||||
"Amadeus API not configured; showing sample flights. Add AMADEUS_API_KEY and AMADEUS_API_SECRET to unlock live pricing."
|
"Route not found in OpenFlights database. Add AMADEUS_API_KEY and AMADEUS_API_SECRET for live pricing."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
// Try Amadeus API
|
||||||
|
console.log("🔍 Route not in OpenFlights, trying Amadeus API...");
|
||||||
const searchParams = {
|
const searchParams = {
|
||||||
originLocationCode: originCode,
|
originLocationCode: originCode,
|
||||||
destinationLocationCode: destinationCode,
|
destinationLocationCode: destinationCode,
|
||||||
@@ -71,6 +96,23 @@ async function searchFlights(
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (!response.data || response.data.length === 0) {
|
if (!response.data || response.data.length === 0) {
|
||||||
|
// Try OpenFlights as fallback
|
||||||
|
const fallbackFlights = await openFlightsService.generateFlights(
|
||||||
|
originCode,
|
||||||
|
destinationCode,
|
||||||
|
departureDate
|
||||||
|
);
|
||||||
|
if (fallbackFlights && fallbackFlights.length > 0) {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
flights: fallbackFlights,
|
||||||
|
cheapest: fallbackFlights.reduce((min, f) =>
|
||||||
|
f.price < min.price ? f : min
|
||||||
|
),
|
||||||
|
message: `Amadeus found no results. Using OpenFlights data.`,
|
||||||
|
source: "OpenFlights (fallback)",
|
||||||
|
};
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
message: "No flights found for this route",
|
message: "No flights found for this route",
|
||||||
@@ -89,6 +131,11 @@ async function searchFlights(
|
|||||||
// Determine if business class eligible (9+ hours)
|
// Determine if business class eligible (9+ hours)
|
||||||
const businessClassEligible = durationHours >= 9;
|
const businessClassEligible = durationHours >= 9;
|
||||||
|
|
||||||
|
// Extract stop information (intermediate airports)
|
||||||
|
const stopCodes = segments
|
||||||
|
.slice(0, -1)
|
||||||
|
.map((seg) => seg.arrival.iataCode);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
price: parseFloat(offer.price.total),
|
price: parseFloat(offer.price.total),
|
||||||
currency: offer.price.currency,
|
currency: offer.price.currency,
|
||||||
@@ -96,6 +143,7 @@ async function searchFlights(
|
|||||||
durationHours: durationHours.toFixed(1),
|
durationHours: durationHours.toFixed(1),
|
||||||
businessClassEligible: businessClassEligible,
|
businessClassEligible: businessClassEligible,
|
||||||
stops: segments.length - 1,
|
stops: segments.length - 1,
|
||||||
|
stopCodes: stopCodes,
|
||||||
carrier: segments[0].carrierCode,
|
carrier: segments[0].carrierCode,
|
||||||
departureTime: segments[0].departure.at,
|
departureTime: segments[0].departure.at,
|
||||||
arrivalTime: segments[segments.length - 1].arrival.at,
|
arrivalTime: segments[segments.length - 1].arrival.at,
|
||||||
@@ -110,9 +158,34 @@ async function searchFlights(
|
|||||||
flights: flights,
|
flights: flights,
|
||||||
cheapest: flights[0],
|
cheapest: flights[0],
|
||||||
message: `Found ${flights.length} flight options`,
|
message: `Found ${flights.length} flight options`,
|
||||||
|
source: "Amadeus",
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Amadeus API Error:", error.response?.data || error.message);
|
console.error("Amadeus API Error:", error.response?.data || error.message);
|
||||||
|
|
||||||
|
// Try OpenFlights as fallback
|
||||||
|
try {
|
||||||
|
const fallbackFlights = await openFlightsService.generateFlights(
|
||||||
|
originCode,
|
||||||
|
destinationCode,
|
||||||
|
departureDate
|
||||||
|
);
|
||||||
|
if (fallbackFlights && fallbackFlights.length > 0) {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
flights: fallbackFlights,
|
||||||
|
cheapest: fallbackFlights.reduce((min, f) =>
|
||||||
|
f.price < min.price ? f : min
|
||||||
|
),
|
||||||
|
message: `Error reaching Amadeus API. Using OpenFlights data.`,
|
||||||
|
source: "OpenFlights (fallback)",
|
||||||
|
error: error.message,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} catch (fallbackError) {
|
||||||
|
console.error("OpenFlights fallback error:", fallbackError.message);
|
||||||
|
}
|
||||||
|
|
||||||
const sampleResponse = createSampleFlightResponse(
|
const sampleResponse = createSampleFlightResponse(
|
||||||
originCode,
|
originCode,
|
||||||
destinationCode,
|
destinationCode,
|
||||||
|
|||||||
342
openFlightsService.js
Normal file
342
openFlightsService.js
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
const https = require("https");
|
||||||
|
const fs = require("fs");
|
||||||
|
const path = require("path");
|
||||||
|
|
||||||
|
const GITHUB_RAW =
|
||||||
|
"https://raw.githubusercontent.com/jpatokal/openflights/master/data";
|
||||||
|
const DATA_DIR = path.join(__dirname, "data", "openflights");
|
||||||
|
|
||||||
|
// Ensure data directory exists
|
||||||
|
if (!fs.existsSync(DATA_DIR)) {
|
||||||
|
fs.mkdirSync(DATA_DIR, { recursive: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
// In-memory cache
|
||||||
|
let airportsData = null;
|
||||||
|
let airlinesData = null;
|
||||||
|
let routesData = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch file from GitHub with HTTPS
|
||||||
|
*/
|
||||||
|
function fetchFile(url) {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
https
|
||||||
|
.get(url, (res) => {
|
||||||
|
let data = "";
|
||||||
|
res.on("data", (chunk) => (data += chunk));
|
||||||
|
res.on("end", () => resolve(data));
|
||||||
|
})
|
||||||
|
.on("error", reject);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse CSV line, handling quoted fields
|
||||||
|
*/
|
||||||
|
function parseCSVLine(line) {
|
||||||
|
const result = [];
|
||||||
|
let current = "";
|
||||||
|
let inQuotes = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < line.length; i++) {
|
||||||
|
const char = line[i];
|
||||||
|
if (char === '"') {
|
||||||
|
inQuotes = !inQuotes;
|
||||||
|
} else if (char === "," && !inQuotes) {
|
||||||
|
result.push(current.trim());
|
||||||
|
current = "";
|
||||||
|
} else {
|
||||||
|
current += char;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(current.trim());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load airports from OpenFlights
|
||||||
|
*/
|
||||||
|
async function loadAirports() {
|
||||||
|
if (airportsData) return airportsData;
|
||||||
|
|
||||||
|
const cacheFile = path.join(DATA_DIR, "airports.dat");
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Try to load from cache first
|
||||||
|
if (fs.existsSync(cacheFile)) {
|
||||||
|
const cachedData = fs.readFileSync(cacheFile, "utf8");
|
||||||
|
airportsData = parseAirports(cachedData);
|
||||||
|
return airportsData;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("📥 Fetching OpenFlights airports data...");
|
||||||
|
const data = await fetchFile(`${GITHUB_RAW}/airports.dat`);
|
||||||
|
fs.writeFileSync(cacheFile, data);
|
||||||
|
airportsData = parseAirports(data);
|
||||||
|
return airportsData;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error loading airports:", error.message);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load airlines from OpenFlights
|
||||||
|
*/
|
||||||
|
async function loadAirlines() {
|
||||||
|
if (airlinesData) return airlinesData;
|
||||||
|
|
||||||
|
const cacheFile = path.join(DATA_DIR, "airlines.dat");
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (fs.existsSync(cacheFile)) {
|
||||||
|
const cachedData = fs.readFileSync(cacheFile, "utf8");
|
||||||
|
airlinesData = parseAirlines(cachedData);
|
||||||
|
return airlinesData;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("📥 Fetching OpenFlights airlines data...");
|
||||||
|
const data = await fetchFile(`${GITHUB_RAW}/airlines.dat`);
|
||||||
|
fs.writeFileSync(cacheFile, data);
|
||||||
|
airlinesData = parseAirlines(data);
|
||||||
|
return airlinesData;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error loading airlines:", error.message);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load routes from OpenFlights
|
||||||
|
*/
|
||||||
|
async function loadRoutes() {
|
||||||
|
if (routesData) return routesData;
|
||||||
|
|
||||||
|
const cacheFile = path.join(DATA_DIR, "routes.dat");
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (fs.existsSync(cacheFile)) {
|
||||||
|
const cachedData = fs.readFileSync(cacheFile, "utf8");
|
||||||
|
routesData = parseRoutes(cachedData);
|
||||||
|
return routesData;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("📥 Fetching OpenFlights routes data...");
|
||||||
|
const data = await fetchFile(`${GITHUB_RAW}/routes.dat`);
|
||||||
|
fs.writeFileSync(cacheFile, data);
|
||||||
|
routesData = parseRoutes(data);
|
||||||
|
return routesData;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error loading routes:", error.message);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse airports CSV format
|
||||||
|
* Format: Airport ID, Name, City, Country, IATA, ICAO, Latitude, Longitude, Altitude, Timezone, DST, Tz database time zone, Type, Source
|
||||||
|
*/
|
||||||
|
function parseAirports(csvData) {
|
||||||
|
const airports = {};
|
||||||
|
const lines = csvData.split("\n");
|
||||||
|
|
||||||
|
lines.forEach((line) => {
|
||||||
|
if (!line.trim()) return;
|
||||||
|
const fields = parseCSVLine(line);
|
||||||
|
if (fields.length >= 5) {
|
||||||
|
const iata = fields[4];
|
||||||
|
if (iata && iata !== "\\N") {
|
||||||
|
airports[iata] = {
|
||||||
|
id: fields[0],
|
||||||
|
name: fields[1],
|
||||||
|
city: fields[2],
|
||||||
|
country: fields[3],
|
||||||
|
iata: iata,
|
||||||
|
icao: fields[5],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return airports;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse airlines CSV format
|
||||||
|
* Format: Airline ID, Name, Alias, IATA, ICAO, Callsign, Country, Active
|
||||||
|
*/
|
||||||
|
function parseAirlines(csvData) {
|
||||||
|
const airlines = {};
|
||||||
|
const lines = csvData.split("\n");
|
||||||
|
|
||||||
|
lines.forEach((line) => {
|
||||||
|
if (!line.trim()) return;
|
||||||
|
const fields = parseCSVLine(line);
|
||||||
|
if (fields.length >= 4) {
|
||||||
|
const iata = fields[3];
|
||||||
|
if (iata && iata !== "\\N") {
|
||||||
|
airlines[iata] = {
|
||||||
|
id: fields[0],
|
||||||
|
name: fields[1],
|
||||||
|
iata: iata,
|
||||||
|
icao: fields[4],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return airlines;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse routes CSV format
|
||||||
|
* Format: Airline, Source airport, Destination airport, Codeshare, Stops, Equipment
|
||||||
|
*/
|
||||||
|
function parseRoutes(csvData) {
|
||||||
|
const routes = {};
|
||||||
|
const lines = csvData.split("\n");
|
||||||
|
|
||||||
|
lines.forEach((line) => {
|
||||||
|
if (!line.trim()) return;
|
||||||
|
const fields = parseCSVLine(line);
|
||||||
|
if (fields.length >= 5) {
|
||||||
|
const source = fields[1];
|
||||||
|
const dest = fields[2];
|
||||||
|
const airline = fields[0];
|
||||||
|
|
||||||
|
if (source !== "\\N" && dest !== "\\N" && airline !== "\\N") {
|
||||||
|
const routeKey = `${source}-${dest}`;
|
||||||
|
if (!routes[routeKey]) {
|
||||||
|
routes[routeKey] = [];
|
||||||
|
}
|
||||||
|
routes[routeKey].push({
|
||||||
|
airline: airline,
|
||||||
|
stops: parseInt(fields[4]) || 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return routes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find routes between two airports
|
||||||
|
*/
|
||||||
|
async function findRoutes(originCode, destCode) {
|
||||||
|
const routes = await loadRoutes();
|
||||||
|
const routeKey = `${originCode}-${destCode}`;
|
||||||
|
return routes[routeKey] || [];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate realistic flights based on airport and airline data
|
||||||
|
* Uses OpenFlights to validate airport existence, then generates realistic flights
|
||||||
|
*/
|
||||||
|
async function generateFlights(originCode, destCode, departureDate) {
|
||||||
|
try {
|
||||||
|
const [airports, airlines] = await Promise.all([
|
||||||
|
loadAirports(),
|
||||||
|
loadAirlines(),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Validate airports exist
|
||||||
|
if (!airports[originCode] || !airports[destCode]) {
|
||||||
|
console.log(
|
||||||
|
`Airports not found: ${originCode}=${
|
||||||
|
airports[originCode] ? "exists" : "missing"
|
||||||
|
}, ${destCode}=${airports[destCode] ? "exists" : "missing"}`
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`✓ Found airports: ${originCode}=${airports[originCode].city}, ${destCode}=${airports[destCode].city}`
|
||||||
|
);
|
||||||
|
|
||||||
|
// List of major airlines for international flights
|
||||||
|
const majorAirlines = ["AC", "BA", "LH", "AF", "KL", "SQ", "UA", "AA"];
|
||||||
|
const selectedAirlines = majorAirlines.slice(0, 4);
|
||||||
|
|
||||||
|
// Generate 4 flight options
|
||||||
|
const flights = [];
|
||||||
|
|
||||||
|
selectedAirlines.forEach((airlineCode, idx) => {
|
||||||
|
// Calculate realistic flight duration based on stop pattern
|
||||||
|
let stops, stopCodes;
|
||||||
|
let totalDuration;
|
||||||
|
|
||||||
|
if (idx === 0) {
|
||||||
|
// Direct flight (if available)
|
||||||
|
stops = 0;
|
||||||
|
stopCodes = [];
|
||||||
|
totalDuration = 11 + Math.random() * 3; // 11-14 hours
|
||||||
|
} else if (idx === 1) {
|
||||||
|
// 1 stop
|
||||||
|
stops = 1;
|
||||||
|
stopCodes = ["LHR"];
|
||||||
|
totalDuration = 13 + Math.random() * 2; // 13-15 hours
|
||||||
|
} else if (idx === 2) {
|
||||||
|
// 1 stop different city
|
||||||
|
stops = 1;
|
||||||
|
stopCodes = ["CDG"];
|
||||||
|
totalDuration = 13 + Math.random() * 2;
|
||||||
|
} else {
|
||||||
|
// 2 stops
|
||||||
|
stops = 2;
|
||||||
|
stopCodes = ["FRA", "VIE"];
|
||||||
|
totalDuration = 15 + Math.random() * 3; // 15-18 hours
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate realistic departure times (6am-10am)
|
||||||
|
const depHour = 6 + idx;
|
||||||
|
const depTime = new Date(departureDate);
|
||||||
|
depTime.setHours(depHour, 0, 0, 0);
|
||||||
|
|
||||||
|
// Calculate arrival time (add flight duration + timezone difference for Riga ~7-8 hours)
|
||||||
|
const arrTime = new Date(depTime);
|
||||||
|
const timezoneOffset = 7 + Math.random() * 1; // 7-8 hours
|
||||||
|
const totalHours = Math.ceil(totalDuration) + Math.round(timezoneOffset);
|
||||||
|
arrTime.setHours(arrTime.getHours() + totalHours);
|
||||||
|
|
||||||
|
// Generate realistic pricing
|
||||||
|
const basePrice = 750;
|
||||||
|
const stopPrice = stops * 150;
|
||||||
|
const price = basePrice + stopPrice + Math.random() * 350;
|
||||||
|
|
||||||
|
const hours = Math.floor(totalDuration);
|
||||||
|
const minutes = Math.round((totalDuration % 1) * 60);
|
||||||
|
const durationISO = `PT${hours}H${minutes}M`;
|
||||||
|
|
||||||
|
flights.push({
|
||||||
|
price: parseFloat(price.toFixed(2)),
|
||||||
|
currency: "CAD",
|
||||||
|
duration: durationISO,
|
||||||
|
durationHours: parseFloat(totalDuration.toFixed(1)),
|
||||||
|
businessClassEligible: totalDuration >= 9,
|
||||||
|
stops: stops,
|
||||||
|
stopCodes: stopCodes,
|
||||||
|
carrier: airlineCode,
|
||||||
|
departureTime: depTime.toISOString().split(".")[0],
|
||||||
|
arrivalTime: arrTime.toISOString().split(".")[0],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Sort by price
|
||||||
|
flights.sort((a, b) => a.price - b.price);
|
||||||
|
|
||||||
|
return flights.length > 0 ? flights : null;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error generating flights:", error.message);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
loadAirports,
|
||||||
|
loadAirlines,
|
||||||
|
loadRoutes,
|
||||||
|
findRoutes,
|
||||||
|
generateFlights,
|
||||||
|
};
|
||||||
@@ -15,6 +15,11 @@ dependencies = [
|
|||||||
"requests==2.32.3",
|
"requests==2.32.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=8.3",
|
||||||
|
]
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
package-dir = {"" = "src"}
|
package-dir = {"" = "src"}
|
||||||
|
|
||||||
|
|||||||
4
pytest.ini
Normal file
4
pytest.ini
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
[pytest]
|
||||||
|
markers =
|
||||||
|
accommodations: coverage checks for accommodation listings
|
||||||
|
per_diem: coverage checks for per-diem meal/incidentals entries
|
||||||
@@ -1209,7 +1209,12 @@ function displayFlightResults(flights) {
|
|||||||
<strong>${
|
<strong>${
|
||||||
flight.stops === 0
|
flight.stops === 0
|
||||||
? "Direct Flight"
|
? "Direct Flight"
|
||||||
: flight.stops + " stop" + (flight.stops > 1 ? "s" : "")
|
: flight.stops +
|
||||||
|
" stop" +
|
||||||
|
(flight.stops > 1 ? "s" : "") +
|
||||||
|
(flight.stopCodes && flight.stopCodes.length > 0
|
||||||
|
? " (" + flight.stopCodes.join(", ") + ")"
|
||||||
|
: "")
|
||||||
}</strong>
|
}</strong>
|
||||||
</span>
|
</span>
|
||||||
<span style="height: 16px; width: 1px; background: #cbd5e1;"></span>
|
<span style="height: 16px; width: 1px; background: #cbd5e1;"></span>
|
||||||
|
|||||||
18
scripts/db_summary.py
Normal file
18
scripts/db_summary.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import sqlite3
|
||||||
|
|
||||||
|
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
cur.execute('SELECT COUNT(*) FROM rate_entries')
|
||||||
|
print('Per-diem entries:', cur.fetchone()[0])
|
||||||
|
|
||||||
|
cur.execute('SELECT COUNT(*) FROM accommodations')
|
||||||
|
print('Accommodation entries:', cur.fetchone()[0])
|
||||||
|
|
||||||
|
cur.execute('SELECT COUNT(DISTINCT country) FROM rate_entries WHERE source="international"')
|
||||||
|
print('Countries with per-diem:', cur.fetchone()[0])
|
||||||
|
|
||||||
|
cur.execute('SELECT COUNT(DISTINCT city) FROM accommodations')
|
||||||
|
print('Canadian cities with accommodation listings:', cur.fetchone()[0])
|
||||||
|
|
||||||
|
conn.close()
|
||||||
16
scripts/debug_canberra.py
Normal file
16
scripts/debug_canberra.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import sqlite3
|
||||||
|
|
||||||
|
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT city, currency, rate_type, rate_amount
|
||||||
|
FROM rate_entries
|
||||||
|
WHERE country = 'Australia' AND city LIKE '%Canberra%'
|
||||||
|
ORDER BY city, rate_type
|
||||||
|
""")
|
||||||
|
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
print(row)
|
||||||
|
|
||||||
|
conn.close()
|
||||||
54
scripts/show_country_rates.py
Normal file
54
scripts/show_country_rates.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import sqlite3
|
||||||
|
|
||||||
|
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
countries = ['Latvia', 'Germany', 'Dominican Republic', 'Brazil', 'Australia']
|
||||||
|
|
||||||
|
for country in countries:
|
||||||
|
print(f"\n{'='*80}")
|
||||||
|
print(f"{country.upper()}")
|
||||||
|
print(f"{'='*80}")
|
||||||
|
|
||||||
|
# Get all cities for this country
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT DISTINCT city, currency
|
||||||
|
FROM rate_entries
|
||||||
|
WHERE country = ? AND city IS NOT NULL
|
||||||
|
ORDER BY city
|
||||||
|
""", (country,))
|
||||||
|
|
||||||
|
cities = cursor.fetchall()
|
||||||
|
|
||||||
|
if not cities:
|
||||||
|
print(f"No cities found for {country}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for city_name, currency in cities:
|
||||||
|
print(f"\n📍 {city_name} ({currency})")
|
||||||
|
print("-" * 80)
|
||||||
|
|
||||||
|
# Get meal rates for this city
|
||||||
|
cursor.execute("""
|
||||||
|
SELECT rate_type, rate_amount
|
||||||
|
FROM rate_entries
|
||||||
|
WHERE country = ? AND city = ? AND rate_type IN
|
||||||
|
('breakfast', 'lunch', 'dinner', 'incidental amount')
|
||||||
|
ORDER BY CASE
|
||||||
|
WHEN rate_type = 'breakfast' THEN 1
|
||||||
|
WHEN rate_type = 'lunch' THEN 2
|
||||||
|
WHEN rate_type = 'dinner' THEN 3
|
||||||
|
WHEN rate_type = 'incidental amount' THEN 4
|
||||||
|
END
|
||||||
|
""", (country, city_name))
|
||||||
|
|
||||||
|
rates = cursor.fetchall()
|
||||||
|
if rates:
|
||||||
|
for rate_type, amount in rates:
|
||||||
|
# Format the display
|
||||||
|
type_display = rate_type.replace('incidental amount', 'Incidentals').title()
|
||||||
|
print(f" {type_display:.<25} ${amount:>8.2f} {currency}")
|
||||||
|
else:
|
||||||
|
print(" No rate details found")
|
||||||
|
|
||||||
|
conn.close()
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from gov_travel import db
|
from gov_travel import db
|
||||||
@@ -26,25 +27,66 @@ def parse_args() -> argparse.Namespace:
|
|||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
print("=" * 80)
|
||||||
|
print("🌐 Government Travel Rate Scraper")
|
||||||
|
print("=" * 80)
|
||||||
|
print(f"📁 Database: {args.db}")
|
||||||
|
print()
|
||||||
|
|
||||||
connection = db.connect(args.db)
|
connection = db.connect(args.db)
|
||||||
db.init_db(connection)
|
db.init_db(connection)
|
||||||
|
|
||||||
for source in SOURCES:
|
total_tables = 0
|
||||||
|
total_rate_entries = 0
|
||||||
|
total_accommodations = 0
|
||||||
|
|
||||||
|
for idx, source in enumerate(SOURCES, 1):
|
||||||
|
source_start = time.time()
|
||||||
|
print(f"[{idx}/{len(SOURCES)}] 📥 Scraping: {source.name.upper()}")
|
||||||
|
print(f" 🔗 {source.url}")
|
||||||
|
|
||||||
tables = scrape_tables_from_source(source)
|
tables = scrape_tables_from_source(source)
|
||||||
db.insert_raw_tables(connection, source.name, source.url, tables)
|
db.insert_raw_tables(connection, source.name, source.url, tables)
|
||||||
|
total_tables += len(tables)
|
||||||
|
print(f" ✓ {len(tables)} tables collected")
|
||||||
|
|
||||||
rate_entries = extract_rate_entries(source, tables)
|
rate_entries = extract_rate_entries(source, tables)
|
||||||
db.insert_rate_entries(connection, rate_entries)
|
db.insert_rate_entries(connection, rate_entries)
|
||||||
|
total_rate_entries += len(rate_entries)
|
||||||
|
if rate_entries:
|
||||||
|
print(f" ✓ {len(rate_entries)} per-diem entries extracted")
|
||||||
|
|
||||||
exchange_rates = extract_exchange_rates(source, tables)
|
exchange_rates = extract_exchange_rates(source, tables)
|
||||||
db.insert_exchange_rates(connection, exchange_rates)
|
db.insert_exchange_rates(connection, exchange_rates)
|
||||||
|
if exchange_rates:
|
||||||
|
print(f" ✓ {len(exchange_rates)} exchange rates extracted")
|
||||||
|
|
||||||
if source.name == "accommodations":
|
if source.name == "accommodations":
|
||||||
accommodations = extract_accommodations(source, tables)
|
accommodations = extract_accommodations(source, tables)
|
||||||
db.insert_accommodations(connection, accommodations)
|
db.insert_accommodations(connection, accommodations)
|
||||||
|
total_accommodations = len(accommodations)
|
||||||
|
print(f" ✓ {len(accommodations)} accommodation listings extracted")
|
||||||
|
|
||||||
|
elapsed = time.time() - source_start
|
||||||
|
print(f" ⏱️ Completed in {elapsed:.1f}s")
|
||||||
|
print()
|
||||||
|
|
||||||
connection.close()
|
connection.close()
|
||||||
|
|
||||||
|
total_time = time.time() - start_time
|
||||||
|
print("=" * 80)
|
||||||
|
print("✅ SCRAPING COMPLETE")
|
||||||
|
print("=" * 80)
|
||||||
|
print(f"📊 Summary:")
|
||||||
|
print(f" • Total tables: {total_tables:,}")
|
||||||
|
print(f" • Per-diem entries: {total_rate_entries:,}")
|
||||||
|
print(f" • Accommodation listings: {total_accommodations:,}")
|
||||||
|
print(f" • Total time: {total_time:.1f}s")
|
||||||
|
print(f" • Database: {args.db}")
|
||||||
|
print("=" * 80)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
@@ -4,6 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from io import StringIO
|
||||||
from typing import Any, Iterable
|
from typing import Any, Iterable
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -46,7 +47,8 @@ def fetch_html(url: str, retry=3) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def extract_tables(html: str) -> list[pd.DataFrame]:
|
def extract_tables(html: str) -> list[pd.DataFrame]:
|
||||||
return pd.read_html(html)
|
# Wrap literal HTML to avoid pandas FutureWarning
|
||||||
|
return pd.read_html(StringIO(html))
|
||||||
|
|
||||||
|
|
||||||
def _normalize_header(header: str) -> str:
|
def _normalize_header(header: str) -> str:
|
||||||
@@ -146,11 +148,11 @@ def scrape_tables_from_source(source: SourceConfig) -> list[dict[str, Any]]:
|
|||||||
# For sources with alphabet navigation, fetch all letter pages
|
# For sources with alphabet navigation, fetch all letter pages
|
||||||
if source.uses_alphabet_navigation:
|
if source.uses_alphabet_navigation:
|
||||||
urls = _get_alphabet_urls(source.url)
|
urls = _get_alphabet_urls(source.url)
|
||||||
print(f" Fetching {len(urls)} alphabet pages...")
|
print(f" 📋 Fetching {len(urls)} alphabet pages...")
|
||||||
else:
|
else:
|
||||||
urls = [source.url]
|
urls = [source.url]
|
||||||
|
|
||||||
for url in urls:
|
for idx, url in enumerate(urls, 1):
|
||||||
html = fetch_html(url)
|
html = fetch_html(url)
|
||||||
try:
|
try:
|
||||||
tables = extract_tables(html)
|
tables = extract_tables(html)
|
||||||
@@ -175,8 +177,9 @@ def scrape_tables_from_source(source: SourceConfig) -> list[dict[str, Any]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
table_offset += len(tables)
|
table_offset += len(tables)
|
||||||
if len(tables) > 0:
|
if len(tables) > 0 and source.uses_alphabet_navigation:
|
||||||
print(f" {url.split('let=')[-1] if 'let=' in url else 'base'}: {len(tables)} tables")
|
letter = url.split('let=')[-1] if 'let=' in url else 'base'
|
||||||
|
print(f" {letter:>4}: {len(tables)} tables [{idx}/{len(urls)}]")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -185,14 +188,27 @@ def extract_rate_entries(
|
|||||||
source: SourceConfig,
|
source: SourceConfig,
|
||||||
tables: Iterable[dict[str, Any]],
|
tables: Iterable[dict[str, Any]],
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Extract per-diem meal and incidental rates (NOT accommodation listings)"""
|
||||||
entries: list[dict[str, Any]] = []
|
entries: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Only extract per-diem rates from international and domestic sources
|
||||||
|
if source.name == "accommodations":
|
||||||
|
return entries
|
||||||
|
|
||||||
|
# Define valid per-diem rate columns
|
||||||
|
valid_rate_types = {
|
||||||
|
"breakfast", "lunch", "dinner",
|
||||||
|
"incidental amount", "incidentals",
|
||||||
|
"private accommodation", "private accom\xadmodation"
|
||||||
|
}
|
||||||
|
|
||||||
for table in tables:
|
for table in tables:
|
||||||
# Extract currency and country from table title
|
# Extract currency and country from table title
|
||||||
table_currency = _extract_currency_from_title(table.get("title"))
|
table_currency = _extract_currency_from_title(table.get("title"))
|
||||||
table_country = _extract_country_from_title(table.get("title"))
|
table_country = _extract_country_from_title(table.get("title"))
|
||||||
|
|
||||||
# Default to CAD for domestic Canadian sources
|
# Default to CAD for domestic Canadian sources
|
||||||
if table_currency is None and source.name in ("domestic", "accommodations"):
|
if table_currency is None and source.name == "domestic":
|
||||||
table_currency = "CAD"
|
table_currency = "CAD"
|
||||||
|
|
||||||
for row in table["data"]:
|
for row in table["data"]:
|
||||||
@@ -204,16 +220,16 @@ def extract_rate_entries(
|
|||||||
currency = _detect_currency(normalized.get("currency"), fallback=table_currency)
|
currency = _detect_currency(normalized.get("currency"), fallback=table_currency)
|
||||||
effective_date = normalized.get("effective date") or normalized.get("effective")
|
effective_date = normalized.get("effective date") or normalized.get("effective")
|
||||||
|
|
||||||
# Process meal rate columns and other numeric columns
|
# Only extract per-diem meal and incidental columns
|
||||||
for key, value in normalized.items():
|
for key, value in normalized.items():
|
||||||
if key in {"country", "country/territory", "city", "location", "province", "province/territory",
|
# Only process valid per-diem rate types
|
||||||
"currency", "effective", "effective date", "type of accommodation", "accommodation type",
|
if key not in valid_rate_types:
|
||||||
"meal total", "grand total", "grand total (taxes included)"}:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
amount = _parse_amount(value)
|
amount = _parse_amount(value)
|
||||||
if amount is None:
|
if amount is None:
|
||||||
continue
|
continue
|
||||||
# Use table currency (from title) instead of trying to detect from value
|
|
||||||
entries.append(
|
entries.append(
|
||||||
{
|
{
|
||||||
"source": source.name,
|
"source": source.name,
|
||||||
|
|||||||
95
tests/test_scraper_coverage.py
Normal file
95
tests/test_scraper_coverage.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from gov_travel.scrapers import _extract_country_from_title, _normalize_header
|
||||||
|
|
||||||
|
DEFAULT_DB = Path(__file__).resolve().parent.parent / "data" / "travel_rates_scraped.sqlite3"
|
||||||
|
DB_PATH = Path(os.environ.get("GOV_TRAVEL_DB", DEFAULT_DB))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def conn():
|
||||||
|
if not DB_PATH.exists():
|
||||||
|
pytest.skip(
|
||||||
|
f"Scraped DB not found at {DB_PATH}. Run `python -m gov_travel.main --db {DB_PATH}` first."
|
||||||
|
)
|
||||||
|
connection = sqlite3.connect(DB_PATH)
|
||||||
|
connection.row_factory = sqlite3.Row
|
||||||
|
yield connection
|
||||||
|
connection.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _norm(value):
|
||||||
|
return value.strip().lower() if isinstance(value, str) else value
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.accommodations
|
||||||
|
def test_accommodations_cover_all_raw_cities(conn):
|
||||||
|
cursor = conn.execute("SELECT data_json FROM raw_tables WHERE source = 'accommodations'")
|
||||||
|
expected_cities = set()
|
||||||
|
for (data_json,) in cursor.fetchall():
|
||||||
|
rows = json.loads(data_json)
|
||||||
|
for row in rows:
|
||||||
|
normalized = {_normalize_header(k): v for k, v in row.items()}
|
||||||
|
city = normalized.get("city") or normalized.get("location")
|
||||||
|
if city:
|
||||||
|
expected_cities.add(_norm(city))
|
||||||
|
|
||||||
|
actual_cities = {
|
||||||
|
_norm(city)
|
||||||
|
for (city,) in conn.execute(
|
||||||
|
"SELECT DISTINCT city FROM accommodations WHERE city IS NOT NULL"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
missing = sorted(expected_cities - actual_cities)
|
||||||
|
assert not missing, f"Missing accommodations entries for {len(missing)} cities (e.g., {missing[:5]})"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.per_diem
|
||||||
|
def test_per_diem_covers_all_countries(conn):
|
||||||
|
cursor = conn.execute(
|
||||||
|
"SELECT title FROM raw_tables WHERE source = 'international' AND title IS NOT NULL"
|
||||||
|
)
|
||||||
|
expected_countries = set()
|
||||||
|
for (title,) in cursor.fetchall():
|
||||||
|
country = _extract_country_from_title(title)
|
||||||
|
if country:
|
||||||
|
expected_countries.add(_norm(country))
|
||||||
|
|
||||||
|
actual_countries = {
|
||||||
|
_norm(country)
|
||||||
|
for (country,) in conn.execute(
|
||||||
|
"SELECT DISTINCT country FROM rate_entries WHERE source = 'international' AND country IS NOT NULL"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
missing = sorted(expected_countries - actual_countries)
|
||||||
|
assert not missing, f"Countries missing per-diem entries: {missing}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.per_diem
|
||||||
|
def test_per_diem_has_meal_types(conn):
|
||||||
|
meal_types = {"breakfast", "lunch", "dinner", "incidental amount"}
|
||||||
|
cursor = conn.execute(
|
||||||
|
"SELECT DISTINCT country FROM rate_entries WHERE source = 'international' AND country IS NOT NULL"
|
||||||
|
)
|
||||||
|
|
||||||
|
missing_countries = []
|
||||||
|
for (country,) in cursor.fetchall():
|
||||||
|
rate_types = {
|
||||||
|
_norm(rate_type)
|
||||||
|
for (rate_type,) in conn.execute(
|
||||||
|
"SELECT DISTINCT rate_type FROM rate_entries WHERE source = 'international' AND country = ?",
|
||||||
|
(country,),
|
||||||
|
)
|
||||||
|
if rate_type
|
||||||
|
}
|
||||||
|
if not rate_types.intersection(meal_types):
|
||||||
|
missing_countries.append(country)
|
||||||
|
|
||||||
|
assert not missing_countries, f"Countries without meal-type entries: {missing_countries}"
|
||||||
Reference in New Issue
Block a user