Add Python web scraper for NJC travel rates with currency extraction

- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flatten pandas MultiIndex columns for cleaner data structure
- Default to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source (the NJC website).
This commit is contained in:
2026-01-13 09:21:43 -05:00
commit 15094ac94b
84 changed files with 19859 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
import sqlite3
# Summarise the scraped database: table counts per source, currency coverage
# of rate entries per source, and a few sample table titles for each source.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()

    print("Tables by source:\n")
    cursor.execute("""
        SELECT source, COUNT(*) as count
        FROM raw_tables
        GROUP BY source
    """)
    for row in cursor.fetchall():
        print(f" {row[0]}: {row[1]} tables")

    print("\nRate entries by source:\n")
    cursor.execute("""
        SELECT source, COUNT(*) as count,
               SUM(CASE WHEN currency IS NULL THEN 1 ELSE 0 END) as null_count,
               SUM(CASE WHEN currency IS NOT NULL THEN 1 ELSE 0 END) as has_currency_count
        FROM rate_entries
        GROUP BY source
    """)
    for row in cursor.fetchall():
        print(f" {row[0]}: {row[1]} total | {row[2]} NULL | {row[3]} with currency")

    print("\nSample titles by source:\n")
    for source in ['international', 'domestic', 'accommodations']:
        # Bind the source name as a parameter rather than formatting it into
        # the SQL text, so quoting is handled by the driver.
        cursor.execute(
            """
            SELECT title
            FROM raw_tables
            WHERE source = ?
            LIMIT 3
            """,
            (source,),
        )
        print(f"\n{source}:")
        for row in cursor.fetchall():
            # Rows with an empty or NULL title are shown with a placeholder.
            title = row[0] if row[0] else "NO TITLE"
            print(f" {title[:80]}")
finally:
    # Release the database handle even when one of the queries raises.
    conn.close()

86
scripts/checkMealRates.js Normal file
View File

@@ -0,0 +1,86 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
// Reports whether the meal_rates table exists, how many rows it holds and a
// sample of up to ten rows. All queries are asynchronous, so the closing
// separator is printed by finishReport() once the last result has been shown
// instead of at the end of the synchronous script body.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
const SEPARATOR = '='.repeat(60);

const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Database connection failed:', err);
    process.exit(1);
  }
});

/** Print the closing separator and release the database handle. */
function finishReport() {
  console.log(SEPARATOR + '\n');
  db.close();
}

/**
 * Log a failed query, release the database handle and exit with status 1.
 * @param {string} message - Prefix describing which step failed.
 * @param {Error} err - Error reported by the sqlite3 callback.
 */
function abort(message, err) {
  console.error(message, err);
  db.close();
  process.exit(1);
}

console.log('\n🍽 Checking Meal Rates Table...\n');
console.log(SEPARATOR);

// Check if meal_rates table exists
const checkTableQuery = `
  SELECT name FROM sqlite_master
  WHERE type='table' AND name='meal_rates'
`;

db.get(checkTableQuery, [], (err, row) => {
  if (err) {
    return abort('❌ Query failed:', err);
  }
  if (!row) {
    console.log('\n❌ meal_rates table does NOT exist in database\n');
    console.log('The database migration only created accommodation_rates table.');
    console.log('Meal rates need to be added separately.\n');
    return finishReport();
  }
  console.log('✅ meal_rates table EXISTS\n');

  // Count records
  const countQuery = 'SELECT COUNT(*) as count FROM meal_rates';
  db.get(countQuery, [], (err, countRow) => {
    if (err) {
      return abort('❌ Count query failed:', err);
    }
    console.log(`📊 Total meal rate records: ${countRow.count}\n`);
    if (countRow.count === 0) {
      console.log('⚠️ Table exists but is EMPTY - no meal rates imported\n');
      return finishReport();
    }

    // Show sample records
    const sampleQuery = `
      SELECT city_name, country, breakfast, lunch, dinner, incidentals, total_daily
      FROM meal_rates
      LIMIT 10
    `;
    db.all(sampleQuery, [], (err, rows) => {
      if (err) {
        return abort('❌ Sample query failed:', err);
      }
      console.log('Sample meal rates:\n');
      rows.forEach((row, index) => {
        console.log(`${index + 1}. ${row.city_name}, ${row.country}`);
        console.log(` Breakfast: $${row.breakfast}`);
        console.log(` Lunch: $${row.lunch}`);
        console.log(` Dinner: $${row.dinner}`);
        console.log(` Incidentals: $${row.incidentals}`);
        console.log(` Total Daily: $${row.total_daily}\n`);
      });
      finishReport();
    });
  });
});

30
scripts/checkSchema.js Normal file
View File

@@ -0,0 +1,30 @@
const sqlite3 = require("sqlite3").verbose();
const path = require("path");
// Lists every table found in the travel_rates.db file located one directory
// above this script.
const schemaDbPath = path.join(__dirname, "..", "travel_rates.db");

/** Abort the script when the database file cannot be opened. */
function handleOpen(openError) {
  if (!openError) {
    return;
  }
  console.error("Error opening database:", openError);
  process.exit(1);
}

const db = new sqlite3.Database(schemaDbPath, handleOpen);

/** Print one line per table name, then close the connection. */
function printTables(queryError, tables) {
  if (queryError) {
    console.error("Error querying tables:", queryError);
    process.exit(1);
  }
  console.log("Tables in travel_rates.db:");
  for (const { name } of tables) {
    console.log(` - ${name}`);
  }
  db.close();
}

db.all(
  "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name",
  [],
  printTables
);

View File

@@ -0,0 +1,26 @@
import sqlite3
# Shows how rate entries whose country mentions Argentina are spread across
# sources and currencies, followed by up to ten raw entries.
connection = sqlite3.connect('data/travel_rates_scraped.sqlite3')
db_cursor = connection.cursor()

print("Argentina entries by source:")
db_cursor.execute("""
SELECT source, COUNT(*) as count, currency
FROM rate_entries
WHERE country LIKE '%Argentina%'
GROUP BY source, currency
""")
for source_name, entry_count, currency_code in db_cursor:
    print(f" {source_name}: {entry_count} entries with currency {currency_code}")

print("\nAll Argentina entries with details:")
db_cursor.execute("""
SELECT source, country, city, rate_type, currency
FROM rate_entries
WHERE country LIKE '%Argentina%'
LIMIT 10
""")
for record in db_cursor:
    # Each record is printed as the raw result tuple.
    print(f" {record}")

connection.close()

View File

@@ -0,0 +1,35 @@
import sqlite3
# Prints sample breakfast entries for Argentina and Albania, then every
# distinct (city, currency) pair recorded for Argentina.
connection = sqlite3.connect('data/travel_rates_scraped.sqlite3')
db_cursor = connection.cursor()

print("Argentina entries with breakfast:")
db_cursor.execute("""
SELECT country, city, rate_type, rate_amount, currency
FROM rate_entries
WHERE country LIKE '%Argentina%' AND rate_type LIKE '%breakfast%'
LIMIT 5
""")
for record in db_cursor:
    print(f" {record}")

print("\nAlbania entries with breakfast:")
db_cursor.execute("""
SELECT country, city, rate_type, rate_amount, currency
FROM rate_entries
WHERE country LIKE '%Albania%' AND rate_type LIKE '%breakfast%'
LIMIT 5
""")
for record in db_cursor:
    print(f" {record}")

print("\nAll Argentina city entries:")
db_cursor.execute("""
SELECT DISTINCT city, currency
FROM rate_entries
WHERE country LIKE '%Argentina%'
""")
for city_name, currency_code in db_cursor:
    print(f" {city_name}: {currency_code}")

connection.close()

View File

@@ -0,0 +1,27 @@
import sqlite3
# Prints the number of rate entries per (source, currency) pair, then the
# first twenty distinct countries of the 'international' source.
connection = sqlite3.connect('data/travel_rates_scraped.sqlite3')
db_cursor = connection.cursor()

print("All sources and their currency distributions:")
db_cursor.execute("""
SELECT source, currency, COUNT(*) as count
FROM rate_entries
GROUP BY source, currency
ORDER BY source, currency
""")
for source_name, currency_code, entry_count in db_cursor:
    print(f" {source_name} / {currency_code}: {entry_count}")

print("\nInternational source countries:")
db_cursor.execute("""
SELECT DISTINCT country
FROM rate_entries
WHERE source = 'international'
ORDER BY country
LIMIT 20
""")
for (country_name,) in db_cursor:
    print(f" {country_name}")

connection.close()

16
scripts/check_titles.py Normal file
View File

@@ -0,0 +1,16 @@
import sqlite3
# Prints the first ten raw table titles, then every raw table whose title
# mentions Argentina.
connection = sqlite3.connect('data/travel_rates_scraped.sqlite3')
db_cursor = connection.cursor()

print("Sample Table Titles:")
db_cursor.execute('SELECT table_index, title FROM raw_tables LIMIT 10')
for table_index, title in db_cursor:
    print(f"{table_index}: {title}")

print("\nArgentina Tables:")
db_cursor.execute("SELECT table_index, title FROM raw_tables WHERE title LIKE '%Argentina%'")
for table_index, title in db_cursor:
    print(f"{table_index}: {title}")

connection.close()

View File

@@ -0,0 +1,37 @@
"""Debug the scraper to see what currencies are being assigned"""
import sys
sys.path.insert(0, 'src')
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source, extract_rate_entries
# Test international source with Argentina
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")
print("Fetching tables...")
tables = scrape_tables_from_source(source)

# Find the first table whose title mentions Argentina (None when nothing matches).
argentina_table = next(
    (table for table in tables if table['title'] and 'Argentina' in table['title']),
    None,
)

if argentina_table:
    print(f"\nArgentina Table:")
    print(f" Title: {argentina_table['title']}")
    print(f" Rows: {len(argentina_table['data'])}")

    # Extract entries
    entries = extract_rate_entries(source, [argentina_table])
    print(f"\n Generated {len(entries)} entries")

    if entries:
        # Show first few entries
        print(f"\n First 3 entries:")
        for i, entry in enumerate(entries[:3]):
            print(f" {i+1}. City: {entry['city']}, Type: {entry['rate_type']}, Amount: {entry['rate_amount']}, Currency: {entry['currency']}")

        # Check unique currencies
        currencies = set(e['currency'] for e in entries)
        print(f"\n Unique currencies in Argentina entries: {currencies}")
else:
    # Say so explicitly; otherwise a missing table looks like a hung or
    # truncated run because nothing is printed after "Fetching tables...".
    print("\nNo table with 'Argentina' in its title was found.")

28
scripts/debug_currency.py Normal file
View File

@@ -0,0 +1,28 @@
import sqlite3
# Compares the stored raw Argentina table (title and payload size) with the
# first few rate entries derived for Argentina.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()

    # Get a raw table with title
    # NOTE(review): the raw-table inspection script in this commit reads the
    # payload from a `data_json` column, while this query selects `data` —
    # confirm the column name against the raw_tables schema.
    cursor.execute("""
        SELECT title, data
        FROM raw_tables
        WHERE title LIKE '%Argentina%'
        LIMIT 1
    """)
    row = cursor.fetchone()
    if row is None:
        # fetchone() returns None when no title matches; report it instead of
        # failing with a TypeError on row[0].
        print("No raw table with 'Argentina' in its title was found")
    else:
        print(f"Title: {row[0]}")
        # A NULL payload is reported as zero characters.
        print(f"Data length: {len(row[1] or '')} chars")

    # Now check the actual rate_entries for Argentina
    cursor.execute("""
        SELECT country, city, rate_type, currency, rate_amount
        FROM rate_entries
        WHERE country LIKE '%Argentina%'
        LIMIT 3
    """)
    print("\nRate Entries:")
    for r in cursor.fetchall():
        print(f" {r}")
finally:
    # Release the database handle even when a query raises.
    conn.close()

View File

@@ -0,0 +1,35 @@
import sqlite3
import json
# Dumps the first five raw tables (source, URL, index, title, column names and
# first row), then the first raw table whose JSON payload mentions Argentina.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    print('\n=== RAW TABLE INSPECTION ===\n')

    # Check first few raw tables
    for row in conn.execute('SELECT source, source_url, table_index, title, data_json FROM raw_tables LIMIT 5').fetchall():
        print(f'\nSource: {row[0]}')
        print(f'URL: {row[1]}')
        print(f'Table Index: {row[2]}')
        print(f'Title: {row[3]}')
        # presumably data_json holds a JSON list of row dicts — the code below
        # indexes data[0] and calls .keys() on it; verify against the scraper.
        data = json.loads(row[4])
        print(f'Columns: {list(data[0].keys()) if data else "No data"}')
        print(f'First row sample: {data[0] if data else "No data"}')
        print('-' * 80)

    # Check specific Argentina table
    print('\n\n=== ARGENTINA RAW DATA ===\n')
    # fetchone() yields a single (source, title, data_json) tuple or None. It
    # must be used as one row: looping over it would walk its three columns.
    # The LIKE pattern is a single-quoted SQL string literal; double quotes
    # denote identifiers in SQL.
    argentina_row = conn.execute(
        "SELECT source, title, data_json FROM raw_tables WHERE data_json LIKE '%Argentina%'"
    ).fetchone()
    if argentina_row is not None:
        print(f'Source: {argentina_row[0]}')
        print(f'Title: {argentina_row[1]}')
        data = json.loads(argentina_row[2])
        if data:
            # Find Argentina entry
            for entry in data:
                if 'Argentina' in str(entry.values()):
                    print(f'\nArgentina entry columns: {entry.keys()}')
                    print(f'Argentina entry data: {entry}')
                    break
finally:
    # Release the database handle even when a query or JSON decode raises.
    conn.close()

View File

@@ -0,0 +1,31 @@
"""Inspect the actual table structure from NJC"""
import sys
sys.path.insert(0, 'src')
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source
import json
# Create a test source config
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")

# Fetch every table, then show the first one in detail
print("Fetching tables...")
tables = scrape_tables_from_source(source)

if not tables:
    # Nothing was scraped; indexing tables[0] would raise IndexError.
    print("\nNo tables were returned by the scraper.")
else:
    first_table = tables[0]
    print(f"\nTable {first_table['table_index']}")
    print(f"Title: {first_table['title']}")

    first_rows = first_table['data']
    # Only print the rows that actually exist; short tables are reported
    # instead of failing on a missing index.
    if len(first_rows) > 0:
        print(f"\nFirst data row:")
        print(json.dumps(first_rows[0], indent=2))
    else:
        print("\n(first table has no data rows)")
    if len(first_rows) > 1:
        print(f"\nSecond data row:")
        print(json.dumps(first_rows[1], indent=2))

    # Now try Argentina
    for table in tables:
        if table['title'] and 'Argentina' in table['title']:
            print(f"\n\n=== Argentina Table ===")
            print(f"Title: {table['title']}")
            if table['data']:
                print(f"\nFirst row:")
                print(json.dumps(table['data'][0], indent=2))
            else:
                print("\n(Argentina table has no data rows)")
            break

46
scripts/listCountries.js Normal file
View File

@@ -0,0 +1,46 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
// Prints one entry per country found in accommodation_rates (region, currency
// and number of cities), followed by country and city totals.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
const divider = '='.repeat(60);

/** Abort the script when the database file cannot be opened. */
function onOpen(openError) {
  if (!openError) {
    return;
  }
  console.error('❌ Database connection failed:', openError);
  process.exit(1);
}

const db = new sqlite3.Database(dbPath, onOpen);

console.log('\n📊 Countries in Database:\n');
console.log(divider);

// One row per country; region and currency come from whichever row SQLite
// picks for the group.
const query = `
SELECT
country,
COUNT(*) as city_count,
region,
currency
FROM accommodation_rates
GROUP BY country
ORDER BY country
`;

db.all(query, [], (queryError, countries) => {
  if (queryError) {
    console.error('❌ Query failed:', queryError);
    db.close();
    process.exit(1);
  }

  let position = 0;
  let totalCities = 0;
  for (const entry of countries) {
    position += 1;
    totalCities += entry.city_count;
    console.log(`\n${position}. ${entry.country}`);
    console.log(` Region: ${entry.region}`);
    console.log(` Currency: ${entry.currency}`);
    console.log(` Cities: ${entry.city_count}`);
  }

  console.log('\n' + divider);
  console.log(`\n📍 Total Countries: ${countries.length}`);
  console.log(`📍 Total Cities: ${totalCities}\n`);
  db.close();
});

View File

@@ -0,0 +1,582 @@
const sqlite3 = require("sqlite3").verbose();
const fs = require("fs");
const path = require("path");
// Country to currency mapping based on NJC Appendix D.
// Keys are country (or region/territory) names; values are the currency code
// assigned to cities in that country by getCurrencyForCountry().
// NOTE(review): several values look surprising (for example Algeria and
// Angola mapped to CAD, and a number of non-euro European countries mapped to
// EUR) — they are kept exactly as found; confirm against the published NJC
// tables before relying on them.
const COUNTRY_CURRENCY_MAP = {
  // EUR countries (European)
  Austria: "EUR",
  Belgium: "EUR",
  Bulgaria: "EUR",
  Croatia: "EUR",
  Cyprus: "EUR",
  "Czech Republic": "EUR",
  Denmark: "EUR",
  Estonia: "EUR",
  Finland: "EUR",
  France: "EUR",
  Germany: "EUR",
  Greece: "EUR",
  Hungary: "EUR",
  Ireland: "EUR",
  Italy: "EUR",
  Latvia: "EUR",
  Lithuania: "EUR",
  Luxembourg: "EUR",
  Malta: "EUR",
  Netherlands: "EUR",
  Poland: "EUR",
  Portugal: "EUR",
  Romania: "EUR",
  Slovakia: "EUR",
  Slovenia: "EUR",
  Spain: "EUR",
  Sweden: "EUR",
  Albania: "EUR",
  Andorra: "EUR",
  "Bosnia and Herzegovina": "EUR",
  Kosovo: "EUR",
  Montenegro: "EUR",
  "North Macedonia": "EUR",
  Serbia: "EUR",
  Ukraine: "EUR",
  Moldova: "EUR",
  Iceland: "EUR",
  Norway: "EUR",
  Switzerland: "EUR",
  Azores: "EUR",
  Madeira: "EUR",
  // CAD countries
  Canada: "CAD",
  // AUD countries
  Australia: "AUD",
  // USD countries (Americas & others)
  "United States": "USD",
  USA: "USD",
  Mexico: "USD",
  Belize: "USD",
  "Central America": "USD",
  "Costa Rica": "USD",
  Guatemala: "USD",
  Honduras: "USD",
  Nicaragua: "USD",
  Panama: "USD",
  "El Salvador": "USD",
  Caribbean: "USD",
  "Antigua and Barbuda": "USD",
  Bahamas: "USD",
  Barbados: "USD",
  Bermuda: "USD",
  Dominica: "USD",
  "Dominican Republic": "USD",
  Grenada: "USD",
  Haiti: "USD",
  Jamaica: "USD",
  "St. Kitts": "USD",
  "St. Lucia": "USD",
  "St. Vincent": "USD",
  "Trinidad and Tobago": "USD",
  "Turks and Caicos": "USD",
  Anguilla: "USD",
  Montserrat: "USD",
  "Virgin Islands": "USD",
  Aruba: "USD",
  Curacao: "USD",
  "Sint Maarten": "USD",
  Bonaire: "USD",
  Colombia: "USD",
  Ecuador: "USD",
  Guyana: "USD",
  Suriname: "USD",
  Venezuela: "USD",
  Peru: "USD",
  Bolivia: "USD",
  Paraguay: "USD",
  Brazil: "USD",
  Chile: "USD",
  "Middle East": "USD",
  Afghanistan: "USD",
  Armenia: "USD",
  Azerbaijan: "USD",
  Bahrain: "USD",
  Georgia: "USD",
  Iran: "USD",
  Iraq: "USD",
  Israel: "USD",
  Jordan: "USD",
  Kuwait: "USD",
  Lebanon: "USD",
  Oman: "USD",
  Qatar: "USD",
  "Saudi Arabia": "USD",
  Syria: "USD",
  Turkey: "USD",
  "United Arab Emirates": "USD",
  Yemen: "USD",
  Pakistan: "USD",
  India: "USD",
  Bangladesh: "USD",
  "Sri Lanka": "USD",
  Nepal: "USD",
  Bhutan: "USD",
  Myanmar: "USD",
  Thailand: "USD",
  Laos: "USD",
  Vietnam: "USD",
  Cambodia: "USD",
  Malaysia: "USD",
  Singapore: "USD",
  Indonesia: "USD",
  Philippines: "USD",
  "East Timor": "USD",
  "Papua New Guinea": "USD",
  "Solomon Islands": "USD",
  Vanuatu: "USD",
  Fiji: "USD",
  Kiribati: "USD",
  "Marshall Islands": "USD",
  Micronesia: "USD",
  Nauru: "USD",
  Palau: "USD",
  Samoa: "USD",
  Tonga: "USD",
  Tuvalu: "USD",
  "Hong Kong": "USD",
  Taiwan: "USD",
  Japan: "USD",
  "South Korea": "USD",
  "North Korea": "USD",
  Mongolia: "USD",
  China: "USD",
  "North Africa": "USD",
  Algeria: "CAD",
  Egypt: "USD",
  Libya: "USD",
  Morocco: "USD",
  Tunisia: "USD",
  Sudan: "USD",
  "Western Sahara": "USD",
  "Sub-Saharan Africa": "USD",
  Angola: "CAD",
  Benin: "USD",
  Botswana: "USD",
  "Burkina Faso": "USD",
  Burundi: "USD",
  Cameroon: "USD",
  "Cape Verde": "USD",
  "Central African Republic": "USD",
  Chad: "USD",
  Comoros: "USD",
  Congo: "USD",
  "Côte d'Ivoire": "USD",
  Djibouti: "USD",
  "Equatorial Guinea": "USD",
  Eritrea: "USD",
  Ethiopia: "USD",
  Gabon: "USD",
  Gambia: "USD",
  Ghana: "USD",
  Guinea: "USD",
  "Guinea-Bissau": "USD",
  Kenya: "USD",
  Lesotho: "USD",
  Liberia: "USD",
  Madagascar: "USD",
  Malawi: "USD",
  Mali: "USD",
  Mauritania: "USD",
  Mauritius: "USD",
  Mozambique: "USD",
  Namibia: "USD",
  Niger: "USD",
  Nigeria: "USD",
  Rwanda: "USD",
  Senegal: "USD",
  Seychelles: "USD",
  "Sierra Leone": "USD",
  Somalia: "USD",
  "South Africa": "USD",
  "South Sudan": "USD",
  Tanzania: "USD",
  Togo: "USD",
  Uganda: "USD",
  Zambia: "USD",
  Zimbabwe: "USD",
  Réunion: "EUR",
  Mayotte: "EUR",
  // NOTE(review): Canberra is a city, not a country — presumably a lookup
  // convenience; confirm it is still needed.
  Canberra: "AUD",
};

/**
 * Look up the currency code for a country name.
 *
 * Only the map's own keys are consulted, so names that happen to match an
 * inherited Object member (such as "constructor" or "toString") fall back to
 * the default instead of returning a function.
 *
 * @param {string} country - Country name exactly as it appears in the map.
 * @returns {string} The mapped currency code, or "USD" when the country is
 *   not in the map.
 */
function getCurrencyForCountry(country) {
  const isKnown = Object.prototype.hasOwnProperty.call(
    COUNTRY_CURRENCY_MAP,
    country
  );
  // Default to USD if not found (or if the stored value is empty).
  return (isKnown && COUNTRY_CURRENCY_MAP[country]) || "USD";
}
/**
 * Rebuilds the `travel_rates` table of database/travel_rates.db from
 * data/accommodationRates.json and data/perDiemRates.json, combining monthly
 * accommodation rates with meal and incidental rates for each city.
 */
class CompleteTravelMigration {
  constructor() {
    this.dbPath = path.join(__dirname, "..", "database", "travel_rates.db");
    this.db = null;
  }

  /**
   * Run the whole migration: open the database, recreate the schema, import
   * every city and print statistics. The connection is closed in all cases.
   * @returns {Promise<void>}
   * @throws Re-throws whatever error made a step fail, after logging it.
   */
  async migrate() {
    console.log("🚀 Starting COMPLETE travel rates migration...\n");
    try {
      await this.openDatabase();
      await this.createComprehensiveSchema();
      await this.importAllData();
      await this.displayStats();
      console.log("\n✅ Complete migration successful!");
      console.log(`📊 Database: ${this.dbPath}`);
    } catch (error) {
      console.error("❌ Migration failed:", error);
      throw error;
    } finally {
      if (this.db) {
        this.db.close();
      }
    }
  }

  /**
   * Open the SQLite file at `this.dbPath` and keep the handle in `this.db`.
   * @returns {Promise<void>} Rejects with the sqlite3 error when opening fails.
   */
  openDatabase() {
    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) reject(err);
        else {
          console.log("✅ Database connection opened");
          resolve();
        }
      });
    });
  }

  /**
   * Drop and recreate `travel_rates` (plus its indexes) and the
   * `travel_search` FTS5 table. Existing rows are discarded.
   * NOTE(review): nothing in this class inserts into `travel_search`, so the
   * search table stays empty after this migration — confirm whether it is
   * populated elsewhere.
   * @returns {Promise<void>}
   */
  async createComprehensiveSchema() {
    console.log("📋 Creating comprehensive schema...");
    const schema = `
      DROP TABLE IF EXISTS travel_rates;
      DROP TABLE IF EXISTS travel_search;
      CREATE TABLE travel_rates (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        city_key TEXT UNIQUE NOT NULL,
        city_name TEXT NOT NULL,
        province TEXT,
        country TEXT NOT NULL,
        region TEXT NOT NULL,
        currency TEXT NOT NULL,
        -- Accommodation rates (monthly)
        jan_accommodation REAL NOT NULL,
        feb_accommodation REAL NOT NULL,
        mar_accommodation REAL NOT NULL,
        apr_accommodation REAL NOT NULL,
        may_accommodation REAL NOT NULL,
        jun_accommodation REAL NOT NULL,
        jul_accommodation REAL NOT NULL,
        aug_accommodation REAL NOT NULL,
        sep_accommodation REAL NOT NULL,
        oct_accommodation REAL NOT NULL,
        nov_accommodation REAL NOT NULL,
        dec_accommodation REAL NOT NULL,
        standard_accommodation REAL,
        -- Meal rates (per diem)
        breakfast REAL NOT NULL,
        lunch REAL NOT NULL,
        dinner REAL NOT NULL,
        total_meals REAL NOT NULL,
        incidentals REAL NOT NULL,
        total_daily_allowance REAL NOT NULL,
        -- Additional info
        is_international BOOLEAN DEFAULT 0,
        effective_date DATE DEFAULT '2025-01-01',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );
      CREATE INDEX IF NOT EXISTS idx_travel_city ON travel_rates(city_name);
      CREATE INDEX IF NOT EXISTS idx_travel_country ON travel_rates(country);
      CREATE INDEX IF NOT EXISTS idx_travel_region ON travel_rates(region);
      CREATE INDEX IF NOT EXISTS idx_travel_key ON travel_rates(city_key);
      CREATE VIRTUAL TABLE IF NOT EXISTS travel_search USING fts5(
        city_key,
        city_name,
        province,
        country,
        region,
        content='travel_rates'
      );
    `;
    return new Promise((resolve, reject) => {
      this.db.exec(schema, (err) => {
        if (err) reject(err);
        else {
          console.log("✅ Comprehensive schema created");
          resolve();
        }
      });
    });
  }

  /**
   * Import Canadian cities, international cities and a fixed Canberra record.
   * A city that fails to insert is logged and skipped; the run continues.
   * @returns {Promise<void>}
   * @throws {Error} When either JSON input file is missing, or when the
   *   Canberra insert fails for a reason other than a UNIQUE conflict.
   */
  async importAllData() {
    console.log("📥 Importing all travel data...\n");
    // Load accommodation data
    const accomPath = path.join(
      __dirname,
      "..",
      "data",
      "accommodationRates.json"
    );
    const perDiemPath = path.join(__dirname, "..", "data", "perDiemRates.json");
    if (!fs.existsSync(accomPath)) {
      throw new Error("accommodationRates.json not found");
    }
    if (!fs.existsSync(perDiemPath)) {
      throw new Error("perDiemRates.json not found");
    }
    const accomData = JSON.parse(fs.readFileSync(accomPath, "utf8"));
    const perDiemData = JSON.parse(fs.readFileSync(perDiemPath, "utf8"));
    let imported = 0;

    // Import Canadian cities
    if (accomData.cities) {
      console.log(" 🇨🇦 Importing Canadian cities...");
      const canadaMeals = perDiemData.regions.canada.meals;
      const canadaIncidentals = perDiemData.regions.canada.incidentals.rate100;
      for (const [key, city] of Object.entries(accomData.cities)) {
        try {
          await this.insertTravelRate({
            city_key: key,
            city_name: city.name,
            province: city.province,
            country: "Canada",
            region: city.region,
            currency: "CAD",
            accommodation_rates: city.monthlyRates,
            breakfast: canadaMeals.breakfast.rate100,
            lunch: canadaMeals.lunch.rate100,
            dinner: canadaMeals.dinner.rate100,
            total_meals: canadaMeals.total.rate100,
            incidentals: canadaIncidentals,
            total_daily: perDiemData.regions.canada.dailyTotal.rate100,
            is_international: 0,
          });
          imported++;
          if (imported % 50 === 0) {
            console.log(` ... ${imported} cities imported`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${imported} Canadian cities`);
    }

    // Import international cities
    if (accomData.internationalCities) {
      console.log(" 🌍 Importing international cities...");
      const intlMeals = perDiemData.regions.usa.meals; // USA rates same as intl
      const intlIncidentals = perDiemData.regions.usa.incidentals.rate100;
      let intlCount = 0;
      for (const [key, city] of Object.entries(accomData.internationalCities)) {
        try {
          // Cities without monthly rates use their standard rate for all months.
          const rates = city.monthlyRates || Array(12).fill(city.standardRate);
          // Determine currency: always use country mapping (which is most authoritative)
          // Only use explicit city.currency if it's already been manually verified/set (non-USD entries with specific EUR values)
          let cityCurrency;
          if (city.currency === "EUR" || city.currency === "CAD") {
            // These are explicitly set in JSON (like Riga, Paris, Tallinn) - keep them
            cityCurrency = city.currency;
          } else {
            // Default to country mapping for USD and missing values
            cityCurrency = getCurrencyForCountry(city.country);
          }
          // Use city-specific meals if available, otherwise use regional rates
          const breakfast =
            city.meals?.breakfast || intlMeals.breakfast.rate100;
          const lunch = city.meals?.lunch || intlMeals.lunch.rate100;
          const dinner = city.meals?.dinner || intlMeals.dinner.rate100;
          const totalMeals = city.meals?.total || breakfast + lunch + dinner;
          const incidentals =
            city.incidentals !== undefined ? city.incidentals : intlIncidentals;
          await this.insertTravelRate({
            city_key: key,
            city_name: city.name,
            province: null,
            country: city.country,
            region: city.region,
            currency: cityCurrency,
            accommodation_rates: rates,
            standard_accommodation: city.standardRate || rates[0],
            breakfast: breakfast,
            lunch: lunch,
            dinner: dinner,
            total_meals: totalMeals,
            incidentals: incidentals,
            // NOTE(review): this total includes accommodation, whereas the
            // Canadian and Canberra records use the per-diem dailyTotal —
            // confirm which meaning total_daily_allowance should have.
            total_daily:
              parseFloat(city.standardRate || rates[0]) +
              totalMeals +
              incidentals,
            is_international: 1,
          });
          intlCount++;
          if (intlCount % 30 === 0) {
            console.log(` ... ${intlCount} international cities imported`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${intlCount} international cities`);
      imported += intlCount;
    }

    // Add Canberra with meal rates
    // The insert uses INSERT OR REPLACE, so a "canberra" row imported above
    // is overwritten by this fixed record. It is not added to `imported`.
    console.log(" 🇦🇺 Adding Canberra with meal rates...");
    try {
      const intlMeals = perDiemData.regions.usa.meals;
      const intlIncidentals = perDiemData.regions.usa.incidentals.rate100;
      await this.insertTravelRate({
        city_key: "canberra",
        city_name: "Canberra",
        province: null,
        country: "Australia",
        region: "Oceania",
        currency: "AUD",
        accommodation_rates: [
          184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184,
        ],
        standard_accommodation: 184,
        breakfast: intlMeals.breakfast.rate100,
        lunch: intlMeals.lunch.rate100,
        dinner: intlMeals.dinner.rate100,
        total_meals: intlMeals.total.rate100,
        incidentals: intlIncidentals,
        total_daily: perDiemData.regions.usa.dailyTotal.rate100,
        is_international: 1,
      });
      console.log(" ✅ Canberra added with complete rates");
    } catch (err) {
      if (!err.message.includes("UNIQUE")) {
        throw err;
      }
      // A UNIQUE conflict is tolerated, but say so instead of hiding it.
      console.warn(" ⚠️ Canberra not added:", err.message);
    }
    console.log(`\n✅ Total imported: ${imported} cities with complete data`);
  }

  /**
   * Insert (or replace) one city row in `travel_rates`.
   *
   * @param {object} data - City record. `accommodation_rates` must be an array
   *   of exactly 12 monthly values (January to December); the remaining
   *   fields map one-to-one onto the table columns. When
   *   `standard_accommodation` is missing or falsy, the January rate is used.
   * @returns {Promise<void>} Rejects when the rate array is malformed or when
   *   sqlite3 reports an error.
   */
  async insertTravelRate(data) {
    const monthlyRates = data.accommodation_rates;
    // The statement binds its 26 placeholders by position. A rate array of
    // any other length would shift every later value into the wrong column,
    // so reject it before touching the database.
    if (!Array.isArray(monthlyRates) || monthlyRates.length !== 12) {
      const found = Array.isArray(monthlyRates)
        ? `${monthlyRates.length} values`
        : typeof monthlyRates;
      throw new Error(
        `Expected 12 monthly accommodation rates for "${data.city_key}", got ${found}`
      );
    }
    return new Promise((resolve, reject) => {
      const sql = `
        INSERT OR REPLACE INTO travel_rates (
          city_key, city_name, province, country, region, currency,
          jan_accommodation, feb_accommodation, mar_accommodation,
          apr_accommodation, may_accommodation, jun_accommodation,
          jul_accommodation, aug_accommodation, sep_accommodation,
          oct_accommodation, nov_accommodation, dec_accommodation,
          standard_accommodation,
          breakfast, lunch, dinner, total_meals,
          incidentals, total_daily_allowance, is_international
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `;
      this.db.run(
        sql,
        [
          data.city_key,
          data.city_name,
          data.province,
          data.country,
          data.region,
          data.currency,
          ...monthlyRates,
          data.standard_accommodation || monthlyRates[0],
          data.breakfast,
          data.lunch,
          data.dinner,
          data.total_meals,
          data.incidentals,
          data.total_daily,
          data.is_international,
        ],
        (err) => {
          if (err) reject(err);
          else resolve();
        }
      );
    });
  }

  /**
   * Print row counts (total, Canadian, international) and, when present, the
   * stored Canberra record.
   * @returns {Promise<void>}
   */
  async displayStats() {
    console.log("\n📊 Database Statistics:");
    const total = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates"
    );
    console.log(` Total cities: ${total}`);
    const canadian = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates WHERE is_international = 0"
    );
    console.log(` Canadian: ${canadian}`);
    const international = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates WHERE is_international = 1"
    );
    console.log(` International: ${international}`);
    // The key is a single-quoted SQL string literal; double quotes denote
    // identifiers in SQL.
    const canberra = await this.getRow(
      "SELECT * FROM travel_rates WHERE city_key = 'canberra'"
    );
    if (canberra) {
      console.log(` \n ✅ Canberra Complete Data:`);
      // Label the amount with the currency stored on the row rather than a
      // fixed code.
      console.log(
        ` Accommodation: $${canberra.standard_accommodation} ${canberra.currency}/night`
      );
      console.log(` Breakfast: $${canberra.breakfast}`);
      console.log(` Lunch: $${canberra.lunch}`);
      console.log(` Dinner: $${canberra.dinner}`);
      console.log(` Incidentals: $${canberra.incidentals}`);
      console.log(` Total Daily: $${canberra.total_daily_allowance}`);
    }
  }

  /**
   * Run a query that returns a single row with a `count` column.
   * @param {string} sql - Query text.
   * @returns {Promise<number>} The value of the `count` column.
   */
  getCount(sql) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, [], (err, row) => {
        if (err) reject(err);
        else resolve(row.count);
      });
    });
  }

  /**
   * Run a query and resolve with its first row.
   * @param {string} sql - Query text.
   * @returns {Promise<object|undefined>} The row as passed to the sqlite3
   *   callback.
   */
  getRow(sql) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, [], (err, row) => {
        if (err) reject(err);
        else resolve(row);
      });
    });
  }
}
// Run migration: a rejected run is reported and turned into exit status 1.
const migration = new CompleteTravelMigration();

function reportFatal(err) {
  console.error("Fatal error:", err);
  process.exit(1);
}

migration.migrate().catch(reportFatal);

View File

@@ -0,0 +1,212 @@
/**
* Migration script to convert scraped SQLite database to Node.js travel_rates schema
*
* Source DB: travel_rates_scraped.sqlite3 (from Python scraper)
* Target DB: travel_rates.db (Node.js app schema)
*
* This script:
* 1. Reads rate_entries from scraped DB
* 2. Aggregates meal rates (breakfast, lunch, dinner) and incidentals by city
* 3. Inserts into travel_rates table in Node.js format
*/
const sqlite3 = require("sqlite3").verbose();
const path = require("path");
// Database paths, both resolved relative to the directory above this script:
// the scraped SQLite file lives under data/, the target file at the top level.
const PROJECT_ROOT = path.join(__dirname, "..");
const SOURCE_DB = path.join(PROJECT_ROOT, "data", "travel_rates_scraped.sqlite3");
const TARGET_DB = path.join(PROJECT_ROOT, "travel_rates.db");
// Exchange rates for display (not used for conversion, just for reference).
// Each value is the amount of CAD for one unit of the listed currency.
const EXCHANGE_RATES = Object.fromEntries([
  ["EUR", 1.54], // EUR to CAD
  ["USD", 1.39], // USD to CAD
  ["AUD", 0.92], // AUD to CAD
  ["CAD", 1.0],
  ["ARS", 0.0014], // ARS to CAD (approximate)
]);
/**
 * Open a SQLite database file.
 * @param {string} dbPath - Path of the database file.
 * @returns {Promise<object>} Resolves with the sqlite3 Database handle once
 *   the open callback reports success; rejects with the reported error.
 */
async function openDatabase(dbPath) {
  return new Promise((resolve, reject) => {
    const handle = new sqlite3.Database(dbPath, (openError) => {
      if (openError) {
        reject(openError);
        return;
      }
      resolve(handle);
    });
  });
}
/**
 * Promise wrapper around `db.all`.
 * @param {object} db - Handle exposing `all(sql, params, callback)`.
 * @param {string} sql - Query text.
 * @param {Array} [params=[]] - Values bound to the query placeholders.
 * @returns {Promise<Array>} Resolves with the rows passed to the callback;
 *   rejects with the callback's error.
 */
async function queryAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const settle = (queryError, rows) => {
      if (queryError) {
        reject(queryError);
        return;
      }
      resolve(rows);
    };
    db.all(sql, params, settle);
  });
}
/**
 * Promise wrapper around `db.run`.
 * @param {object} db - Handle exposing `run(sql, params, callback)`.
 * @param {string} sql - Statement text.
 * @param {Array} [params=[]] - Values bound to the statement placeholders.
 * @returns {Promise<object>} Resolves with the callback's `this` value;
 *   rejects with the callback's error.
 */
async function runQuery(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // A regular function is required here: the result is delivered as `this`.
    function onDone(runError) {
      if (runError) {
        reject(runError);
        return;
      }
      resolve(this);
    }
    db.run(sql, params, onDone);
  });
}
// One row per (country, city, currency) of the 'international' source. Each
// meal column takes the largest rate_amount whose rate_type matches the meal
// name; groups without any breakfast, lunch or dinner value are dropped.
const CITY_RATES_QUERY = `
SELECT
country,
city,
currency,
MAX(CASE WHEN rate_type LIKE '%breakfast%' THEN rate_amount END) as breakfast,
MAX(CASE WHEN rate_type LIKE '%lunch%' THEN rate_amount END) as lunch,
MAX(CASE WHEN rate_type LIKE '%dinner%' THEN rate_amount END) as dinner,
MAX(CASE WHEN rate_type LIKE '%incidental%' THEN rate_amount END) as incidentals
FROM rate_entries
WHERE city IS NOT NULL
AND country IS NOT NULL
AND source = 'international'
GROUP BY country, city, currency
HAVING breakfast IS NOT NULL OR lunch IS NOT NULL OR dinner IS NOT NULL
`;

/**
 * Collect meal and incidental rates per international city from the scraped
 * database.
 * @param {object} sourceDb - Handle of the scraped database.
 * @returns {Promise<Array<object>>} Rows with country, city, currency,
 *   breakfast, lunch, dinner and incidentals.
 */
async function aggregateCityRates(sourceDb) {
  const cityRows = await queryAll(sourceDb, CITY_RATES_QUERY);
  return cityRows;
}
/**
 * Delete every row of `travel_rates` in the target database and log that the
 * table was cleared.
 * @param {object} targetDb - Handle of the target database.
 * @returns {Promise<void>}
 */
async function clearTargetDatabase(targetDb) {
  const wipeStatement = "DELETE FROM travel_rates";
  await runQuery(targetDb, wipeStatement);
  console.log("Cleared existing travel_rates data");
}
// NOTE(review): this statement writes `standardRate` and `standard_rate`
// columns; the travel_rates schema created by the complete-migration script
// in this commit has neither — confirm the target schema.
const INSERT_CITY_RATE_SQL = `
INSERT INTO travel_rates (
city_key, city_name, country, breakfast, lunch, dinner,
incidentals, currency, standardRate, standard_rate
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`;

/**
 * Insert one `travel_rates` row per aggregated city, one statement at a time.
 * A city whose insert fails is logged and counted as skipped; the loop
 * continues with the next city.
 * @param {object} targetDb - Handle of the target database.
 * @param {Array<object>} cities - Rows with city, country, currency,
 *   breakfast, lunch, dinner and incidentals.
 * @returns {Promise<{inserted: number, skipped: number}>} Outcome counters.
 */
async function insertCityRates(targetDb, cities) {
  // Lowercase the text and turn each run of whitespace into a dash.
  const toSlug = (text) => text.toLowerCase().replace(/\s+/g, "-");
  // Missing (falsy) meal amounts count as zero in the total.
  const orZero = (amount) => amount || 0;

  const tally = { inserted: 0, skipped: 0 };
  for (const record of cities) {
    try {
      // City key has the form "<city>-<country>".
      const cityKey = `${toSlug(record.city)}-${toSlug(record.country)}`;
      // Standard rate is stored as breakfast + lunch + dinner.
      const mealTotal =
        orZero(record.breakfast) + orZero(record.lunch) + orZero(record.dinner);
      const values = [
        cityKey,
        record.city,
        record.country,
        record.breakfast,
        record.lunch,
        record.dinner,
        record.incidentals,
        record.currency,
        mealTotal,
        mealTotal, // written to both standardRate and standard_rate
      ];
      await runQuery(targetDb, INSERT_CITY_RATE_SQL, values);
      tally.inserted += 1;
    } catch (err) {
      console.error(
        `Error inserting ${record.city}, ${record.country}: ${err.message}`
      );
      tally.skipped += 1;
    }
  }
  return { inserted: tally.inserted, skipped: tally.skipped };
}
/**
 * Copy aggregated international city rates from the scraped database into
 * the `travel_rates` table of the target database, replacing its contents.
 *
 * Both database handles are closed before the returned promise settles. A
 * failure is logged and re-thrown so that the caller decides how to exit;
 * terminating the process here would skip the cleanup and would also kill
 * any program that imports this function.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a database cannot be opened, a query fails, or the
 *   scraped database yields no city rates (the target is left untouched).
 */
async function migrate() {
  console.log(
    "Starting migration from scraped database to Node.js schema...\n"
  );
  let sourceDb, targetDb;
  try {
    // Open databases
    console.log(`Opening source database: ${SOURCE_DB}`);
    sourceDb = await openDatabase(SOURCE_DB);
    console.log(`Opening target database: ${TARGET_DB}`);
    targetDb = await openDatabase(TARGET_DB);

    // Aggregate city rates from scraped data
    console.log("\nAggregating city rates from scraped data...");
    const cities = await aggregateCityRates(sourceDb);
    console.log(`Found ${cities.length} cities with meal rates`);
    if (cities.length === 0) {
      // Clearing the target with nothing to put back would only lose data.
      throw new Error(
        "No city rates found in the scraped database; target database left unchanged"
      );
    }

    // Show currency distribution
    const currencyCounts = cities.reduce((acc, city) => {
      acc[city.currency] = (acc[city.currency] || 0) + 1;
      return acc;
    }, {});
    console.log("\nCurrency distribution:");
    for (const [currency, count] of Object.entries(currencyCounts)) {
      console.log(` ${currency}: ${count} cities`);
    }

    // Clear target database
    console.log("\nClearing target database...");
    await clearTargetDatabase(targetDb);

    // Insert city rates
    console.log("\nInserting city rates into target database...");
    const result = await insertCityRates(targetDb, cities);
    console.log(`\nMigration complete!`);
    console.log(` Inserted: ${result.inserted} cities`);
    console.log(` Skipped: ${result.skipped} cities`);

    // Show sample entries
    console.log("\nSample migrated entries:");
    const samples = await queryAll(
      targetDb,
      `
      SELECT city_name, country, breakfast, lunch, dinner, incidentals, currency
      FROM travel_rates
      WHERE country IN ('Argentina', 'Albania', 'Australia')
      LIMIT 5
      `
    );
    for (const sample of samples) {
      console.log(
        ` ${sample.city_name}, ${sample.country}: B:${sample.breakfast} L:${sample.lunch} D:${sample.dinner} I:${sample.incidentals} (${sample.currency})`
      );
    }
  } catch (err) {
    console.error("\nMigration failed:", err);
    throw err;
  } finally {
    if (sourceDb) sourceDb.close();
    if (targetDb) targetDb.close();
  }
}
// Run migration only when this file is executed directly; when it is loaded
// with require(), the caller invokes the exported migrate() itself.
const isMainModule = require.main === module;

function exitOnFatal(err) {
  console.error("Fatal error:", err);
  process.exit(1);
}

if (isMainModule) {
  migrate().catch(exitOnFatal);
}

module.exports = { migrate };

View File

@@ -0,0 +1,322 @@
const sqlite3 = require('sqlite3').verbose();
const fs = require('fs');
const path = require('path');
/**
 * Builds the `accommodation_rates` SQLite table from
 * `data/accommodationRates.json` and prints summary statistics.
 *
 * Typical use: `await new DatabaseMigration().migrate()`.
 */
class DatabaseMigration {
  constructor() {
    this.dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
    this.db = null;
  }

  /**
   * Runs every migration step in order and always closes the connection.
   *
   * @returns {Promise<void>}
   * @throws Re-throws (after logging) the first error raised by any step.
   */
  async migrate() {
    console.log('🚀 Starting database migration...\n');
    try {
      // Ensure database directory exists
      const dbDir = path.dirname(this.dbPath);
      if (!fs.existsSync(dbDir)) {
        fs.mkdirSync(dbDir, { recursive: true });
        console.log('✅ Created database directory');
      }

      await this.openDatabase();
      // Inline schema - no external file needed
      await this.createTables();
      await this.importAccommodationRates();
      await this.addCanberra();
      await this.buildSearchIndexes();
      await this.displayStats();

      console.log('\n✅ Migration complete!');
      console.log(`📊 Database: ${this.dbPath}`);
    } catch (error) {
      console.error('❌ Migration failed:', error);
      throw error;
    } finally {
      await this.closeDatabase();
    }
  }

  /**
   * Opens (creating if necessary) the SQLite file at `this.dbPath`.
   *
   * @returns {Promise<void>} rejects with the sqlite3 error if opening fails.
   */
  openDatabase() {
    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          reject(err);
        } else {
          console.log('✅ Database connection opened');
          resolve();
        }
      });
    });
  }

  /**
   * Closes the connection if one is open. Close errors are logged instead of
   * thrown so that, when called from `finally`, they cannot mask the error
   * that made the migration fail.
   *
   * @returns {Promise<void>} always resolves.
   */
  closeDatabase() {
    return new Promise((resolve) => {
      const db = this.db;
      this.db = null;
      if (!db) {
        resolve();
        return;
      }
      db.close((err) => {
        if (err) {
          console.error('⚠️ Failed to close database:', err.message);
        }
        resolve();
      });
    });
  }

  /**
   * Creates the rates table, its indexes and the FTS5 search table
   * (all `IF NOT EXISTS`, so re-running is safe).
   *
   * @returns {Promise<void>}
   */
  async createTables() {
    console.log('📋 Creating tables...');
    // Inline schema - no external file dependency
    const schema = `
      CREATE TABLE IF NOT EXISTS accommodation_rates (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        city_key TEXT UNIQUE NOT NULL,
        city_name TEXT NOT NULL,
        province TEXT,
        country TEXT,
        region TEXT NOT NULL,
        currency TEXT NOT NULL,
        jan_rate REAL NOT NULL,
        feb_rate REAL NOT NULL,
        mar_rate REAL NOT NULL,
        apr_rate REAL NOT NULL,
        may_rate REAL NOT NULL,
        jun_rate REAL NOT NULL,
        jul_rate REAL NOT NULL,
        aug_rate REAL NOT NULL,
        sep_rate REAL NOT NULL,
        oct_rate REAL NOT NULL,
        nov_rate REAL NOT NULL,
        dec_rate REAL NOT NULL,
        standard_rate REAL,
        is_international BOOLEAN DEFAULT 0,
        effective_date DATE DEFAULT '2025-01-01',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );
      CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
      CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
      CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
      CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);
      CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
        city_key,
        city_name,
        province,
        country,
        region,
        content='accommodation_rates'
      );
    `;
    return new Promise((resolve, reject) => {
      this.db.exec(schema, (err) => {
        if (err) {
          reject(err);
        } else {
          console.log('✅ Tables created');
          resolve();
        }
      });
    });
  }

  /**
   * Loads `data/accommodationRates.json` and inserts its `cities` (Canadian)
   * and `internationalCities` entries. A city that fails to insert is logged
   * and skipped; it does not abort the import.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the JSON file is missing (or cannot be parsed).
   */
  async importAccommodationRates() {
    console.log('📥 Importing accommodation rates...');
    const jsonPath = path.join(__dirname, '..', 'data', 'accommodationRates.json');
    console.log(` 📂 Looking for JSON at: ${jsonPath}`);
    if (!fs.existsSync(jsonPath)) {
      console.error('❌ accommodationRates.json not found!');
      throw new Error('Missing accommodationRates.json file');
    }
    console.log(' ✅ JSON file found, reading...');
    const rawData = fs.readFileSync(jsonPath, 'utf8');
    console.log(` 📄 File size: ${rawData.length} bytes`);
    const data = JSON.parse(rawData);
    console.log(` ✅ JSON parsed successfully`);
    console.log(` 📊 Data keys: ${Object.keys(data).join(', ')}`);

    let imported = 0;

    // Import Canadian cities
    if (data.cities) {
      const cityCount = Object.keys(data.cities).length;
      console.log(` - Importing ${cityCount} Canadian cities...`);
      for (const [key, city] of Object.entries(data.cities)) {
        try {
          await this.insertAccommodationRate({
            city_key: key,
            city_name: city.name,
            province: city.province,
            country: 'Canada',
            region: city.region,
            currency: city.currency,
            rates: city.monthlyRates,
            is_international: 0
          });
          imported++;
          if (imported % 50 === 0) {
            console.log(` ... ${imported} cities imported so far`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${imported} Canadian cities`);
    } else {
      console.log(' ⚠️ No "cities" key found in JSON');
    }

    // Import international cities
    if (data.internationalCities) {
      const intlCityCount = Object.keys(data.internationalCities).length;
      console.log(` - Importing ${intlCityCount} international cities...`);
      let intlCount = 0;
      for (const [key, city] of Object.entries(data.internationalCities)) {
        try {
          // Cities without a monthly breakdown use the flat rate for all months.
          const rates = city.monthlyRates || Array(12).fill(city.standardRate);
          await this.insertAccommodationRate({
            city_key: key,
            city_name: city.name,
            province: null,
            country: city.country,
            region: city.region,
            currency: city.currency,
            rates: rates,
            standard_rate: city.standardRate || rates[0],
            is_international: 1
          });
          intlCount++;
          if (intlCount % 20 === 0) {
            console.log(` ... ${intlCount} international cities imported so far`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${intlCount} international cities`);
      imported += intlCount;
    } else {
      console.log(' ⚠️ No "internationalCities" key found in JSON');
    }

    console.log(`✅ Total imported: ${imported} cities`);
  }

  /**
   * Inserts the hard-coded Canberra row. Because the insert statement is
   * `INSERT OR REPLACE`, an existing `canberra` row (e.g. one imported from
   * the JSON file) is overwritten rather than raising a UNIQUE error.
   *
   * @returns {Promise<void>}
   */
  async addCanberra() {
    console.log('🇦🇺 Adding Canberra...');
    await this.insertAccommodationRate({
      city_key: 'canberra',
      city_name: 'Canberra',
      province: null,
      country: 'Australia',
      region: 'Oceania',
      currency: 'USD',
      rates: Array(12).fill(184),
      standard_rate: 184,
      is_international: 1
    });
    console.log('✅ Canberra added: $184 USD/night');
  }

  /**
   * Inserts or replaces one city row.
   *
   * @param {object} city
   * @param {string} city.city_key unique key of the city
   * @param {string} city.city_name
   * @param {?string} city.province
   * @param {?string} city.country
   * @param {string} city.region
   * @param {string} city.currency
   * @param {number[]} city.rates exactly 12 monthly rates, January first
   * @param {number} [city.standard_rate] defaults to the January rate
   * @param {number} city.is_international 0 or 1
   * @returns {Promise<void>} rejects when `rates` is not a 12-element array
   *   or when sqlite3 reports an error.
   */
  async insertAccommodationRate(city) {
    const MONTHS_PER_YEAR = 12;
    const rates = city.rates;
    // The monthly rates are spread positionally into the bind list; any other
    // length would shift standard_rate / is_international into wrong columns.
    if (!Array.isArray(rates) || rates.length !== MONTHS_PER_YEAR) {
      const got = Array.isArray(rates) ? `${rates.length} values` : typeof rates;
      throw new Error(
        `Expected 12 monthly rates for "${city.city_key}", got ${got}`
      );
    }

    const sql = `
      INSERT OR REPLACE INTO accommodation_rates (
        city_key, city_name, province, country, region, currency,
        jan_rate, feb_rate, mar_rate, apr_rate, may_rate, jun_rate,
        jul_rate, aug_rate, sep_rate, oct_rate, nov_rate, dec_rate,
        standard_rate, is_international
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `;
    const params = [
      city.city_key,
      city.city_name,
      city.province,
      city.country,
      city.region,
      city.currency,
      ...rates,
      city.standard_rate || rates[0],
      city.is_international
    ];

    return new Promise((resolve, reject) => {
      this.db.run(sql, params, (err) => {
        if (err) reject(err);
        else resolve();
      });
    });
  }

  /**
   * Placeholder step: the FTS5 table is created by createTables() but is
   * intentionally not populated here.
   *
   * @returns {Promise<void>}
   */
  async buildSearchIndexes() {
    console.log('🔍 Building search indexes...');
    console.log(' Skipping FTS5 index population (can be done later if needed)');
    console.log(' ✅ Standard indexes already created with tables');
    return Promise.resolve();
  }

  /**
   * Prints row counts and the Canberra rate.
   *
   * @returns {Promise<void>}
   */
  async displayStats() {
    console.log('\n📊 Database Statistics:');
    const total = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates');
    console.log(` Total cities: ${total}`);
    const canadian = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 0');
    console.log(` Canadian: ${canadian}`);
    const international = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 1');
    console.log(` International: ${international}`);
    // Bound parameter instead of a double-quoted literal, which SQLite only
    // accepts as a string through a legacy compatibility quirk.
    const canberra = await this.getCount(
      'SELECT COUNT(*) as count FROM accommodation_rates WHERE city_key = ?',
      ['canberra']
    );
    console.log(` Canberra found: ${canberra > 0 ? '✅ YES' : '❌ NO'}`);
    if (canberra > 0) {
      const rate = await this.getCanberraRate();
      console.log(` Canberra rate: $${rate} USD/night`);
    }
  }

  /**
   * Runs a query whose single result row exposes a `count` column.
   *
   * @param {string} sql query selecting `... as count`
   * @param {Array} [params=[]] values for `?` placeholders in `sql`
   * @returns {Promise<number>} the `count` value (0 if no row came back)
   */
  getCount(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row ? row.count : 0);
      });
    });
  }

  /**
   * @returns {Promise<?number>} Canberra's January rate, or null when the
   *   row does not exist.
   */
  getCanberraRate() {
    return new Promise((resolve, reject) => {
      this.db.get(
        'SELECT jan_rate FROM accommodation_rates WHERE city_key = ?',
        ['canberra'],
        (err, row) => {
          if (err) reject(err);
          else resolve(row ? row.jan_rate : null);
        }
      );
    });
  }
}
// Script entry point: perform the migration when this file is executed
// directly with node rather than loaded through require().
if (require.main === module) {
  new DatabaseMigration().migrate().catch((fatal) => {
    console.error('Fatal error:', fatal);
    process.exit(1);
  });
}

// Export the class so other scripts can drive the migration themselves.
module.exports = DatabaseMigration;

View File

@@ -0,0 +1,44 @@
import sqlite3
import json
# Ad-hoc report over the scraped SQLite database: table sizes, currencies,
# and a few sample rows from each table of interest.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')


def _fmt(value, spec):
    """Format a numeric column value; SQL NULL (None) is rendered as 'n/a'."""
    return 'n/a' if value is None else format(value, spec)


try:
    print('\n=== SCRAPED DATABASE ANALYSIS ===\n')

    # Tables
    print('📊 Tables:')
    tables = [row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()]
    for table in tables:
        # Identifiers cannot be bound as parameters, so quote the name instead.
        quoted = '"' + table.replace('"', '""') + '"'
        count = conn.execute(f'SELECT COUNT(*) FROM {quoted}').fetchone()[0]
        print(f' - {table}: {count} rows')

    # Unique currencies
    print('\n💰 Unique Currencies in rate_entries:')
    currencies = [row[0] for row in conn.execute('SELECT DISTINCT currency FROM rate_entries WHERE currency IS NOT NULL ORDER BY currency').fetchall()]
    print(f' {", ".join(currencies)}')

    # Argentina data
    print('\n🇦🇷 Argentina entries:')
    for row in conn.execute('SELECT country, city, currency, rate_type, rate_amount FROM rate_entries WHERE country=? LIMIT 10', ('Argentina',)).fetchall():
        print(f' {row[0]} - {row[1]} - Currency: {row[2]} - {row[3]}: ${_fmt(row[4], ".2f")}')

    # Sample rate entries by country
    print('\n🌍 Sample entries by country (first 3 countries):')
    for row in conn.execute('SELECT DISTINCT country FROM rate_entries WHERE country IS NOT NULL LIMIT 3').fetchall():
        country = row[0]
        print(f'\n {country}:')
        for entry in conn.execute('SELECT city, currency, rate_type, rate_amount FROM rate_entries WHERE country=? LIMIT 3', (country,)).fetchall():
            print(f' {entry[0]} - {entry[1]} - {entry[2]}: ${_fmt(entry[3], ".2f")}')

    # Exchange rates
    print('\n💱 Exchange rates:')
    for row in conn.execute('SELECT currency, rate_to_cad, effective_date FROM exchange_rates WHERE currency IS NOT NULL LIMIT 10').fetchall():
        print(f' {row[0]}: {_fmt(row[1], ".4f")} CAD (effective: {row[2]})')

    # Accommodations
    print('\n🏨 Accommodation entries (sample):')
    for row in conn.execute('SELECT property_name, city, province, rate_amount, currency FROM accommodations WHERE rate_amount IS NOT NULL LIMIT 10').fetchall():
        print(f' {row[0]} - {row[1]}, {row[2]} - ${_fmt(row[3], ".2f")} {row[4]}')
finally:
    # Release the connection even when one of the queries above fails.
    conn.close()

print('\n✅ Done!')

View File

@@ -0,0 +1,59 @@
const http = require('http');
/**
 * Smoke-tests the accommodation search endpoint by requesting
 * `GET http://localhost:5001/api/accommodation/search?city=canberra`
 * and printing the interesting fields of the JSON response.
 *
 * @returns {Promise<void>} resolves once a JSON response (success, API error
 *   or unexpected shape) has been printed; rejects when the connection fails
 *   or the body is not valid JSON.
 */
async function testAPI() {
  console.log('\n🧪 Testing Canberra Search API...\n');
  const options = {
    hostname: 'localhost',
    port: 5001,
    path: '/api/accommodation/search?city=canberra',
    method: 'GET'
  };
  return new Promise((resolve, reject) => {
    const req = http.request(options, (res) => {
      // Decode as UTF-8 up front so a multi-byte character split across two
      // chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        let json;
        // Only the parse itself is guarded, so "Failed to parse" is reported
        // exclusively for bodies that really are not JSON.
        try {
          json = JSON.parse(data);
        } catch (err) {
          console.error('❌ Failed to parse response:', err.message);
          console.log('Raw response:', data);
          reject(err);
          return;
        }

        if (json && json.city) {
          // Tolerate a response without a `rates` field instead of throwing.
          const rates = json.rates || {};
          console.log('✅ SUCCESS! Canberra Found:\n');
          console.log(` City: ${json.city}`);
          console.log(` Country: ${json.country}`);
          console.log(` Region: ${json.region}`);
          console.log(` Currency: ${json.currency}`);
          console.log(` Standard Rate: $${rates.standard || rates[0]} ${json.currency}`);
          console.log(` January Rate: $${rates[0]} ${json.currency}`);
          console.log('\n🎉 CANBERRA IS 100% SEARCHABLE!\n');
        } else if (json && json.error) {
          console.log(`❌ API Error: ${json.error}`);
        } else {
          console.log('❓ Unexpected response:', json);
        }
        resolve();
      });
    });
    req.on('error', (err) => {
      console.error(`❌ Connection failed: ${err.message}`);
      console.error('Make sure the server is running: node server.js');
      reject(err);
    });
    req.end();
  });
}
// testAPI() logs the cause before rejecting; handle the rejection here so a
// failure yields a non-zero exit code instead of an unhandled rejection.
testAPI().catch(() => {
  process.exitCode = 1;
});

View File

@@ -0,0 +1,118 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
// Manual check of the `travel_rates` table: prints the Canberra and Toronto
// rows plus overall counts, and reports failure when an expected row is
// missing instead of unconditionally claiming success.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Database connection failed:', err);
    process.exit(1);
  }
});

console.log('\n🧪 Testing Complete Travel Rates Database\n');
console.log('='.repeat(70));

/**
 * Promise wrapper around db.get() for a parameterless query.
 * @param {string} sql
 * @returns {Promise<object|undefined>} the first row, or undefined if none.
 */
function fetchRow(sql) {
  return new Promise((resolve, reject) => {
    db.get(sql, [], (err, row) => {
      if (err) reject(err);
      else resolve(row);
    });
  });
}

/**
 * Runs the three checks in order.
 * @returns {Promise<number>} number of checks that failed.
 */
async function runChecks() {
  let failures = 0;

  // Test 1: Check Canberra complete data
  console.log('\n1⃣ Testing Canberra (Australia):\n');
  const canberra = await fetchRow(`
    SELECT
      city_name, country, region, currency,
      standard_accommodation,
      breakfast, lunch, dinner, total_meals,
      incidentals, total_daily_allowance
    FROM travel_rates
    WHERE city_key = 'canberra'
  `);
  if (!canberra) {
    console.log('❌ CANBERRA NOT FOUND!\n');
    failures += 1;
  } else {
    console.log(`${canberra.city_name}, ${canberra.country}`);
    console.log(` Region: ${canberra.region}`);
    console.log(` Currency: ${canberra.currency}\n`);
    console.log(` 🏨 Accommodation: $${canberra.standard_accommodation}/night`);
    console.log(` 🍳 Breakfast: $${canberra.breakfast}`);
    console.log(` 🍱 Lunch: $${canberra.lunch}`);
    console.log(` 🍽️ Dinner: $${canberra.dinner}`);
    console.log(` 📝 Total Meals: $${canberra.total_meals}`);
    console.log(` 💼 Incidentals: $${canberra.incidentals}`);
    console.log(` 💰 Total Daily Allowance: $${canberra.total_daily_allowance}\n`);
    const fullDayTotal =
      parseFloat(canberra.standard_accommodation) +
      parseFloat(canberra.total_daily_allowance);
    if (Number.isFinite(fullDayTotal)) {
      console.log(` 🎯 FULL DAY COST (Accommodation + Per Diem): $${fullDayTotal.toFixed(2)} ${canberra.currency}\n`);
    } else {
      // A NULL accommodation or allowance would otherwise print "$NaN".
      console.log(' ❌ FULL DAY COST unavailable: accommodation or daily allowance is missing\n');
      failures += 1;
    }
  }

  // Test 2: Sample Canadian city
  console.log('2⃣ Testing Toronto (Canada):\n');
  const toronto = await fetchRow(`
    SELECT
      city_name, country, province, currency,
      jan_accommodation, feb_accommodation, mar_accommodation,
      breakfast, lunch, dinner, total_meals,
      incidentals, total_daily_allowance
    FROM travel_rates
    WHERE city_key = 'toronto'
  `);
  if (!toronto) {
    console.log('❌ Toronto not found\n');
    failures += 1;
  } else {
    console.log(`${toronto.city_name}, ${toronto.province}`);
    console.log(` Currency: ${toronto.currency}\n`);
    console.log(` 🏨 Accommodation (Jan): $${toronto.jan_accommodation}/night`);
    console.log(` 🏨 Accommodation (Feb): $${toronto.feb_accommodation}/night`);
    console.log(` 🏨 Accommodation (Mar): $${toronto.mar_accommodation}/night`);
    console.log(` 🍳 Breakfast: $${toronto.breakfast}`);
    console.log(` 🍱 Lunch: $${toronto.lunch}`);
    console.log(` 🍽️ Dinner: $${toronto.dinner}`);
    console.log(` 💰 Total Daily Allowance: $${toronto.total_daily_allowance}\n`);
  }

  // Test 3: Count verification
  console.log('3⃣ Database Statistics:\n');
  const stats = await fetchRow(`
    SELECT
      COUNT(*) as total,
      COUNT(CASE WHEN is_international = 0 THEN 1 END) as canadian,
      COUNT(CASE WHEN is_international = 1 THEN 1 END) as international,
      COUNT(DISTINCT country) as countries
    FROM travel_rates
  `);
  console.log(` 📊 Total Cities: ${stats.total}`);
  console.log(` 🇨🇦 Canadian: ${stats.canadian}`);
  console.log(` 🌍 International: ${stats.international}`);
  console.log(` 🗺️ Countries: ${stats.countries}\n`);

  return failures;
}

runChecks()
  .then((failures) => {
    console.log('='.repeat(70));
    if (failures === 0) {
      console.log('\n✅ All tests passed! Database has complete accommodation + meal rates\n');
    } else {
      console.log(`\n❌ ${failures} check(s) failed\n`);
      process.exitCode = 1;
    }
    db.close();
  })
  .catch((err) => {
    console.error('❌ Query failed:', err);
    db.close();
    process.exit(1);
  });

65
scripts/testDatabase.js Normal file
View File

@@ -0,0 +1,65 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
// Manual check of the `accommodation_rates` table: looks up Canberra, counts
// all rows and lists Australian cities.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
console.log('🔍 Testing Database...\n');
console.log(`📁 Database path: ${dbPath}\n`);
const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Failed to open database:', err);
    process.exit(1);
  }
});

// serialize() makes the three queries run (and therefore print) in the order
// they are written; sqlite3's default mode may execute them in parallel.
db.serialize(() => {
  // Test 1: Check if Canberra exists
  db.get('SELECT * FROM accommodation_rates WHERE city_key = ?', ['canberra'], (err, row) => {
    if (err) {
      console.error('❌ Query failed:', err);
      process.exitCode = 1;
    } else if (row) {
      console.log('✅ CANBERRA FOUND!');
      console.log(' City:', row.city_name);
      console.log(' Country:', row.country);
      console.log(' Region:', row.region);
      console.log(' Jan Rate:', `$${row.jan_rate} ${row.currency}`);
      console.log(' Standard Rate:', `$${row.standard_rate} ${row.currency}`);
      console.log(' International:', row.is_international ? 'Yes' : 'No');
    } else {
      console.log('❌ CANBERRA NOT FOUND IN DATABASE!');
      process.exitCode = 1;
    }
  });

  // Test 2: Count total cities
  db.get('SELECT COUNT(*) as count FROM accommodation_rates', [], (err, row) => {
    if (err) {
      console.error('❌ Count query failed:', err);
      process.exitCode = 1;
    } else {
      console.log(`\n📊 Total cities in database: ${row.count}`);
    }
  });

  // Test 3: List all Australian cities
  db.all(
    'SELECT city_key, city_name, standard_rate, currency FROM accommodation_rates WHERE country = ?',
    ['Australia'],
    (err, rows) => {
      if (err) {
        console.error('❌ Australia query failed:', err);
        process.exitCode = 1;
      } else {
        console.log('\n🇦🇺 Australian cities:');
        if (rows.length === 0) {
          console.log(' ❌ No Australian cities found!');
        } else {
          for (const row of rows) {
            // Print the stored currency rather than assuming USD.
            console.log(` - ${row.city_name}: $${row.standard_rate} ${row.currency}`);
          }
        }
      }
      // Close database
      db.close((closeErr) => {
        if (closeErr) {
          console.error('Error closing database:', closeErr);
        } else {
          console.log('\n✅ Test complete!');
        }
      });
    }
  );
});

View File

@@ -0,0 +1,25 @@
import re
def _extract_currency_from_title(title):
    """Return the 3-letter currency code embedded in a table title.

    Titles look like 'Albania - Currency: Euro (EUR)'; the code is the three
    upper-case letters in parentheses after 'Currency: <name>'. Returns None
    for an empty/None title or when the pattern is absent.
    """
    found = re.search(r"Currency:\s*[^(]+\(([A-Z]{3})\)", title) if title else None
    return found.group(1) if found else None
# Representative inputs: four well-formed titles, a missing title, and a
# title without any currency marker.
test_titles = [
    "Argentina - Currency: Argentine Peso (ARS)",
    "Albania - Currency: Euro (EUR)",
    "Afghanistan - Currency: US Dollar (USD)",
    "Canada - Currency: Canadian Dollar (CAD)",
    None,
    "Some random text",
]

# Show each input next to the code extracted from it.
for title in test_titles:
    print(f"{title!r} -> {_extract_currency_from_title(title)}")

View File

@@ -0,0 +1,61 @@
"""Test currency extraction step by step"""
import sqlite3
import re
def _extract_currency_from_title(title):
"""Extract currency code from table title like 'Albania - Currency: Euro (EUR)'"""
if not title:
return None
# Pattern: "Currency: [Name] ([CODE])"
match = re.search(r"Currency:\s*[^(]+\(([A-Z]{3})\)", title)
if match:
return match.group(1)
return None
# Run the extractor against titles stored in raw_tables, then report how many
# rate_entries rows ended up with / without a currency.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()
    print("Testing currency extraction from stored titles:\n")

    # (country, leading text) pairs: the second block is separated from the
    # first by a blank line.
    for country, lead in (("Argentina", ""), ("Albania", "\n")):
        cursor.execute(
            """
            SELECT title
            FROM raw_tables
            WHERE title LIKE ?
            """,
            (f"%{country}%",),
        )
        row = cursor.fetchone()
        if row:
            title = row[0]
            print(f"{lead}{country} Title: {title}")
            currency = _extract_currency_from_title(title)
            print(f"Extracted Currency: {currency}")

    # Check what entries we actually have
    cursor.execute("""
    SELECT COUNT(*)
    FROM rate_entries
    WHERE currency IS NOT NULL
    """)
    print(f"\nTotal entries with currency: {cursor.fetchone()[0]}")
    cursor.execute("""
    SELECT COUNT(*)
    FROM rate_entries
    WHERE currency IS NULL
    """)
    print(f"Total entries WITHOUT currency: {cursor.fetchone()[0]}")
finally:
    # Close the connection even when a query fails (e.g. a missing table).
    conn.close()

33
scripts/test_scraper.py Normal file
View File

@@ -0,0 +1,33 @@
"""Test the scraper extract_rate_entries function with debug output"""
import sys
sys.path.insert(0, 'src')
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source, extract_rate_entries
# Create a test source config
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")

# Scrape the source; only the first returned table is inspected below.
print("Fetching tables...")
tables = scrape_tables_from_source(source)
print(f"Got {len(tables)} tables")

# Without this guard an empty result would surface as a bare IndexError.
if not tables:
    sys.exit("No tables were scraped from the source; nothing to test.")

# Check first table
first_table = tables[0]
print("\nFirst table:")
print(f" Index: {first_table['table_index']}")
print(f" Title: {first_table['title']}")
print(f" Data rows: {len(first_table['data'])}")

# Extract rate entries from just first table
print("\nExtracting rate entries from first table...")
entries = extract_rate_entries(source, [first_table])
print(f"Got {len(entries)} entries")
if entries:
    first_entry = entries[0]
    print("\nFirst entry:")
    print(f" Country: {first_entry['country']}")
    print(f" City: {first_entry['city']}")
    print(f" Currency: {first_entry['currency']}")
    print(f" Rate Type: {first_entry['rate_type']}")
    print(f" Rate Amount: {first_entry['rate_amount']}")

View File

@@ -0,0 +1,58 @@
import sqlite3
import sys
# Summarise currency coverage in rate_entries and print sample rows for two
# countries. Exits with status 1 if anything goes wrong.
db_path = "data/travel_rates_scraped.sqlite3"
conn = None
try:
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Count total entries
    cursor.execute("SELECT COUNT(*) FROM rate_entries")
    total = cursor.fetchone()[0]
    print(f"Total rate entries: {total}")

    # Count entries with currency
    cursor.execute("SELECT COUNT(*) FROM rate_entries WHERE currency IS NOT NULL")
    with_currency = cursor.fetchone()[0]
    print(f"Entries with currency: {with_currency}")
    print(f"Missing currency: {total - with_currency}")

    # Currency distribution
    print("\nCurrency Distribution:")
    cursor.execute("""
    SELECT currency, COUNT(*) as count
    FROM rate_entries
    GROUP BY currency
    ORDER BY count DESC
    """)
    for row in cursor.fetchall():
        currency = row[0] if row[0] else "NULL"
        print(f" {currency}: {row[1]}")

    # Sample entries with currency: (heading label, LIKE pattern) per country
    for label, pattern in (("Argentina ARS", "%Argentina%"), ("Albania EUR", "%Albania%")):
        print(f"\nSample Entries ({label}):")
        cursor.execute(
            """
            SELECT country, city, rate_type, rate_amount, currency
            FROM rate_entries
            WHERE country LIKE ?
            LIMIT 5
            """,
            (pattern,),
        )
        for row in cursor.fetchall():
            print(f" {row[0]} | {row[1]} | {row[2]}: ${row[3]} {row[4]}")
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
finally:
    # Runs on success, on error, and during sys.exit(); conn stays None only
    # when connect() itself failed.
    if conn is not None:
        conn.close()