mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 14:10:22 -05:00
Add Python web scraper for NJC travel rates with currency extraction
- Implemented Python scraper using BeautifulSoup and pandas to automatically collect travel rates from the official NJC website
- Added currency extraction from table titles (supports EUR, USD, AUD, CAD, ARS, etc.)
- Added country extraction from table titles for international rates
- Flattened pandas MultiIndex columns for a cleaner data structure
- Defaulted to CAD for domestic Canadian sources (accommodations and domestic tables)
- Created SQLite database schema (raw_tables, rate_entries, exchange_rates, accommodations)
- Successfully scraped 92 tables with 17,205 rate entries covering 25 international cities
- Added migration script to convert scraped data to the Node.js database format
- Updated .gitignore for Python files (.venv/, __pycache__, *.pyc, *.sqlite3)
- Fixed city validation and currency conversion in the main app
- Added comprehensive debug and verification scripts

This replaces manual JSON maintenance with automated data collection from the official government source.
This commit is contained in:
39
scripts/analyze_sources.py
Normal file
39
scripts/analyze_sources.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import sqlite3
|
||||
|
||||
# Summarise the scraped database per source: number of raw tables, number of
# rate entries (split by whether a currency was extracted), and a few sample
# table titles for each known source.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()

    print("Tables by source:\n")
    cursor.execute("""
        SELECT source, COUNT(*) as count
        FROM raw_tables
        GROUP BY source
    """)
    for source_name, table_count in cursor.fetchall():
        print(f" {source_name}: {table_count} tables")

    print("\nRate entries by source:\n")
    cursor.execute("""
        SELECT source, COUNT(*) as count,
               SUM(CASE WHEN currency IS NULL THEN 1 ELSE 0 END) as null_count,
               SUM(CASE WHEN currency IS NOT NULL THEN 1 ELSE 0 END) as has_currency_count
        FROM rate_entries
        GROUP BY source
    """)
    for source_name, total, null_count, with_currency in cursor.fetchall():
        print(f" {source_name}: {total} total | {null_count} NULL | {with_currency} with currency")

    print("\nSample titles by source:\n")
    for source in ['international', 'domestic', 'accommodations']:
        # Bind the source as a parameter instead of formatting it into the SQL text.
        cursor.execute(
            """
            SELECT title
            FROM raw_tables
            WHERE source = ?
            LIMIT 3
            """,
            (source,),
        )
        print(f"\n{source}:")
        for (raw_title,) in cursor.fetchall():
            title = raw_title if raw_title else "NO TITLE"
            print(f" {title[:80]}")
finally:
    # Release the database handle even when one of the queries fails.
    conn.close()
|
||||
86
scripts/checkMealRates.js
Normal file
86
scripts/checkMealRates.js
Normal file
@@ -0,0 +1,86 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
|
||||
// Diagnostic script: reports whether the `meal_rates` table exists in
// database/travel_rates.db, how many rows it holds, and prints up to ten
// sample rows.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');

const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Database connection failed:', err);
    process.exit(1);
  }
});

const SEPARATOR = '='.repeat(60);

/**
 * Report a failed query, close the database and exit with a non-zero status.
 * @param {string} message - Prefix printed before the error.
 * @param {Error} err - Error reported by sqlite3.
 */
function abort(message, err) {
  console.error(message, err);
  db.close();
  process.exit(1);
}

/**
 * Close the database and print the closing separator.
 * The separator is printed here, after the asynchronous queries have produced
 * their output, so that it actually ends the report instead of preceding it.
 */
function finish() {
  db.close();
  console.log(`${SEPARATOR}\n`);
}

console.log('\n🍽️ Checking Meal Rates Table...\n');
console.log(SEPARATOR);

// Check if meal_rates table exists
const checkTableQuery = `
  SELECT name FROM sqlite_master
  WHERE type='table' AND name='meal_rates'
`;

db.get(checkTableQuery, [], (err, row) => {
  if (err) {
    abort('❌ Query failed:', err);
    return;
  }

  if (!row) {
    console.log('\n❌ meal_rates table does NOT exist in database\n');
    console.log('The database migration only created accommodation_rates table.');
    console.log('Meal rates need to be added separately.\n');
    finish();
    return;
  }

  console.log('✅ meal_rates table EXISTS\n');

  // Count records
  const countQuery = 'SELECT COUNT(*) as count FROM meal_rates';

  db.get(countQuery, [], (countErr, countRow) => {
    if (countErr) {
      abort('❌ Count query failed:', countErr);
      return;
    }

    console.log(`📊 Total meal rate records: ${countRow.count}\n`);

    if (countRow.count === 0) {
      console.log('⚠️ Table exists but is EMPTY - no meal rates imported\n');
      finish();
      return;
    }

    // Show sample records
    const sampleQuery = `
      SELECT city_name, country, breakfast, lunch, dinner, incidentals, total_daily
      FROM meal_rates
      LIMIT 10
    `;

    db.all(sampleQuery, [], (sampleErr, rows) => {
      if (sampleErr) {
        abort('❌ Sample query failed:', sampleErr);
        return;
      }

      console.log('Sample meal rates:\n');
      rows.forEach((sample, index) => {
        console.log(`${index + 1}. ${sample.city_name}, ${sample.country}`);
        console.log(` Breakfast: $${sample.breakfast}`);
        console.log(` Lunch: $${sample.lunch}`);
        console.log(` Dinner: $${sample.dinner}`);
        console.log(` Incidentals: $${sample.incidentals}`);
        console.log(` Total Daily: $${sample.total_daily}\n`);
      });

      finish();
    });
  });
});
|
||||
30
scripts/checkSchema.js
Normal file
30
scripts/checkSchema.js
Normal file
@@ -0,0 +1,30 @@
|
||||
const sqlite3 = require("sqlite3").verbose();
|
||||
const path = require("path");
|
||||
|
||||
// Diagnostic script: prints the name of every table found in travel_rates.db
// (looked up one directory above this script).
const schemaDbFile = path.join(__dirname, "..", "travel_rates.db");

// Open callback: bail out with a non-zero status when the file cannot be opened.
function onDatabaseOpened(openErr) {
  if (!openErr) {
    return;
  }
  console.error("Error opening database:", openErr);
  process.exit(1);
}

const db = new sqlite3.Database(schemaDbFile, onDatabaseOpened);

const listTablesSql =
  "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name";

db.all(listTablesSql, [], (queryErr, tables) => {
  if (queryErr) {
    console.error("Error querying tables:", queryErr);
    process.exit(1);
  }

  console.log("Tables in travel_rates.db:");
  for (const { name } of tables) {
    console.log(` - ${name}`);
  }

  db.close();
});
|
||||
26
scripts/check_argentina_source.py
Normal file
26
scripts/check_argentina_source.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import sqlite3
|
||||
|
||||
# Show which sources contributed Argentina rate entries and which currency
# each group carries, followed by up to ten raw Argentina rows.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
cursor = conn.cursor()

print("Argentina entries by source:")
grouped = cursor.execute("""
    SELECT source, COUNT(*) as count, currency
    FROM rate_entries
    WHERE country LIKE '%Argentina%'
    GROUP BY source, currency
""").fetchall()
for src, entry_count, currency_code in grouped:
    print(f" {src}: {entry_count} entries with currency {currency_code}")

print("\nAll Argentina entries with details:")
details = cursor.execute("""
    SELECT source, country, city, rate_type, currency
    FROM rate_entries
    WHERE country LIKE '%Argentina%'
    LIMIT 10
""").fetchall()
for record in details:
    # Each record is printed as the raw tuple returned by sqlite3.
    print(f" {record}")

conn.close()
|
||||
35
scripts/check_breakfast.py
Normal file
35
scripts/check_breakfast.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import sqlite3
|
||||
|
||||
# Spot-check breakfast rate entries for Argentina and Albania, then list every
# distinct (city, currency) pair recorded for Argentina.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
cursor = conn.cursor()

print("Argentina entries with breakfast:")
argentina_breakfast = cursor.execute("""
    SELECT country, city, rate_type, rate_amount, currency
    FROM rate_entries
    WHERE country LIKE '%Argentina%' AND rate_type LIKE '%breakfast%'
    LIMIT 5
""").fetchall()
for record in argentina_breakfast:
    print(f" {record}")

print("\nAlbania entries with breakfast:")
albania_breakfast = cursor.execute("""
    SELECT country, city, rate_type, rate_amount, currency
    FROM rate_entries
    WHERE country LIKE '%Albania%' AND rate_type LIKE '%breakfast%'
    LIMIT 5
""").fetchall()
for record in albania_breakfast:
    print(f" {record}")

print("\nAll Argentina city entries:")
argentina_cities = cursor.execute("""
    SELECT DISTINCT city, currency
    FROM rate_entries
    WHERE country LIKE '%Argentina%'
""").fetchall()
for city_name, currency_code in argentina_cities:
    print(f" {city_name}: {currency_code}")

conn.close()
|
||||
27
scripts/check_international_countries.py
Normal file
27
scripts/check_international_countries.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import sqlite3
|
||||
|
||||
# Print the number of rate entries per (source, currency) pair, then the first
# twenty distinct countries recorded for the 'international' source.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
cursor = conn.cursor()

print("All sources and their currency distributions:")
distribution = cursor.execute("""
    SELECT source, currency, COUNT(*) as count
    FROM rate_entries
    GROUP BY source, currency
    ORDER BY source, currency
""").fetchall()
for src, currency_code, entry_count in distribution:
    print(f" {src} / {currency_code}: {entry_count}")

print("\nInternational source countries:")
countries = cursor.execute("""
    SELECT DISTINCT country
    FROM rate_entries
    WHERE source = 'international'
    ORDER BY country
    LIMIT 20
""").fetchall()
for (country_name,) in countries:
    print(f" {country_name}")

conn.close()
|
||||
16
scripts/check_titles.py
Normal file
16
scripts/check_titles.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import sqlite3
|
||||
|
||||
# List the titles of the first ten raw tables, then every raw table whose
# title mentions Argentina.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
cursor = conn.cursor()

print("Sample Table Titles:")
sample_titles = cursor.execute(
    'SELECT table_index, title FROM raw_tables LIMIT 10'
).fetchall()
for table_index, title in sample_titles:
    print(f"{table_index}: {title}")

print("\nArgentina Tables:")
argentina_titles = cursor.execute(
    "SELECT table_index, title FROM raw_tables WHERE title LIKE '%Argentina%'"
).fetchall()
for table_index, title in argentina_titles:
    print(f"{table_index}: {title}")

conn.close()
|
||||
37
scripts/debug_argentina.py
Normal file
37
scripts/debug_argentina.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Debug the scraper to see what currencies are being assigned"""
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source, extract_rate_entries
|
||||
|
||||
# Scrape the international source live and report which currencies the
# extractor assigns to the Argentina table.
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")

print("Fetching tables...")
tables = scrape_tables_from_source(source)

# First table whose title mentions Argentina, or None when there is none.
argentina_table = next(
    (candidate for candidate in tables
     if candidate['title'] and 'Argentina' in candidate['title']),
    None,
)

if argentina_table:
    print(f"\nArgentina Table:")
    print(f" Title: {argentina_table['title']}")
    print(f" Rows: {len(argentina_table['data'])}")

    # Run the extractor on this single table only.
    entries = extract_rate_entries(source, [argentina_table])
    print(f"\n Generated {len(entries)} entries")

    if entries:
        print(f"\n First 3 entries:")
        for position, entry in enumerate(entries[:3], start=1):
            print(f" {position}. City: {entry['city']}, Type: {entry['rate_type']}, Amount: {entry['rate_amount']}, Currency: {entry['currency']}")

        # Distinct currency values across all generated entries.
        currencies = {entry['currency'] for entry in entries}
        print(f"\n Unique currencies in Argentina entries: {currencies}")
|
||||
28
scripts/debug_currency.py
Normal file
28
scripts/debug_currency.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import sqlite3
|
||||
|
||||
# Show the stored raw table for Argentina (title and payload size) next to the
# first few rate entries derived for Argentina.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()

    # Get a raw table with title
    # NOTE(review): this reads a column named `data`, while the sibling script
    # inspect_raw_tables.py reads `data_json` from the same table — confirm
    # which name the schema actually uses.
    cursor.execute("""
        SELECT title, data
        FROM raw_tables
        WHERE title LIKE '%Argentina%'
        LIMIT 1
    """)
    row = cursor.fetchone()
    if row is None:
        # fetchone() returns None when nothing matched; avoid indexing into it.
        print("No raw table with 'Argentina' in its title was found")
    else:
        title, data = row
        print(f"Title: {title}")
        print(f"Data length: {len(data) if data is not None else 0} chars")

    # Now check the actual rate_entries for Argentina
    cursor.execute("""
        SELECT country, city, rate_type, currency, rate_amount
        FROM rate_entries
        WHERE country LIKE '%Argentina%'
        LIMIT 3
    """)
    print("\nRate Entries:")
    for entry in cursor.fetchall():
        print(f" {entry}")
finally:
    # Release the database handle even when a query fails.
    conn.close()
|
||||
35
scripts/inspect_raw_tables.py
Normal file
35
scripts/inspect_raw_tables.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
# Dump the first five raw tables (source, URL, index, title, columns and first
# row), then locate the first raw table whose JSON payload mentions Argentina
# and print the matching entry.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    print('\n=== RAW TABLE INSPECTION ===\n')

    # Check first few raw tables
    first_tables = conn.execute(
        'SELECT source, source_url, table_index, title, data_json FROM raw_tables LIMIT 5'
    ).fetchall()
    for source, source_url, table_index, title, data_json in first_tables:
        print(f'\nSource: {source}')
        print(f'URL: {source_url}')
        print(f'Table Index: {table_index}')
        print(f'Title: {title}')

        # presumably data_json holds a JSON array of row dicts — the code
        # below relies on data[0].keys(); verify against the scraper.
        data = json.loads(data_json)
        print(f'Columns: {list(data[0].keys()) if data else "No data"}')
        print(f'First row sample: {data[0] if data else "No data"}')
        print('-' * 80)

    # Check specific Argentina table
    print('\n\n=== ARGENTINA RAW DATA ===\n')
    # fetchone() yields a single row (or None). Unpack it directly; looping
    # over it would iterate the row's columns instead of rows.
    argentina_row = conn.execute(
        'SELECT source, title, data_json FROM raw_tables WHERE data_json LIKE ?',
        ('%Argentina%',),
    ).fetchone()
    if argentina_row is not None:
        source, title, data_json = argentina_row
        print(f'Source: {source}')
        print(f'Title: {title}')
        data = json.loads(data_json)
        if data:
            # Find Argentina entry
            for entry in data:
                if 'Argentina' in str(entry.values()):
                    print(f'\nArgentina entry columns: {entry.keys()}')
                    print(f'Argentina entry data: {entry}')
                    break
finally:
    # Release the database handle even when a query or JSON decode fails.
    conn.close()
|
||||
31
scripts/inspect_table_structure.py
Normal file
31
scripts/inspect_table_structure.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Inspect the actual table structure from NJC"""
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source
|
||||
import json
|
||||
|
||||
# Scrape the international source live and print the first rows of the first
# table and of the Argentina table, to inspect the column structure.

# Create a test source config
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")

# Get just the first table
print("Fetching tables...")
tables = scrape_tables_from_source(source)

if not tables:
    # Nothing scraped: report it instead of failing on tables[0].
    print("\nNo tables were returned by the scraper")
else:
    first_table = tables[0]
    print(f"\nTable {first_table['table_index']}")
    print(f"Title: {first_table['title']}")

    first_rows = first_table['data']
    for label, row_index in (("First data row", 0), ("Second data row", 1)):
        # Only print rows that exist; a table may have fewer than two rows.
        if len(first_rows) > row_index:
            print(f"\n{label}:")
            print(json.dumps(first_rows[row_index], indent=2))

# Now try Argentina
for table in tables:
    if table['title'] and 'Argentina' in table['title']:
        print(f"\n\n=== Argentina Table ===")
        print(f"Title: {table['title']}")
        if len(table['data']) > 0:
            print(f"\nFirst row:")
            print(json.dumps(table['data'][0], indent=2))
        break
|
||||
46
scripts/listCountries.js
Normal file
46
scripts/listCountries.js
Normal file
@@ -0,0 +1,46 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
|
||||
// Diagnostic script: lists every country present in `accommodation_rates`
// together with its region, currency and number of rows, followed by totals.
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');

const db = new sqlite3.Database(dbPath, (openErr) => {
  if (!openErr) {
    return;
  }
  console.error('❌ Database connection failed:', openErr);
  process.exit(1);
});

const divider = '='.repeat(60);

console.log('\n📊 Countries in Database:\n');
console.log(divider);

// One row per country; region and currency are taken from whichever row
// SQLite picks for the group (they are not aggregated).
const query = `
  SELECT
    country,
    COUNT(*) as city_count,
    region,
    currency
  FROM accommodation_rates
  GROUP BY country
  ORDER BY country
`;

db.all(query, [], (queryErr, rows) => {
  if (queryErr) {
    console.error('❌ Query failed:', queryErr);
    db.close();
    process.exit(1);
  }

  let totalCities = 0;
  rows.forEach(({ country, region, currency, city_count: cityCount }, position) => {
    console.log(`\n${position + 1}. ${country}`);
    console.log(` Region: ${region}`);
    console.log(` Currency: ${currency}`);
    console.log(` Cities: ${cityCount}`);
    totalCities += cityCount;
  });

  console.log(`\n${divider}`);
  console.log(`\n📍 Total Countries: ${rows.length}`);
  console.log(`📍 Total Cities: ${totalCities}\n`);

  db.close();
});
|
||||
582
scripts/migrateCompleteTravelRates.js
Normal file
582
scripts/migrateCompleteTravelRates.js
Normal file
@@ -0,0 +1,582 @@
|
||||
const sqlite3 = require("sqlite3").verbose();
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
// Country to currency mapping based on NJC Appendix D.
// Keys are country (or region/city) names as they appear in the rate data;
// values are the ISO currency code used for that location's rates.
const COUNTRY_CURRENCY_MAP = {
  // EUR countries (European)
  Austria: "EUR",
  Belgium: "EUR",
  Bulgaria: "EUR",
  Croatia: "EUR",
  Cyprus: "EUR",
  "Czech Republic": "EUR",
  Denmark: "EUR",
  Estonia: "EUR",
  Finland: "EUR",
  France: "EUR",
  Germany: "EUR",
  Greece: "EUR",
  Hungary: "EUR",
  Ireland: "EUR",
  Italy: "EUR",
  Latvia: "EUR",
  Lithuania: "EUR",
  Luxembourg: "EUR",
  Malta: "EUR",
  Netherlands: "EUR",
  Poland: "EUR",
  Portugal: "EUR",
  Romania: "EUR",
  Slovakia: "EUR",
  Slovenia: "EUR",
  Spain: "EUR",
  Sweden: "EUR",
  Albania: "EUR",
  Andorra: "EUR",
  "Bosnia and Herzegovina": "EUR",
  Kosovo: "EUR",
  Montenegro: "EUR",
  "North Macedonia": "EUR",
  Serbia: "EUR",
  Ukraine: "EUR",
  Moldova: "EUR",
  Iceland: "EUR",
  Norway: "EUR",
  Switzerland: "EUR",
  Azores: "EUR",
  Madeira: "EUR",

  // CAD countries
  Canada: "CAD",

  // AUD countries
  Australia: "AUD",

  // USD countries (Americas & others)
  "United States": "USD",
  USA: "USD",
  Mexico: "USD",
  Belize: "USD",
  "Central America": "USD",
  "Costa Rica": "USD",
  Guatemala: "USD",
  Honduras: "USD",
  Nicaragua: "USD",
  Panama: "USD",
  "El Salvador": "USD",
  Caribbean: "USD",
  "Antigua and Barbuda": "USD",
  Bahamas: "USD",
  Barbados: "USD",
  Bermuda: "USD",
  Dominica: "USD",
  "Dominican Republic": "USD",
  Grenada: "USD",
  Haiti: "USD",
  Jamaica: "USD",
  "St. Kitts": "USD",
  "St. Lucia": "USD",
  "St. Vincent": "USD",
  "Trinidad and Tobago": "USD",
  "Turks and Caicos": "USD",
  Anguilla: "USD",
  Montserrat: "USD",
  "Virgin Islands": "USD",
  Aruba: "USD",
  Curacao: "USD",
  "Sint Maarten": "USD",
  Bonaire: "USD",
  Colombia: "USD",
  Ecuador: "USD",
  Guyana: "USD",
  Suriname: "USD",
  Venezuela: "USD",
  Peru: "USD",
  Bolivia: "USD",
  Paraguay: "USD",
  Brazil: "USD",
  Chile: "USD",
  "Middle East": "USD",
  Afghanistan: "USD",
  Armenia: "USD",
  Azerbaijan: "USD",
  Bahrain: "USD",
  Georgia: "USD",
  Iran: "USD",
  Iraq: "USD",
  Israel: "USD",
  Jordan: "USD",
  Kuwait: "USD",
  Lebanon: "USD",
  Oman: "USD",
  Qatar: "USD",
  "Saudi Arabia": "USD",
  Syria: "USD",
  Turkey: "USD",
  "United Arab Emirates": "USD",
  Yemen: "USD",
  Pakistan: "USD",
  India: "USD",
  Bangladesh: "USD",
  "Sri Lanka": "USD",
  Nepal: "USD",
  Bhutan: "USD",
  Myanmar: "USD",
  Thailand: "USD",
  Laos: "USD",
  Vietnam: "USD",
  Cambodia: "USD",
  Malaysia: "USD",
  Singapore: "USD",
  Indonesia: "USD",
  Philippines: "USD",
  "East Timor": "USD",
  "Papua New Guinea": "USD",
  "Solomon Islands": "USD",
  Vanuatu: "USD",
  Fiji: "USD",
  Kiribati: "USD",
  "Marshall Islands": "USD",
  Micronesia: "USD",
  Nauru: "USD",
  Palau: "USD",
  Samoa: "USD",
  Tonga: "USD",
  Tuvalu: "USD",
  "Hong Kong": "USD",
  Taiwan: "USD",
  Japan: "USD",
  "South Korea": "USD",
  "North Korea": "USD",
  Mongolia: "USD",
  China: "USD",
  "North Africa": "USD",
  Algeria: "CAD",
  Egypt: "USD",
  Libya: "USD",
  Morocco: "USD",
  Tunisia: "USD",
  Sudan: "USD",
  "Western Sahara": "USD",
  "Sub-Saharan Africa": "USD",
  Angola: "CAD",
  Benin: "USD",
  Botswana: "USD",
  "Burkina Faso": "USD",
  Burundi: "USD",
  Cameroon: "USD",
  "Cape Verde": "USD",
  "Central African Republic": "USD",
  Chad: "USD",
  Comoros: "USD",
  Congo: "USD",
  "Côte d'Ivoire": "USD",
  Djibouti: "USD",
  "Equatorial Guinea": "USD",
  Eritrea: "USD",
  Ethiopia: "USD",
  Gabon: "USD",
  Gambia: "USD",
  Ghana: "USD",
  Guinea: "USD",
  "Guinea-Bissau": "USD",
  Kenya: "USD",
  Lesotho: "USD",
  Liberia: "USD",
  Madagascar: "USD",
  Malawi: "USD",
  Mali: "USD",
  Mauritania: "USD",
  Mauritius: "USD",
  Mozambique: "USD",
  Namibia: "USD",
  Niger: "USD",
  Nigeria: "USD",
  Rwanda: "USD",
  Senegal: "USD",
  Seychelles: "USD",
  "Sierra Leone": "USD",
  Somalia: "USD",
  "South Africa": "USD",
  "South Sudan": "USD",
  Tanzania: "USD",
  Togo: "USD",
  Uganda: "USD",
  Zambia: "USD",
  Zimbabwe: "USD",
  Réunion: "EUR",
  Mayotte: "EUR",
  Canberra: "AUD",
};

/**
 * Look up the currency code for a country name.
 *
 * Only the map's own entries are consulted, so names that collide with
 * inherited object members (for example "constructor" or "toString") fall
 * through to the default instead of returning a function.
 *
 * @param {string} country - Country name exactly as keyed in COUNTRY_CURRENCY_MAP.
 * @returns {string} The mapped currency code, or "USD" when the country is not listed.
 */
function getCurrencyForCountry(country) {
  const isListed = Object.prototype.hasOwnProperty.call(
    COUNTRY_CURRENCY_MAP,
    country
  );
  // Default to USD if not found
  return (isListed && COUNTRY_CURRENCY_MAP[country]) || "USD";
}
|
||||
/**
 * One-shot migration that rebuilds the `travel_rates` table (and its
 * `travel_search` full-text index) in database/travel_rates.db from
 * data/accommodationRates.json and data/perDiemRates.json.
 */
class CompleteTravelMigration {
  constructor() {
    this.dbPath = path.join(__dirname, "..", "database", "travel_rates.db");
    this.db = null;
  }

  /**
   * Run the whole migration: open the database, recreate the schema, import
   * every city, rebuild the search index and print summary statistics.
   * The database handle is closed whether or not the migration succeeds.
   * @returns {Promise<void>}
   * @throws Re-throws any error raised by a migration step after logging it.
   */
  async migrate() {
    console.log("🚀 Starting COMPLETE travel rates migration...\n");

    try {
      await this.openDatabase();
      await this.createComprehensiveSchema();
      await this.importAllData();
      await this.rebuildSearchIndex();
      await this.displayStats();

      console.log("\n✅ Complete migration successful!");
      console.log(`📊 Database: ${this.dbPath}`);
    } catch (error) {
      console.error("❌ Migration failed:", error);
      throw error;
    } finally {
      if (this.db) {
        this.db.close();
      }
    }
  }

  /**
   * Open the SQLite database at `this.dbPath` and store the handle on `this.db`.
   * @returns {Promise<void>} Rejects with the sqlite3 error when opening fails.
   */
  openDatabase() {
    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) reject(err);
        else {
          console.log("✅ Database connection opened");
          resolve();
        }
      });
    });
  }

  /**
   * Drop and recreate `travel_rates`, its indexes, and the `travel_search`
   * FTS5 table that uses `travel_rates` as external content.
   * @returns {Promise<void>} Rejects with the sqlite3 error when the DDL fails.
   */
  async createComprehensiveSchema() {
    console.log("📋 Creating comprehensive schema...");

    const schema = `
      DROP TABLE IF EXISTS travel_rates;
      DROP TABLE IF EXISTS travel_search;

      CREATE TABLE travel_rates (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        city_key TEXT UNIQUE NOT NULL,
        city_name TEXT NOT NULL,
        province TEXT,
        country TEXT NOT NULL,
        region TEXT NOT NULL,
        currency TEXT NOT NULL,

        -- Accommodation rates (monthly)
        jan_accommodation REAL NOT NULL,
        feb_accommodation REAL NOT NULL,
        mar_accommodation REAL NOT NULL,
        apr_accommodation REAL NOT NULL,
        may_accommodation REAL NOT NULL,
        jun_accommodation REAL NOT NULL,
        jul_accommodation REAL NOT NULL,
        aug_accommodation REAL NOT NULL,
        sep_accommodation REAL NOT NULL,
        oct_accommodation REAL NOT NULL,
        nov_accommodation REAL NOT NULL,
        dec_accommodation REAL NOT NULL,
        standard_accommodation REAL,

        -- Meal rates (per diem)
        breakfast REAL NOT NULL,
        lunch REAL NOT NULL,
        dinner REAL NOT NULL,
        total_meals REAL NOT NULL,
        incidentals REAL NOT NULL,
        total_daily_allowance REAL NOT NULL,

        -- Additional info
        is_international BOOLEAN DEFAULT 0,
        effective_date DATE DEFAULT '2025-01-01',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );

      CREATE INDEX IF NOT EXISTS idx_travel_city ON travel_rates(city_name);
      CREATE INDEX IF NOT EXISTS idx_travel_country ON travel_rates(country);
      CREATE INDEX IF NOT EXISTS idx_travel_region ON travel_rates(region);
      CREATE INDEX IF NOT EXISTS idx_travel_key ON travel_rates(city_key);

      CREATE VIRTUAL TABLE IF NOT EXISTS travel_search USING fts5(
        city_key,
        city_name,
        province,
        country,
        region,
        content='travel_rates'
      );
    `;

    return new Promise((resolve, reject) => {
      this.db.exec(schema, (err) => {
        if (err) reject(err);
        else {
          console.log("✅ Comprehensive schema created");
          resolve();
        }
      });
    });
  }

  /**
   * Import Canadian cities, international cities and a hard-coded Canberra
   * row into `travel_rates`. A city that fails to insert is logged and
   * skipped; the import continues with the next one.
   * @returns {Promise<void>}
   * @throws {Error} When either JSON data file is missing.
   */
  async importAllData() {
    console.log("📥 Importing all travel data...\n");

    // Load accommodation data
    const accomPath = path.join(
      __dirname,
      "..",
      "data",
      "accommodationRates.json"
    );
    const perDiemPath = path.join(__dirname, "..", "data", "perDiemRates.json");

    if (!fs.existsSync(accomPath)) {
      throw new Error("accommodationRates.json not found");
    }
    if (!fs.existsSync(perDiemPath)) {
      throw new Error("perDiemRates.json not found");
    }

    const accomData = JSON.parse(fs.readFileSync(accomPath, "utf8"));
    const perDiemData = JSON.parse(fs.readFileSync(perDiemPath, "utf8"));

    let imported = 0;

    // Import Canadian cities
    if (accomData.cities) {
      console.log(" 🇨🇦 Importing Canadian cities...");
      const canadaMeals = perDiemData.regions.canada.meals;
      const canadaIncidentals = perDiemData.regions.canada.incidentals.rate100;

      for (const [key, city] of Object.entries(accomData.cities)) {
        try {
          await this.insertTravelRate({
            city_key: key,
            city_name: city.name,
            province: city.province,
            country: "Canada",
            region: city.region,
            currency: "CAD",
            accommodation_rates: city.monthlyRates,
            breakfast: canadaMeals.breakfast.rate100,
            lunch: canadaMeals.lunch.rate100,
            dinner: canadaMeals.dinner.rate100,
            total_meals: canadaMeals.total.rate100,
            incidentals: canadaIncidentals,
            total_daily: perDiemData.regions.canada.dailyTotal.rate100,
            is_international: 0,
          });
          imported++;
          if (imported % 50 === 0) {
            console.log(` ... ${imported} cities imported`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${imported} Canadian cities`);
    }

    // Import international cities
    if (accomData.internationalCities) {
      console.log(" 🌍 Importing international cities...");
      const intlMeals = perDiemData.regions.usa.meals; // USA rates same as intl
      const intlIncidentals = perDiemData.regions.usa.incidentals.rate100;

      let intlCount = 0;
      for (const [key, city] of Object.entries(accomData.internationalCities)) {
        try {
          // Cities without monthly rates get the standard rate for all 12 months.
          const rates = city.monthlyRates || Array(12).fill(city.standardRate);

          // Determine currency: always use country mapping (which is most authoritative)
          // Only use explicit city.currency if it's already been manually verified/set (non-USD entries with specific EUR values)
          let cityCurrency;
          if (city.currency === "EUR" || city.currency === "CAD") {
            // These are explicitly set in JSON (like Riga, Paris, Tallinn) - keep them
            cityCurrency = city.currency;
          } else {
            // Default to country mapping for USD and missing values
            cityCurrency = getCurrencyForCountry(city.country);
          }

          // Use city-specific meals if available, otherwise use regional rates
          const breakfast =
            city.meals?.breakfast || intlMeals.breakfast.rate100;
          const lunch = city.meals?.lunch || intlMeals.lunch.rate100;
          const dinner = city.meals?.dinner || intlMeals.dinner.rate100;
          const totalMeals = city.meals?.total || breakfast + lunch + dinner;
          const incidentals =
            city.incidentals !== undefined ? city.incidentals : intlIncidentals;

          // NOTE(review): unlike the Canadian rows, total_daily here adds the
          // accommodation rate to meals + incidentals — confirm this is intended.
          await this.insertTravelRate({
            city_key: key,
            city_name: city.name,
            province: null,
            country: city.country,
            region: city.region,
            currency: cityCurrency,
            accommodation_rates: rates,
            standard_accommodation: city.standardRate || rates[0],
            breakfast: breakfast,
            lunch: lunch,
            dinner: dinner,
            total_meals: totalMeals,
            incidentals: incidentals,
            total_daily:
              parseFloat(city.standardRate || rates[0]) +
              totalMeals +
              incidentals,
            is_international: 1,
          });
          intlCount++;
          if (intlCount % 30 === 0) {
            console.log(` ... ${intlCount} international cities imported`);
          }
        } catch (err) {
          console.error(` ⚠️ Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(` ✅ Imported ${intlCount} international cities`);
      imported += intlCount;
    }

    // Add Canberra with meal rates
    console.log(" 🇦🇺 Adding Canberra with meal rates...");
    try {
      const intlMeals = perDiemData.regions.usa.meals;
      const intlIncidentals = perDiemData.regions.usa.incidentals.rate100;

      await this.insertTravelRate({
        city_key: "canberra",
        city_name: "Canberra",
        province: null,
        country: "Australia",
        region: "Oceania",
        currency: "AUD",
        accommodation_rates: [
          184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184,
        ],
        standard_accommodation: 184,
        breakfast: intlMeals.breakfast.rate100,
        lunch: intlMeals.lunch.rate100,
        dinner: intlMeals.dinner.rate100,
        total_meals: intlMeals.total.rate100,
        incidentals: intlIncidentals,
        total_daily: perDiemData.regions.usa.dailyTotal.rate100,
        is_international: 1,
      });
      console.log(" ✅ Canberra added with complete rates");
    } catch (err) {
      // A duplicate-key failure is tolerated; anything else aborts the migration.
      if (!err.message.includes("UNIQUE")) {
        throw err;
      }
    }

    console.log(`\n✅ Total imported: ${imported} cities with complete data`);
  }

  /**
   * Insert (or replace, keyed on `city_key`) one row in `travel_rates`.
   *
   * @param {object} data - Row values: city_key, city_name, province, country,
   *   region, currency, accommodation_rates (12 monthly values, January
   *   first), optional standard_accommodation, breakfast, lunch, dinner,
   *   total_meals, incidentals, total_daily, is_international.
   * @returns {Promise<void>} Rejects with the sqlite3 error when the insert fails.
   * @throws {Error} When accommodation_rates is not an array of exactly 12
   *   values; spreading any other length would shift every later column.
   */
  async insertTravelRate(data) {
    const monthlyRates = data.accommodation_rates;
    if (!Array.isArray(monthlyRates) || monthlyRates.length !== 12) {
      const found = Array.isArray(monthlyRates)
        ? `${monthlyRates.length} values`
        : typeof monthlyRates;
      throw new Error(
        `accommodation_rates for "${data.city_key}" must contain 12 monthly values (got ${found})`
      );
    }

    return new Promise((resolve, reject) => {
      const sql = `
        INSERT OR REPLACE INTO travel_rates (
          city_key, city_name, province, country, region, currency,
          jan_accommodation, feb_accommodation, mar_accommodation,
          apr_accommodation, may_accommodation, jun_accommodation,
          jul_accommodation, aug_accommodation, sep_accommodation,
          oct_accommodation, nov_accommodation, dec_accommodation,
          standard_accommodation,
          breakfast, lunch, dinner, total_meals,
          incidentals, total_daily_allowance, is_international
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `;

      this.db.run(
        sql,
        [
          data.city_key,
          data.city_name,
          data.province,
          data.country,
          data.region,
          data.currency,
          ...monthlyRates,
          data.standard_accommodation || monthlyRates[0],
          data.breakfast,
          data.lunch,
          data.dinner,
          data.total_meals,
          data.incidentals,
          data.total_daily,
          data.is_international,
        ],
        (err) => {
          if (err) reject(err);
          else resolve();
        }
      );
    });
  }

  /**
   * Populate the `travel_search` full-text index from `travel_rates`.
   * An external-content FTS5 table is not filled automatically when rows are
   * inserted into its content table, so the index is rebuilt after the import.
   * @returns {Promise<void>} Rejects with the sqlite3 error when the rebuild fails.
   */
  rebuildSearchIndex() {
    return new Promise((resolve, reject) => {
      this.db.run(
        "INSERT INTO travel_search(travel_search) VALUES('rebuild')",
        [],
        (err) => {
          if (err) reject(err);
          else resolve();
        }
      );
    });
  }

  /**
   * Print row counts (total, Canadian, international) and the Canberra row.
   * @returns {Promise<void>}
   */
  async displayStats() {
    console.log("\n📊 Database Statistics:");

    const total = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates"
    );
    console.log(` Total cities: ${total}`);

    const canadian = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates WHERE is_international = 0"
    );
    console.log(` Canadian: ${canadian}`);

    const international = await this.getCount(
      "SELECT COUNT(*) as count FROM travel_rates WHERE is_international = 1"
    );
    console.log(` International: ${international}`);

    // Bind the key as a parameter; a double-quoted "canberra" is an
    // identifier in SQL and only works as a string through a legacy fallback.
    const canberra = await this.getRow(
      "SELECT * FROM travel_rates WHERE city_key = ?",
      ["canberra"]
    );
    if (canberra) {
      console.log(` \n ✅ Canberra Complete Data:`);
      // Label the amount with the currency actually stored on the row.
      console.log(
        ` Accommodation: $${canberra.standard_accommodation} ${canberra.currency}/night`
      );
      console.log(` Breakfast: $${canberra.breakfast}`);
      console.log(` Lunch: $${canberra.lunch}`);
      console.log(` Dinner: $${canberra.dinner}`);
      console.log(` Incidentals: $${canberra.incidentals}`);
      console.log(` Total Daily: $${canberra.total_daily_allowance}`);
    }
  }

  /**
   * Run a query that returns a single row with a `count` column.
   * @param {string} sql - Query text selecting `... as count`.
   * @param {Array} [params=[]] - Values bound to `?` placeholders.
   * @returns {Promise<number>} The value of the `count` column.
   */
  getCount(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row.count);
      });
    });
  }

  /**
   * Run a query and return its first row.
   * @param {string} sql - Query text.
   * @param {Array} [params=[]] - Values bound to `?` placeholders.
   * @returns {Promise<object|undefined>} The first row, or undefined when no row matched.
   */
  getRow(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row);
      });
    });
  }
}
|
||||
|
||||
// Run migration
|
||||
// Kick off the migration; any rejection is reported and turned into a
// non-zero exit status.
const migration = new CompleteTravelMigration();

migration.migrate().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
|
||||
212
scripts/migrateScrapedToNodeJS.js
Normal file
212
scripts/migrateScrapedToNodeJS.js
Normal file
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* Migration script to convert scraped SQLite database to Node.js travel_rates schema
|
||||
*
|
||||
* Source DB: travel_rates_scraped.sqlite3 (from Python scraper)
|
||||
* Target DB: travel_rates.db (Node.js app schema)
|
||||
*
|
||||
* This script:
|
||||
* 1. Reads rate_entries from scraped DB
|
||||
* 2. Aggregates meal rates (breakfast, lunch, dinner) and incidentals by city
|
||||
* 3. Inserts into travel_rates table in Node.js format
|
||||
*/
|
||||
|
||||
const sqlite3 = require("sqlite3").verbose();
|
||||
const path = require("path");
|
||||
|
||||
// Database paths, both resolved from the directory one level above this script.
// NOTE(review): the other migration/test scripts in this commit open
// "database/travel_rates.db"; confirm that the root-level "travel_rates.db"
// used here is the file the application actually reads.
const PROJECT_ROOT = path.join(__dirname, "..");
const SOURCE_DB = path.join(PROJECT_ROOT, "data", "travel_rates_scraped.sqlite3");
const TARGET_DB = path.join(PROJECT_ROOT, "travel_rates.db");
|
||||
|
||||
// Exchange rates for display (not used for conversion, just for reference).
// Each value is the number of Canadian dollars per one unit of the currency.
// These are hard-coded snapshots, not live rates.
const EXCHANGE_RATES = {
  EUR: 1.54, // EUR to CAD
  USD: 1.39, // USD to CAD
  AUD: 0.92, // AUD to CAD
  CAD: 1.0,
  ARS: 0.0014, // ARS to CAD (approximate)
};
|
||||
|
||||
/**
 * Open a SQLite database file.
 *
 * @param {string} dbPath - Path of the database file.
 * @returns {Promise<object>} The opened sqlite3 Database handle; rejects with
 *   the error reported by sqlite3 when the file cannot be opened.
 */
async function openDatabase(dbPath) {
  let handle;
  await new Promise((resolve, reject) => {
    handle = new sqlite3.Database(dbPath, (openErr) => {
      if (openErr) {
        reject(openErr);
        return;
      }
      resolve();
    });
  });
  return handle;
}
|
||||
|
||||
/**
 * Promise wrapper around `db.all`.
 *
 * @param {object} db - Database handle exposing `all(sql, params, callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params=[]] - Values for the statement's placeholders.
 * @returns {Promise<Array<object>>} All result rows.
 */
async function queryAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRows = (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(rows);
    };
    db.all(sql, params, onRows);
  });
}
|
||||
|
||||
/**
 * Promise wrapper around `db.run`.
 *
 * @param {object} db - Database handle exposing `run(sql, params, callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params=[]] - Values for the statement's placeholders.
 * @returns {Promise<object>} The `this` value the driver passes to the
 *   completion callback.
 */
async function runQuery(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // Must stay a `function` expression: the resolved value is the callback's `this`.
    db.run(sql, params, function onDone(err) {
      if (!err) {
        resolve(this);
        return;
      }
      reject(err);
    });
  });
}
|
||||
|
||||
/**
 * Collapse the scraped `rate_entries` rows into one record per
 * (country, city, currency) for the 'international' source.
 *
 * Each record carries the maximum amount found for rate types containing
 * "breakfast", "lunch", "dinner" and "incidental". Groups without any meal
 * amount are dropped.
 *
 * @param {object} sourceDb - Handle of the scraped database.
 * @returns {Promise<Array<object>>} Rows with country, city, currency,
 *   breakfast, lunch, dinner and incidentals.
 */
async function aggregateCityRates(sourceDb) {
  const sql = `
    SELECT
      country,
      city,
      currency,
      MAX(CASE WHEN rate_type LIKE '%breakfast%' THEN rate_amount END) as breakfast,
      MAX(CASE WHEN rate_type LIKE '%lunch%' THEN rate_amount END) as lunch,
      MAX(CASE WHEN rate_type LIKE '%dinner%' THEN rate_amount END) as dinner,
      MAX(CASE WHEN rate_type LIKE '%incidental%' THEN rate_amount END) as incidentals
    FROM rate_entries
    WHERE city IS NOT NULL
      AND country IS NOT NULL
      AND source = 'international'
    GROUP BY country, city, currency
    HAVING breakfast IS NOT NULL OR lunch IS NOT NULL OR dinner IS NOT NULL
  `;

  const rows = await queryAll(sourceDb, sql);
  return rows;
}
|
||||
|
||||
/**
 * Delete every row from the target `travel_rates` table.
 *
 * @param {object} targetDb - Handle of the target database.
 * @returns {Promise<void>}
 */
async function clearTargetDatabase(targetDb) {
  const wipeSql = "DELETE FROM travel_rates";
  await runQuery(targetDb, wipeSql);
  console.log("Cleared existing travel_rates data");
}
|
||||
|
||||
/**
 * Insert one `travel_rates` row per aggregated city.
 *
 * A failing insert is logged and counted as skipped; it does not stop the
 * remaining cities from being processed.
 *
 * @param {object} targetDb - Handle of the target database.
 * @param {Array<object>} cities - Rows with city, country, currency,
 *   breakfast, lunch, dinner and incidentals.
 * @returns {Promise<{inserted: number, skipped: number}>} Outcome counters.
 */
async function insertCityRates(targetDb, cities) {
  const insertStmt = `
    INSERT INTO travel_rates (
      city_key, city_name, country, breakfast, lunch, dinner,
      incidentals, currency, standardRate, standard_rate
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `;

  // Lowercase and turn each whitespace run into a single dash.
  const toSlug = (text) => text.toLowerCase().replace(/\s+/g, "-");

  let inserted = 0;
  let skipped = 0;

  for (const city of cities) {
    try {
      const cityKey = `${toSlug(city.city)}-${toSlug(city.country)}`;

      // Standard rate is breakfast + lunch + dinner; a missing meal counts as 0.
      const mealTotal =
        (city.breakfast || 0) + (city.lunch || 0) + (city.dinner || 0);
      // Round to cents so binary floating-point artifacts
      // (e.g. 0.1 + 0.2 = 0.30000000000000004) are not persisted.
      const standardRate = Math.round(mealTotal * 100) / 100;

      await runQuery(targetDb, insertStmt, [
        cityKey,
        city.city,
        city.country,
        city.breakfast,
        city.lunch,
        city.dinner,
        city.incidentals,
        city.currency,
        standardRate,
        standardRate, // Both standardRate and standard_rate for compatibility
      ]);

      inserted++;
    } catch (err) {
      console.error(
        `Error inserting ${city.city}, ${city.country}: ${err.message}`
      );
      skipped++;
    }
  }

  return { inserted, skipped };
}
|
||||
|
||||
/**
 * Copy the aggregated international meal rates from the scraped database into
 * the target `travel_rates` table, replacing its previous contents.
 *
 * The delete + insert runs inside one transaction and is rolled back when the
 * migration fails or when no row could be inserted, so a bad scrape never
 * leaves the target table empty.
 *
 * @returns {Promise<void>} Rejects with the underlying error on failure. Both
 *   database handles are closed in every case.
 */
async function migrate() {
  console.log(
    "Starting migration from scraped database to Node.js schema...\n"
  );

  let sourceDb;
  let targetDb;
  let inTransaction = false;

  try {
    // Open databases
    console.log(`Opening source database: ${SOURCE_DB}`);
    sourceDb = await openDatabase(SOURCE_DB);

    console.log(`Opening target database: ${TARGET_DB}`);
    targetDb = await openDatabase(TARGET_DB);

    // Aggregate city rates from scraped data
    console.log("\nAggregating city rates from scraped data...");
    const cities = await aggregateCityRates(sourceDb);
    console.log(`Found ${cities.length} cities with meal rates`);

    // Refuse to wipe the target when there is nothing to replace it with.
    if (cities.length === 0) {
      throw new Error(
        "No cities with meal rates found in source database; target left untouched"
      );
    }

    // Show currency distribution
    const currencyCounts = cities.reduce((acc, city) => {
      acc[city.currency] = (acc[city.currency] || 0) + 1;
      return acc;
    }, {});
    console.log("\nCurrency distribution:");
    for (const [currency, count] of Object.entries(currencyCounts)) {
      console.log(`  ${currency}: ${count} cities`);
    }

    await runQuery(targetDb, "BEGIN");
    inTransaction = true;

    // Clear target database
    console.log("\nClearing target database...");
    await clearTargetDatabase(targetDb);

    // Insert city rates
    console.log("\nInserting city rates into target database...");
    const result = await insertCityRates(targetDb, cities);

    if (result.inserted === 0) {
      throw new Error(
        `None of the ${result.skipped} cities could be inserted; previous data restored`
      );
    }

    await runQuery(targetDb, "COMMIT");
    inTransaction = false;

    console.log(`\nMigration complete!`);
    console.log(`  Inserted: ${result.inserted} cities`);
    console.log(`  Skipped: ${result.skipped} cities`);

    // Show sample entries
    console.log("\nSample migrated entries:");
    const samples = await queryAll(
      targetDb,
      `
      SELECT city_name, country, breakfast, lunch, dinner, incidentals, currency
      FROM travel_rates
      WHERE country IN ('Argentina', 'Albania', 'Australia')
      LIMIT 5
      `
    );
    for (const sample of samples) {
      console.log(
        `  ${sample.city_name}, ${sample.country}: B:${sample.breakfast} L:${sample.lunch} D:${sample.dinner} I:${sample.incidentals} (${sample.currency})`
      );
    }
  } catch (err) {
    if (inTransaction) {
      try {
        await runQuery(targetDb, "ROLLBACK");
      } catch (rollbackErr) {
        console.error(`Rollback failed: ${rollbackErr.message}`);
      }
    }
    console.error(`\nMigration failed: ${err.message}`);
    // Rethrow instead of calling process.exit(): exiting here would skip the
    // finally block below and would kill any process that require()s this
    // module. The command-line entry point turns the rejection into exit 1.
    throw err;
  } finally {
    if (sourceDb) sourceDb.close();
    if (targetDb) targetDb.close();
  }
}
|
||||
|
||||
// Run migration
|
||||
// Only start the migration when this file is executed directly with node,
// not when it is loaded through require().
const invokedDirectly = require.main === module;

if (invokedDirectly) {
  migrate().catch((fatal) => {
    console.error("Fatal error:", fatal);
    process.exit(1);
  });
}

module.exports = { migrate };
|
||||
322
scripts/migrateToDatabase.js
Normal file
322
scripts/migrateToDatabase.js
Normal file
@@ -0,0 +1,322 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Builds database/travel_rates.db: creates the `accommodation_rates` schema,
 * imports data/accommodationRates.json, adds Canberra and prints statistics.
 */
class DatabaseMigration {
  constructor() {
    this.dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');
    this.db = null;
  }

  /**
   * Run every migration step in order.
   *
   * @returns {Promise<void>} Rejects with the first error encountered. The
   *   database handle is closed in every case.
   */
  async migrate() {
    console.log('🚀 Starting database migration...\n');

    try {
      // Ensure database directory exists
      const dbDir = path.join(__dirname, '..', 'database');
      if (!fs.existsSync(dbDir)) {
        fs.mkdirSync(dbDir, { recursive: true });
        console.log('✅ Created database directory');
      }

      await this.openDatabase();
      // Create tables (inline schema - no external file needed)
      await this.createTables();
      await this.importAccommodationRates();
      await this.addCanberra();
      await this.buildSearchIndexes();
      await this.displayStats();

      console.log('\n✅ Migration complete!');
      console.log(`📊 Database: ${this.dbPath}`);
    } catch (error) {
      console.error('❌ Migration failed:', error);
      throw error;
    } finally {
      if (this.db) {
        this.db.close();
      }
    }
  }

  /**
   * Open `this.dbPath` and store the handle in `this.db`.
   *
   * @returns {Promise<void>}
   */
  openDatabase() {
    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          reject(err);
        } else {
          console.log('✅ Database connection opened');
          resolve();
        }
      });
    });
  }

  /**
   * Create the `accommodation_rates` table, its indexes and the
   * `accommodation_search` FTS5 table if they do not exist yet.
   *
   * @returns {Promise<void>}
   */
  async createTables() {
    console.log('📋 Creating tables...');

    // Inline schema - no external file dependency
    const schema = `
      CREATE TABLE IF NOT EXISTS accommodation_rates (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        city_key TEXT UNIQUE NOT NULL,
        city_name TEXT NOT NULL,
        province TEXT,
        country TEXT,
        region TEXT NOT NULL,
        currency TEXT NOT NULL,
        jan_rate REAL NOT NULL,
        feb_rate REAL NOT NULL,
        mar_rate REAL NOT NULL,
        apr_rate REAL NOT NULL,
        may_rate REAL NOT NULL,
        jun_rate REAL NOT NULL,
        jul_rate REAL NOT NULL,
        aug_rate REAL NOT NULL,
        sep_rate REAL NOT NULL,
        oct_rate REAL NOT NULL,
        nov_rate REAL NOT NULL,
        dec_rate REAL NOT NULL,
        standard_rate REAL,
        is_international BOOLEAN DEFAULT 0,
        effective_date DATE DEFAULT '2025-01-01',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );

      CREATE INDEX IF NOT EXISTS idx_accommodation_city ON accommodation_rates(city_name);
      CREATE INDEX IF NOT EXISTS idx_accommodation_country ON accommodation_rates(country);
      CREATE INDEX IF NOT EXISTS idx_accommodation_region ON accommodation_rates(region);
      CREATE INDEX IF NOT EXISTS idx_accommodation_key ON accommodation_rates(city_key);

      CREATE VIRTUAL TABLE IF NOT EXISTS accommodation_search USING fts5(
        city_key,
        city_name,
        province,
        country,
        region,
        content='accommodation_rates'
      );
    `;

    return new Promise((resolve, reject) => {
      this.db.exec(schema, (err) => {
        if (err) {
          reject(err);
        } else {
          console.log('✅ Tables created');
          resolve();
        }
      });
    });
  }

  /**
   * Import the `cities` (Canadian) and `internationalCities` sections of
   * data/accommodationRates.json. A city that fails to import is logged and
   * skipped.
   *
   * @returns {Promise<void>} Rejects when the JSON file is missing or invalid.
   */
  async importAccommodationRates() {
    console.log('📥 Importing accommodation rates...');

    const jsonPath = path.join(__dirname, '..', 'data', 'accommodationRates.json');
    console.log(`   📂 Looking for JSON at: ${jsonPath}`);

    if (!fs.existsSync(jsonPath)) {
      console.error('❌ accommodationRates.json not found!');
      throw new Error('Missing accommodationRates.json file');
    }

    console.log('   ✅ JSON file found, reading...');
    const rawData = fs.readFileSync(jsonPath, 'utf8');
    console.log(`   📄 File size: ${rawData.length} bytes`);

    const data = JSON.parse(rawData);
    console.log(`   ✅ JSON parsed successfully`);
    console.log(`   📊 Data keys: ${Object.keys(data).join(', ')}`);

    let imported = 0;

    // Import Canadian cities
    if (data.cities) {
      const cityCount = Object.keys(data.cities).length;
      console.log(`   - Importing ${cityCount} Canadian cities...`);

      for (const [key, city] of Object.entries(data.cities)) {
        try {
          await this.insertAccommodationRate({
            city_key: key,
            city_name: city.name,
            province: city.province,
            country: 'Canada',
            region: city.region,
            currency: city.currency,
            rates: city.monthlyRates,
            is_international: 0
          });
          imported++;
          if (imported % 50 === 0) {
            console.log(`      ... ${imported} cities imported so far`);
          }
        } catch (err) {
          console.error(`   ⚠️  Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(`   ✅ Imported ${imported} Canadian cities`);
    } else {
      console.log('   ⚠️  No "cities" key found in JSON');
    }

    // Import international cities
    if (data.internationalCities) {
      const intlCityCount = Object.keys(data.internationalCities).length;
      console.log(`   - Importing ${intlCityCount} international cities...`);
      let intlCount = 0;

      for (const [key, city] of Object.entries(data.internationalCities)) {
        try {
          // Cities without monthly data use their standard rate for every month.
          const rates = city.monthlyRates || Array(12).fill(city.standardRate);

          await this.insertAccommodationRate({
            city_key: key,
            city_name: city.name,
            province: null,
            country: city.country,
            region: city.region,
            currency: city.currency,
            rates: rates,
            standard_rate: city.standardRate || rates[0],
            is_international: 1
          });
          intlCount++;
          if (intlCount % 20 === 0) {
            console.log(`      ... ${intlCount} international cities imported so far`);
          }
        } catch (err) {
          console.error(`   ⚠️  Failed to import ${city.name}:`, err.message);
        }
      }
      console.log(`   ✅ Imported ${intlCount} international cities`);
      imported += intlCount;
    } else {
      console.log('   ⚠️  No "internationalCities" key found in JSON');
    }

    console.log(`✅ Total imported: ${imported} cities`);
  }

  /**
   * Insert the hard-coded Canberra row. Because the insert statement is
   * INSERT OR REPLACE, this overwrites any Canberra row imported from the JSON.
   *
   * @returns {Promise<void>}
   */
  async addCanberra() {
    console.log('🇦🇺 Adding Canberra...');

    try {
      await this.insertAccommodationRate({
        city_key: 'canberra',
        city_name: 'Canberra',
        province: null,
        country: 'Australia',
        region: 'Oceania',
        currency: 'USD',
        rates: [184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184],
        standard_rate: 184,
        is_international: 1
      });
      console.log('✅ Canberra added: $184 USD/night');
    } catch (err) {
      if (err.message.includes('UNIQUE')) {
        console.log('ℹ️  Canberra already exists');
      } else {
        throw err;
      }
    }
  }

  /**
   * Insert (or replace) one `accommodation_rates` row.
   *
   * @param {object} city - city_key, city_name, province, country, region,
   *   currency, rates (12 monthly values, Jan..Dec), optional standard_rate
   *   and is_international.
   * @returns {Promise<void>} Rejects with the database error, or with an Error
   *   when `rates` is not a 12-element array.
   */
  async insertAccommodationRate(city) {
    const monthlyRates = city.rates;

    // The statement binds 20 positional values with the monthly rates spread
    // into the middle; a short array would shift standard_rate and
    // is_international into the wrong columns without any error.
    if (!Array.isArray(monthlyRates) || monthlyRates.length !== 12) {
      const got = Array.isArray(monthlyRates)
        ? `${monthlyRates.length} values`
        : typeof monthlyRates;
      throw new Error(
        `expected 12 monthly rates for "${city.city_key}", got ${got}`
      );
    }

    const sql = `
      INSERT OR REPLACE INTO accommodation_rates (
        city_key, city_name, province, country, region, currency,
        jan_rate, feb_rate, mar_rate, apr_rate, may_rate, jun_rate,
        jul_rate, aug_rate, sep_rate, oct_rate, nov_rate, dec_rate,
        standard_rate, is_international
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `;

    const params = [
      city.city_key,
      city.city_name,
      city.province,
      city.country,
      city.region,
      city.currency,
      ...monthlyRates,
      // Fall back to the January rate when no standard rate was supplied.
      city.standard_rate || monthlyRates[0],
      city.is_international
    ];

    return new Promise((resolve, reject) => {
      this.db.run(sql, params, (err) => {
        if (err) reject(err);
        else resolve();
      });
    });
  }

  /**
   * Placeholder step: only logs that FTS5 population is skipped.
   *
   * @returns {Promise<void>}
   */
  async buildSearchIndexes() {
    console.log('🔍 Building search indexes...');
    console.log('   ℹ️  Skipping FTS5 index population (can be done later if needed)');
    console.log('   ✅ Standard indexes already created with tables');
    return Promise.resolve();
  }

  /**
   * Print row counts and the Canberra rate.
   *
   * @returns {Promise<void>}
   */
  async displayStats() {
    console.log('\n📊 Database Statistics:');

    const total = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates');
    console.log(`   Total cities: ${total}`);

    const canadian = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 0');
    console.log(`   Canadian: ${canadian}`);

    const international = await this.getCount('SELECT COUNT(*) as count FROM accommodation_rates WHERE is_international = 1');
    console.log(`   International: ${international}`);

    // Bound parameter instead of a double-quoted "canberra": SQL treats
    // double-quoted tokens as identifiers, and SQLite only accepts them as
    // strings through a legacy fallback.
    const canberra = await this.getCount(
      'SELECT COUNT(*) as count FROM accommodation_rates WHERE city_key = ?',
      ['canberra']
    );
    console.log(`   Canberra found: ${canberra > 0 ? '✅ YES' : '❌ NO'}`);

    if (canberra > 0) {
      const rate = await this.getCanberraRate();
      console.log(`   Canberra rate: $${rate} USD/night`);
    }
  }

  /**
   * Run a query that yields a single row with a `count` column.
   *
   * @param {string} sql - Statement selecting `... as count`.
   * @param {Array} [params=[]] - Values for the statement's placeholders.
   * @returns {Promise<number>} The `count` value.
   */
  getCount(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row.count);
      });
    });
  }

  /**
   * Read Canberra's January rate.
   *
   * @returns {Promise<number|null>} The rate, or null when no Canberra row exists.
   */
  getCanberraRate() {
    return new Promise((resolve, reject) => {
      this.db.get(
        'SELECT jan_rate FROM accommodation_rates WHERE city_key = ?',
        ['canberra'],
        (err, row) => {
          if (err) reject(err);
          else resolve(row ? row.jan_rate : null);
        }
      );
    });
  }
}
|
||||
|
||||
// Run migration
|
||||
// Start the migration only when this file is executed directly with node.
if (require.main === module) {
  new DatabaseMigration().migrate().catch((fatal) => {
    console.error('Fatal error:', fatal);
    process.exit(1);
  });
}

module.exports = DatabaseMigration;
|
||||
44
scripts/query_scraped_db.py
Normal file
44
scripts/query_scraped_db.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
def _fmt(value, spec):
    """Format a numeric column value, tolerating SQL NULL (None in Python)."""
    return 'n/a' if value is None else format(value, spec)


# Path is relative to the current working directory.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')

try:
    print('\n=== SCRAPED DATABASE ANALYSIS ===\n')

    # Tables
    print('📊 Tables:')
    tables = [row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()]
    for table in tables:
        # Identifiers cannot be bound as parameters, so quote the name instead.
        quoted = '"' + table.replace('"', '""') + '"'
        count = conn.execute(f'SELECT COUNT(*) FROM {quoted}').fetchone()[0]
        print(f'  - {table}: {count} rows')

    # Unique currencies
    print('\n💰 Unique Currencies in rate_entries:')
    currencies = [row[0] for row in conn.execute('SELECT DISTINCT currency FROM rate_entries WHERE currency IS NOT NULL ORDER BY currency').fetchall()]
    print(f'  {", ".join(currencies)}')

    # Argentina data
    print('\n🇦🇷 Argentina entries:')
    for row in conn.execute('SELECT country, city, currency, rate_type, rate_amount FROM rate_entries WHERE country=? LIMIT 10', ('Argentina',)).fetchall():
        # rate_amount is not filtered for NULL here, so format defensively.
        print(f'  {row[0]} - {row[1]} - Currency: {row[2]} - {row[3]}: ${_fmt(row[4], ".2f")}')

    # Sample rate entries by country
    print('\n🌍 Sample entries by country (first 3 countries):')
    for row in conn.execute('SELECT DISTINCT country FROM rate_entries WHERE country IS NOT NULL LIMIT 3').fetchall():
        country = row[0]
        print(f'\n  {country}:')
        for entry in conn.execute('SELECT city, currency, rate_type, rate_amount FROM rate_entries WHERE country=? LIMIT 3', (country,)).fetchall():
            print(f'    {entry[0]} - {entry[1]} - {entry[2]}: ${_fmt(entry[3], ".2f")}')

    # Exchange rates
    print('\n💱 Exchange rates:')
    for row in conn.execute('SELECT currency, rate_to_cad, effective_date FROM exchange_rates WHERE currency IS NOT NULL LIMIT 10').fetchall():
        print(f'  {row[0]}: {_fmt(row[1], ".4f")} CAD (effective: {row[2]})')

    # Accommodations
    print('\n🏨 Accommodation entries (sample):')
    for row in conn.execute('SELECT property_name, city, province, rate_amount, currency FROM accommodations WHERE rate_amount IS NOT NULL LIMIT 10').fetchall():
        print(f'  {row[0]} - {row[1]}, {row[2]} - ${row[3]:.2f} {row[4]}')
finally:
    # Release the connection even when one of the queries above fails.
    conn.close()

print('\n✅ Done!')
|
||||
59
scripts/testCanberraAPI.js
Normal file
59
scripts/testCanberraAPI.js
Normal file
@@ -0,0 +1,59 @@
|
||||
const http = require('http');
|
||||
|
||||
/**
 * Call the local accommodation search endpoint for Canberra and print the
 * outcome.
 *
 * @returns {Promise<void>} Resolves once a JSON response has been received
 *   and reported. Rejects when the connection fails or the body is not valid
 *   JSON.
 */
async function testAPI() {
  console.log('\n🧪 Testing Canberra Search API...\n');

  const options = {
    hostname: 'localhost',
    port: 5001,
    path: '/api/accommodation/search?city=canberra',
    method: 'GET'
  };

  return new Promise((resolve, reject) => {
    const req = http.request(options, (res) => {
      // Decode as UTF-8 across chunk boundaries; appending raw Buffers to a
      // string can split a multi-byte character in two.
      res.setEncoding('utf8');

      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });

      res.on('end', () => {
        let json;
        try {
          json = JSON.parse(data);
        } catch (err) {
          console.error('❌ Failed to parse response:', err.message);
          console.log('Raw response:', data);
          reject(err);
          return;
        }

        if (json.city) {
          // Tolerate a response without `rates` instead of reporting the
          // resulting TypeError as a JSON parse failure.
          const rates = json.rates || {};
          console.log('✅ SUCCESS! Canberra Found:\n');
          console.log(`   City: ${json.city}`);
          console.log(`   Country: ${json.country}`);
          console.log(`   Region: ${json.region}`);
          console.log(`   Currency: ${json.currency}`);
          console.log(`   Standard Rate: $${rates.standard || rates[0]} ${json.currency}`);
          console.log(`   January Rate: $${rates[0]} ${json.currency}`);
          console.log('\n🎉 CANBERRA IS 100% SEARCHABLE!\n');
        } else if (json.error) {
          console.log(`❌ API Error: ${json.error}`);
        } else {
          console.log('❓ Unexpected response:', json);
        }

        resolve();
      });
    });

    req.on('error', (err) => {
      console.error(`❌ Connection failed: ${err.message}`);
      console.error('Make sure the server is running: node server.js');
      reject(err);
    });

    req.end();
  });
}
|
||||
|
||||
// testAPI() has already printed the failure details; handle the rejection so
// it does not surface as an unhandled promise rejection, and report failure
// through the exit status.
testAPI().catch(() => {
  process.exitCode = 1;
});
|
||||
118
scripts/testCompleteTravelRates.js
Normal file
118
scripts/testCompleteTravelRates.js
Normal file
@@ -0,0 +1,118 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
|
||||
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');

const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Database connection failed:', err);
    process.exit(1);
  }
});

console.log('\n🧪 Testing Complete Travel Rates Database\n');
console.log('='.repeat(70));

/**
 * Promise wrapper around db.get for a statement without parameters.
 *
 * @param {string} sql - Statement to execute.
 * @returns {Promise<object|undefined>} First row, or undefined when empty.
 */
function fetchRow(sql) {
  return new Promise((resolve, reject) => {
    db.get(sql, [], (err, row) => (err ? reject(err) : resolve(row)));
  });
}

/**
 * Run the three checks in order and report an overall verdict.
 * Missing cities count as failures and produce a non-zero exit status.
 *
 * @returns {Promise<void>}
 */
async function runTests() {
  let failures = 0;

  // Test 1: Check Canberra complete data
  console.log('\n1️⃣ Testing Canberra (Australia):\n');

  const canberra = await fetchRow(`
    SELECT
      city_name, country, region, currency,
      standard_accommodation,
      breakfast, lunch, dinner, total_meals,
      incidentals, total_daily_allowance
    FROM travel_rates
    WHERE city_key = 'canberra'
  `);

  if (!canberra) {
    failures++;
    console.log('❌ CANBERRA NOT FOUND!\n');
  } else {
    console.log(`✅ ${canberra.city_name}, ${canberra.country}`);
    console.log(`   Region: ${canberra.region}`);
    console.log(`   Currency: ${canberra.currency}\n`);
    console.log(`   🏨 Accommodation: $${canberra.standard_accommodation}/night`);
    console.log(`   🍳 Breakfast: $${canberra.breakfast}`);
    console.log(`   🍱 Lunch: $${canberra.lunch}`);
    console.log(`   🍽️  Dinner: $${canberra.dinner}`);
    console.log(`   📝 Total Meals: $${canberra.total_meals}`);
    console.log(`   💼 Incidentals: $${canberra.incidentals}`);
    console.log(`   💰 Total Daily Allowance: $${canberra.total_daily_allowance}\n`);

    const fullDayTotal = parseFloat(canberra.standard_accommodation) + parseFloat(canberra.total_daily_allowance);
    console.log(`   🎯 FULL DAY COST (Accommodation + Per Diem): $${fullDayTotal.toFixed(2)} ${canberra.currency}\n`);
  }

  // Test 2: Sample Canadian city
  console.log('2️⃣ Testing Toronto (Canada):\n');

  const toronto = await fetchRow(`
    SELECT
      city_name, country, province, currency,
      jan_accommodation, feb_accommodation, mar_accommodation,
      breakfast, lunch, dinner, total_meals,
      incidentals, total_daily_allowance
    FROM travel_rates
    WHERE city_key = 'toronto'
  `);

  if (!toronto) {
    failures++;
    console.log('❌ Toronto not found\n');
  } else {
    console.log(`✅ ${toronto.city_name}, ${toronto.province}`);
    console.log(`   Currency: ${toronto.currency}\n`);
    console.log(`   🏨 Accommodation (Jan): $${toronto.jan_accommodation}/night`);
    console.log(`   🏨 Accommodation (Feb): $${toronto.feb_accommodation}/night`);
    console.log(`   🏨 Accommodation (Mar): $${toronto.mar_accommodation}/night`);
    console.log(`   🍳 Breakfast: $${toronto.breakfast}`);
    console.log(`   🍱 Lunch: $${toronto.lunch}`);
    console.log(`   🍽️  Dinner: $${toronto.dinner}`);
    console.log(`   💰 Total Daily Allowance: $${toronto.total_daily_allowance}\n`);
  }

  // Test 3: Count verification
  console.log('3️⃣ Database Statistics:\n');

  const stats = await fetchRow(`
    SELECT
      COUNT(*) as total,
      COUNT(CASE WHEN is_international = 0 THEN 1 END) as canadian,
      COUNT(CASE WHEN is_international = 1 THEN 1 END) as international,
      COUNT(DISTINCT country) as countries
    FROM travel_rates
  `);

  console.log(`   📊 Total Cities: ${stats.total}`);
  console.log(`   🇨🇦 Canadian: ${stats.canadian}`);
  console.log(`   🌍 International: ${stats.international}`);
  console.log(`   🗺️  Countries: ${stats.countries}\n`);

  console.log('='.repeat(70));
  if (failures === 0) {
    console.log('\n✅ All tests passed! Database has complete accommodation + meal rates\n');
  } else {
    // Do not claim success when an expected city row is missing.
    console.log(`\n❌ ${failures} check(s) failed: expected city rows are missing\n`);
    process.exitCode = 1;
  }

  db.close();
}

runTests().catch((err) => {
  console.error('❌ Query failed:', err);
  db.close();
  process.exit(1);
});
|
||||
65
scripts/testDatabase.js
Normal file
65
scripts/testDatabase.js
Normal file
@@ -0,0 +1,65 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
|
||||
const dbPath = path.join(__dirname, '..', 'database', 'travel_rates.db');

console.log('🔍 Testing Database...\n');
console.log(`📁 Database path: ${dbPath}\n`);

const db = new sqlite3.Database(dbPath, (err) => {
  if (err) {
    console.error('❌ Failed to open database:', err);
    process.exit(1);
  }
});

// serialize() makes the three queries execute one after another, so the
// report is always printed in the order Test 1, Test 2, Test 3.
db.serialize(() => {
  // Test 1: Check if Canberra exists
  db.get('SELECT * FROM accommodation_rates WHERE city_key = ?', ['canberra'], (err, row) => {
    if (err) {
      console.error('❌ Query failed:', err);
    } else if (row) {
      console.log('✅ CANBERRA FOUND!');
      console.log('   City:', row.city_name);
      console.log('   Country:', row.country);
      console.log('   Region:', row.region);
      console.log('   Jan Rate:', `$${row.jan_rate} ${row.currency}`);
      console.log('   Standard Rate:', `$${row.standard_rate} ${row.currency}`);
      console.log('   International:', row.is_international ? 'Yes' : 'No');
    } else {
      console.log('❌ CANBERRA NOT FOUND IN DATABASE!');
    }
  });

  // Test 2: Count total cities
  db.get('SELECT COUNT(*) as count FROM accommodation_rates', [], (err, row) => {
    if (err) {
      console.error('❌ Count query failed:', err);
    } else {
      console.log(`\n📊 Total cities in database: ${row.count}`);
    }
  });

  // Test 3: List all Australian cities
  db.all('SELECT city_key, city_name, standard_rate, currency FROM accommodation_rates WHERE country = ?', ['Australia'], (err, rows) => {
    if (err) {
      console.error('❌ Australia query failed:', err);
    } else {
      console.log('\n🇦🇺 Australian cities:');
      if (rows.length === 0) {
        console.log('   ❌ No Australian cities found!');
      } else {
        rows.forEach(row => {
          // Print the stored currency rather than assuming USD.
          console.log(`   - ${row.city_name}: $${row.standard_rate} ${row.currency}`);
        });
      }
    }

    // Close database
    db.close((err) => {
      if (err) {
        console.error('Error closing database:', err);
      } else {
        console.log('\n✅ Test complete!');
      }
    });
  });
});
|
||||
25
scripts/test_currency_extraction.py
Normal file
25
scripts/test_currency_extraction.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import re
|
||||
|
||||
def _extract_currency_from_title(title):
|
||||
"""Extract currency code from table title like 'Albania - Currency: Euro (EUR)'"""
|
||||
if not title:
|
||||
return None
|
||||
# Pattern: "Currency: [Name] ([CODE])"
|
||||
match = re.search(r"Currency:\s*[^(]+\(([A-Z]{3})\)", title)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
# Sample titles: four well-formed ones plus two (None, free text) that carry
# no currency code.
test_titles = [
    "Argentina - Currency: Argentine Peso (ARS)",
    "Albania - Currency: Euro (EUR)",
    "Afghanistan - Currency: US Dollar (USD)",
    "Canada - Currency: Canadian Dollar (CAD)",
    None,
    "Some random text"
]

# Print each title next to the code extracted from it.
for title, result in zip(test_titles, map(_extract_currency_from_title, test_titles)):
    print(f"{title!r} -> {result}")
|
||||
61
scripts/test_extraction_debug.py
Normal file
61
scripts/test_extraction_debug.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Test currency extraction step by step"""
|
||||
import sqlite3
|
||||
import re
|
||||
|
||||
def _extract_currency_from_title(title):
|
||||
"""Extract currency code from table title like 'Albania - Currency: Euro (EUR)'"""
|
||||
if not title:
|
||||
return None
|
||||
# Pattern: "Currency: [Name] ([CODE])"
|
||||
match = re.search(r"Currency:\s*[^(]+\(([A-Z]{3})\)", title)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
# Open the scraped-rates database; everything below only reads from it.
conn = sqlite3.connect('data/travel_rates_scraped.sqlite3')
try:
    cursor = conn.cursor()

    print("Testing currency extraction from stored titles:\n")

    # Run the same title lookup for each sample country. The lead text
    # reproduces the blank line printed before the second country's output.
    for lead, country in (("", "Argentina"), ("\n", "Albania")):
        cursor.execute(
            "SELECT title FROM raw_tables WHERE title LIKE ?",
            (f"%{country}%",),
        )
        row = cursor.fetchone()  # only the first matching table is inspected
        if row:
            title = row[0]
            print(f"{lead}{country} Title: {title}")
            currency = _extract_currency_from_title(title)
            print(f"Extracted Currency: {currency}")

    # Check what entries we actually have
    cursor.execute("SELECT COUNT(*) FROM rate_entries WHERE currency IS NOT NULL")
    print(f"\nTotal entries with currency: {cursor.fetchone()[0]}")

    cursor.execute("SELECT COUNT(*) FROM rate_entries WHERE currency IS NULL")
    print(f"Total entries WITHOUT currency: {cursor.fetchone()[0]}")
finally:
    # Release the connection even when a query raises (e.g. a table is missing).
    conn.close()
|
||||
33
scripts/test_scraper.py
Normal file
33
scripts/test_scraper.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Test the scraper extract_rate_entries function with debug output"""
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
from gov_travel.scrapers import SourceConfig, scrape_tables_from_source, extract_rate_entries
|
||||
|
||||
# Create a test source config
source = SourceConfig(name="international", url="https://www.njc-cnm.gc.ca/directive/app_d.php?lang=en")

# Fetch every table the source page exposes.
print("Fetching tables...")
tables = scrape_tables_from_source(source)
print(f"Got {len(tables)} tables")

# An empty scrape leaves nothing to inspect; stop with a clear message
# instead of failing with IndexError on tables[0] below.
if len(tables) == 0:
    print("No tables returned; nothing to extract.")
    sys.exit(1)

# Check first table
first_table = tables[0]
print("\nFirst table:")
print(f" Index: {first_table['table_index']}")
print(f" Title: {first_table['title']}")
print(f" Data rows: {len(first_table['data'])}")

# Extract rate entries from just first table
print("\nExtracting rate entries from first table...")
entries = extract_rate_entries(source, [first_table])
print(f"Got {len(entries)} entries")

if entries:
    first_entry = entries[0]
    print("\nFirst entry:")
    print(f" Country: {first_entry['country']}")
    print(f" City: {first_entry['city']}")
    print(f" Currency: {first_entry['currency']}")
    print(f" Rate Type: {first_entry['rate_type']}")
    print(f" Rate Amount: {first_entry['rate_amount']}")
|
||||
58
scripts/verify_currencies.py
Normal file
58
scripts/verify_currencies.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import sqlite3
|
||||
import sys
|
||||
|
||||
db_path = "data/travel_rates_scraped.sqlite3"

# Stays None if connect() itself fails, so the finally clause knows there is
# nothing to close.
conn = None
try:
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Count total entries
    cursor.execute("SELECT COUNT(*) FROM rate_entries")
    total = cursor.fetchone()[0]
    print(f"Total rate entries: {total}")

    # Count entries with currency
    cursor.execute("SELECT COUNT(*) FROM rate_entries WHERE currency IS NOT NULL")
    with_currency = cursor.fetchone()[0]
    print(f"Entries with currency: {with_currency}")
    print(f"Missing currency: {total - with_currency}")

    # Currency distribution
    print("\nCurrency Distribution:")
    cursor.execute("""
        SELECT currency, COUNT(*) as count
        FROM rate_entries
        GROUP BY currency
        ORDER BY count DESC
    """)
    for row in cursor.fetchall():
        # Falsy currency values (NULL in the database) are shown as "NULL".
        currency = row[0] if row[0] else "NULL"
        print(f" {currency}: {row[1]}")

    # Sample entries with currency: the same five-row query per country.
    for country, code in (("Argentina", "ARS"), ("Albania", "EUR")):
        print(f"\nSample Entries ({country} {code}):")
        cursor.execute(
            """
            SELECT country, city, rate_type, rate_amount, currency
            FROM rate_entries
            WHERE country LIKE ?
            LIMIT 5
            """,
            (f"%{country}%",),
        )
        for row in cursor.fetchall():
            print(f" {row[0]} | {row[1]} | {row[2]}: ${row[3]} {row[4]}")

except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
finally:
    # Runs on success and on the error path (including the SystemExit raised
    # above), so the connection is never left open.
    if conn is not None:
        conn.close()
|
||||
Reference in New Issue
Block a user