mirror of
https://github.com/mblanke/Gov_Travel_App.git
synced 2026-03-01 06:00:21 -05:00
Add Excel estimate export workflow to CLI
This commit is contained in:
29
README.md
29
README.md
@@ -19,8 +19,31 @@ python -m gov_travel.main --db data/travel_rates.sqlite3
|
||||
- `--sources international domestic accommodations` to limit which sources are scraped.
|
||||
- `--pause 1.5` to pause for the given number of seconds (here 1.5) between processing tables.
|
||||
- `--log-level DEBUG` to increase logging verbosity.
|
||||
- `--no-scrape` to skip scraping and only work with existing database data.
|
||||
- `GOV_TRAVEL_USER_AGENT="YourOrg/1.0"` to override the default user agent.
|
||||
|
||||
## Export an estimate to Excel
|
||||
After data exists in SQLite (from a previous scrape), export a cost estimate workbook:
|
||||
|
||||
```bash
|
||||
python -m gov_travel.main \
|
||||
--db data/travel_rates.sqlite3 \
|
||||
--no-scrape \
|
||||
--export-estimate-xlsx output/travel_estimate.xlsx \
|
||||
--estimate-days 5 \
|
||||
--estimate-rate-type meal \
|
||||
--estimate-country Canada \
|
||||
--estimate-city Ottawa \
|
||||
--estimate-lodging-per-night 235 \
|
||||
--estimate-transport-total 175 \
|
||||
--estimate-misc-total 80
|
||||
```
|
||||
|
||||
Workbook sheets:
|
||||
- `estimate_summary`: Days, recommended meal allowance, line item subtotals, and grand total.
|
||||
- `matched_rate_entries`: Source rows used to derive the allowance recommendation.
|
||||
|
||||
## Database contents
|
||||
The database includes:
|
||||
- `raw_tables` for every scraped HTML table.
|
||||
- `rate_entries` for parsed rate rows (country/city/province + rate fields).
|
||||
@@ -28,3 +51,9 @@ The database includes:
|
||||
- `accommodations` for parsed lodging listings.
|
||||
|
||||
If a field is not detected by the heuristics, the full row is still preserved in `raw_tables` and the `raw_json` columns for deeper post-processing.
|
||||
|
||||
## Suggested next improvements
|
||||
- Add automated tests for parser heuristics and the estimate export path.
|
||||
- Add currency conversion in estimate exports using `exchange_rates` so totals can be normalized to CAD.
|
||||
- Add source-level freshness metadata to avoid duplicate inserts when scraping repeatedly.
|
||||
- Expose estimate/export in a small web UI for non-technical users.
|
||||
|
||||
@@ -13,6 +13,7 @@ dependencies = [
|
||||
"lxml==5.3.0",
|
||||
"pandas==2.2.3",
|
||||
"requests==2.32.3",
|
||||
"openpyxl==3.1.5",
|
||||
]
|
||||
|
||||
[tool.setuptools]
|
||||
|
||||
@@ -2,3 +2,4 @@ beautifulsoup4==4.12.3
|
||||
lxml==5.3.0
|
||||
pandas==2.2.3
|
||||
requests==2.32.3
|
||||
openpyxl==3.1.5
|
||||
|
||||
101
src/gov_travel/estimate.py
Normal file
101
src/gov_travel/estimate.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EstimateConfig:
    """Immutable inputs for one travel cost estimate.

    The numeric fields are manual figures supplied by the caller; the text
    fields narrow which stored rate rows are used for the allowance lookup.

    Raises:
        ValueError: If ``days`` is negative, since every subtotal derived
            from it would be negative as well.
    """

    # Number of travel days; multiplies both the allowance and the lodging rate.
    days: int
    # Manually supplied lodging cost for a single night.
    lodging_per_night: float
    # Manually supplied transport cost for the whole trip.
    transport_total: float
    # Manually supplied miscellaneous cost for the whole trip.
    misc_total: float
    # Substring matched case-insensitively against the stored rate type.
    rate_type: str
    # Optional exact (case-insensitive) filters; None or "" disables a filter.
    country: str | None = None
    city: str | None = None
    province: str | None = None

    def __post_init__(self) -> None:
        # Fail early instead of exporting a workbook with negative subtotals.
        if self.days < 0:
            raise ValueError(f"days must be non-negative, got {self.days}")
|
||||
|
||||
|
||||
def _load_rate_matches(connection: sqlite3.Connection, config: EstimateConfig) -> list[dict[str, Any]]:
    """Return the ``rate_entries`` rows that match the estimate filters.

    Args:
        connection: Open SQLite connection containing a ``rate_entries`` table.
        config: Estimate settings. ``rate_type`` is matched as a
            case-insensitive substring; ``country``, ``city`` and ``province``
            are matched case-insensitively for equality and are skipped when
            empty or ``None``.

    Returns:
        One dict per matching row, keyed by column name, ordered by
        ``effective_date`` descending and then ``rate_amount`` descending.
        Rows without a ``rate_amount`` are excluded.
    """
    query = """
        SELECT
            source,
            source_url,
            table_title,
            country,
            city,
            province,
            currency,
            rate_type,
            rate_amount,
            effective_date
        FROM rate_entries
        WHERE rate_amount IS NOT NULL
          AND LOWER(rate_type) LIKE LOWER(?)
    """
    params: list[Any] = [f"%{config.rate_type}%"]
    if config.country:
        query += " AND LOWER(country) = LOWER(?)"
        params.append(config.country)
    if config.city:
        query += " AND LOWER(city) = LOWER(?)"
        params.append(config.city)
    if config.province:
        query += " AND LOWER(province) = LOWER(?)"
        params.append(config.province)

    query += " ORDER BY effective_date DESC, rate_amount DESC"
    cursor = connection.execute(query, params)
    # Build the dicts from the cursor metadata rather than dict(row): that
    # only works when the connection uses sqlite3.Row, whereas zipping the
    # column names works for both sqlite3.Row and the default tuple rows.
    columns = [column[0] for column in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
|
||||
|
||||
|
||||
def _pick_recommended_rate(matches: list[dict[str, Any]]) -> tuple[float, str]:
    """Derive a recommended rate and its currency from matched rate rows.

    The first row is taken as the reference: its ``effective_date`` and its
    currency (``"CAD"`` when missing or empty) select the rows to average.

    Args:
        matches: Rate rows as dicts with ``currency``, ``effective_date`` and
            ``rate_amount`` keys. The first row is expected to be the most
            recent one.

    Returns:
        ``(average, currency)`` where ``average`` is rounded to two decimals.
        ``(0.0, "CAD")`` is returned for an empty list, and ``0.0`` with the
        reference currency when no selected row carries an amount.
    """
    if not matches:
        return 0.0, "CAD"

    reference = matches[0]
    currency = reference.get("currency") or "CAD"
    latest_date = reference.get("effective_date")

    # Apply the same "CAD" default to every row's currency; comparing the raw
    # value would exclude rows with a missing currency, including the
    # reference row itself. Rows without an amount are skipped so they do not
    # inflate the denominator.
    amounts = [
        float(row["rate_amount"])
        for row in matches
        if row.get("effective_date") == latest_date
        and (row.get("currency") or "CAD") == currency
        and row.get("rate_amount") is not None
    ]
    if not amounts:
        return 0.0, currency
    return round(sum(amounts) / len(amounts), 2), currency
|
||||
|
||||
|
||||
def export_estimate_xlsx(
    connection: sqlite3.Connection,
    output_path: Path,
    config: EstimateConfig,
) -> Path:
    """Write a two-sheet Excel workbook containing a travel cost estimate.

    The ``estimate_summary`` sheet lists the day count, the recommended
    allowance, each subtotal and the grand total. The
    ``matched_rate_entries`` sheet lists the rate rows the recommendation was
    derived from.

    Args:
        connection: Open SQLite connection used to look up stored rates.
        output_path: Destination ``.xlsx`` path; missing parent directories
            are created.
        config: Estimate settings (days, manual costs and rate filters).

    Returns:
        The path the workbook was written to.
    """
    # Accept plain strings as well; the code below relies on Path.parent.
    output_path = Path(output_path)

    matches = _load_rate_matches(connection, config)
    recommended_rate, currency = _pick_recommended_rate(matches)

    meals_total = round(recommended_rate * config.days, 2)
    # Lodging is charged once per travel day (days is used as the night count).
    lodging_total = round(config.lodging_per_night * config.days, 2)
    grand_total = round(meals_total + lodging_total + config.transport_total + config.misc_total, 2)

    # NOTE(review): the manual amounts are labelled with the currency of the
    # matched rate; no conversion is applied - confirm callers enter them in
    # that currency.
    summary_rows = [
        {"item": "Days", "value": config.days, "currency": ""},
        {"item": f"Meal allowance ({config.rate_type})", "value": recommended_rate, "currency": currency},
        {"item": "Meals subtotal", "value": meals_total, "currency": currency},
        {"item": "Lodging per night", "value": config.lodging_per_night, "currency": currency},
        {"item": "Lodging subtotal", "value": lodging_total, "currency": currency},
        {"item": "Transport total", "value": config.transport_total, "currency": currency},
        {"item": "Misc total", "value": config.misc_total, "currency": currency},
        {"item": "Grand total", "value": grand_total, "currency": currency},
    ]

    if matches:
        matches_frame = pd.DataFrame(matches)
    else:
        # With no matches a bare DataFrame would produce a blank sheet; keep
        # the column headers so the workbook layout stays recognisable.
        matches_frame = pd.DataFrame(
            columns=[
                "source",
                "source_url",
                "table_title",
                "country",
                "city",
                "province",
                "currency",
                "rate_type",
                "rate_amount",
                "effective_date",
            ]
        )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        pd.DataFrame(summary_rows).to_excel(writer, index=False, sheet_name="estimate_summary")
        matches_frame.to_excel(writer, index=False, sheet_name="matched_rate_entries")

    return output_path
|
||||
@@ -5,6 +5,7 @@ import logging
|
||||
from pathlib import Path
|
||||
|
||||
from gov_travel import db
|
||||
from gov_travel.estimate import EstimateConfig, export_estimate_xlsx
|
||||
from gov_travel.scrapers import (
|
||||
SOURCES,
|
||||
build_session,
|
||||
@@ -42,6 +43,57 @@ def parse_args() -> argparse.Namespace:
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||
help="Logging level",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-scrape",
|
||||
action="store_true",
|
||||
help="Skip scraping and only use data already in the database",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-estimate-xlsx",
|
||||
type=Path,
|
||||
help="Optional output path to export a travel estimate workbook",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-days",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Number of travel days for the estimate",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-rate-type",
|
||||
default="meal",
|
||||
help="Rate type filter used to find allowance rates (e.g., meal, breakfast, dinner)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-country",
|
||||
help="Optional country filter for estimate rate lookup",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-city",
|
||||
help="Optional city filter for estimate rate lookup",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-province",
|
||||
help="Optional province filter for estimate rate lookup",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-lodging-per-night",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Manual lodging cost per night",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-transport-total",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Manual transport total",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--estimate-misc-total",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Manual misc total",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -52,25 +104,40 @@ def main() -> None:
|
||||
connection = db.connect(args.db)
|
||||
db.init_db(connection)
|
||||
|
||||
session = build_session()
|
||||
selected = {name for name in args.sources}
|
||||
for source in SOURCES:
|
||||
if source.name not in selected:
|
||||
continue
|
||||
logger.info("Scraping %s (%s)", source.name, source.url)
|
||||
tables = scrape_tables_from_source(source, session=session, pause_seconds=args.pause)
|
||||
logger.info("Found %s tables for %s", len(tables), source.name)
|
||||
db.insert_raw_tables(connection, source.name, source.url, tables)
|
||||
if not args.no_scrape:
|
||||
session = build_session()
|
||||
selected = {name for name in args.sources}
|
||||
for source in SOURCES:
|
||||
if source.name not in selected:
|
||||
continue
|
||||
logger.info("Scraping %s (%s)", source.name, source.url)
|
||||
tables = scrape_tables_from_source(source, session=session, pause_seconds=args.pause)
|
||||
logger.info("Found %s tables for %s", len(tables), source.name)
|
||||
db.insert_raw_tables(connection, source.name, source.url, tables)
|
||||
|
||||
rate_entries = extract_rate_entries(source, tables)
|
||||
db.insert_rate_entries(connection, rate_entries)
|
||||
rate_entries = extract_rate_entries(source, tables)
|
||||
db.insert_rate_entries(connection, rate_entries)
|
||||
|
||||
exchange_rates = extract_exchange_rates(source, tables)
|
||||
db.insert_exchange_rates(connection, exchange_rates)
|
||||
exchange_rates = extract_exchange_rates(source, tables)
|
||||
db.insert_exchange_rates(connection, exchange_rates)
|
||||
|
||||
if source.name == "accommodations":
|
||||
accommodations = extract_accommodations(source, tables)
|
||||
db.insert_accommodations(connection, accommodations)
|
||||
if source.name == "accommodations":
|
||||
accommodations = extract_accommodations(source, tables)
|
||||
db.insert_accommodations(connection, accommodations)
|
||||
|
||||
if args.export_estimate_xlsx:
|
||||
estimate_config = EstimateConfig(
|
||||
days=args.estimate_days,
|
||||
lodging_per_night=args.estimate_lodging_per_night,
|
||||
transport_total=args.estimate_transport_total,
|
||||
misc_total=args.estimate_misc_total,
|
||||
rate_type=args.estimate_rate_type,
|
||||
country=args.estimate_country,
|
||||
city=args.estimate_city,
|
||||
province=args.estimate_province,
|
||||
)
|
||||
output_path = export_estimate_xlsx(connection, args.export_estimate_xlsx, estimate_config)
|
||||
logger.info("Estimate workbook exported to %s", output_path)
|
||||
|
||||
connection.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user