-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhts_core.py
More file actions
145 lines (114 loc) · 4.48 KB
/
hts_core.py
File metadata and controls
145 lines (114 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3
"""Core HTS lookup library — shared by CLI, MCP server, and any future interface."""
import os
import sqlite3
from pathlib import Path
from typing import Optional
DEFAULT_DB_PATH = "data/hts.db"
def get_db(db_path: Optional[str] = None) -> sqlite3.Connection:
"""Get a database connection.
Uses db_path if provided, otherwise checks HTS_DB_PATH env var,
otherwise falls back to data/hts.db.
Raises FileNotFoundError if the database file does not exist.
"""
path = Path(db_path or os.getenv("HTS_DB_PATH", DEFAULT_DB_PATH))
if not path.exists():
raise FileNotFoundError(f"{path} not found. Run ingest first.")
return sqlite3.connect(str(path))
# Column list for the 9-column SELECT used by CLI
CLI_COLUMNS = [
"id", "hts_code", "indent", "description", "unit",
"general_rate", "special_rate", "column2_rate", "chapter_id",
]
# Column list for the 6-column SELECT used by MCP
MCP_COLUMNS = [
"hts_code", "description", "unit",
"general_rate", "special_rate", "column2_rate",
]
CLI_SELECT = f"SELECT {', '.join(CLI_COLUMNS)} FROM hts_entries"
MCP_SELECT = f"SELECT {', '.join(MCP_COLUMNS)} FROM hts_entries"
def _row_to_dict(row: tuple, columns: list) -> dict:
"""Convert a database row to a dictionary using the given column names."""
return dict(zip(columns, row))
def row_to_cli_dict(row: tuple) -> dict:
"""Convert a 9-column CLI row to a dictionary."""
return _row_to_dict(row, CLI_COLUMNS)
def row_to_mcp_dict(row: tuple) -> dict:
"""Convert a 6-column MCP row to a dictionary, replacing None with empty string."""
d = _row_to_dict(row, MCP_COLUMNS)
return {k: (v if v is not None else "") for k, v in d.items()}
def search_entries(db: sqlite3.Connection, keyword: str, limit: int = 10, columns: str = None) -> list:
"""Search HTS entries by keyword in description."""
select = columns or CLI_SELECT
cursor = db.cursor()
cursor.execute(
f"{select} WHERE description LIKE ? LIMIT ?",
(f"%{keyword}%", limit),
)
return cursor.fetchall()
def get_entry(db: sqlite3.Connection, hts_code: str, columns: str = None) -> Optional[tuple]:
"""Get a single HTS entry by exact code."""
select = columns or CLI_SELECT
cursor = db.cursor()
cursor.execute(f"{select} WHERE hts_code = ? LIMIT 1", (hts_code,))
return cursor.fetchone()
def list_chapter_entries(db: sqlite3.Connection, chapter: str, columns: str = None) -> list:
"""List all entries in a chapter."""
select = columns or CLI_SELECT
chapter_padded = chapter.zfill(2)
cursor = db.cursor()
cursor.execute(
f"{select} WHERE hts_code LIKE ? ORDER BY hts_code",
(f"{chapter_padded}%",),
)
return cursor.fetchall()
def get_all_chapters(db: sqlite3.Connection) -> list:
"""Get all chapters with descriptions, entry counts, and freshness timestamps."""
cursor = db.cursor()
cursor.execute(
"""SELECT c.number, c.description, COUNT(h.id) as entry_count,
c.last_checked_at, c.last_changed_at
FROM chapters c
LEFT JOIN hts_entries h ON c.id = h.chapter_id
GROUP BY c.id
ORDER BY c.number"""
)
return cursor.fetchall()
def get_data_freshness(db: sqlite3.Connection) -> dict:
"""Get data freshness info: latest refresh metadata plus per-chapter timestamps."""
cursor = db.cursor()
cursor.execute(
"""SELECT last_full_refresh, refresh_duration_secs, chapters_changed, total_chapters
FROM data_freshness
ORDER BY id DESC LIMIT 1"""
)
freshness_row = cursor.fetchone()
if freshness_row:
freshness = {
"last_full_refresh": freshness_row[0],
"refresh_duration_secs": freshness_row[1],
"chapters_changed_in_last_refresh": freshness_row[2],
"total_chapters": freshness_row[3],
}
else:
freshness = {
"last_full_refresh": None,
"refresh_duration_secs": None,
"chapters_changed_in_last_refresh": None,
"total_chapters": None,
}
cursor.execute(
"""SELECT number, description, last_checked_at, last_changed_at
FROM chapters
ORDER BY number"""
)
freshness["chapters"] = [
{
"number": r[0],
"description": r[1] or "",
"last_checked_at": r[2] or "",
"last_changed_at": r[3] or "",
}
for r in cursor.fetchall()
]
return freshness