new-site/api/src/sanctions.ts
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

364 lines
13 KiB
TypeScript

/**
* CASL Sanctions Screening
*
* Screens director names against Canada's Consolidated Autonomous Sanctions List (CASL).
* List source: Global Affairs Canada, updated regularly under SEMA + JVCFOA.
* XML: https://www.international.gc.ca/world-monde/assets/office_docs/
* international_relations-relations_internationales/sanctions/sema-lmes.xml
*
* Matching strategy (three tiers):
* TIER 1 — Exact match (case-insensitive, normalized): result = 'hit' → block order
* TIER 2 — High fuzzy match (score ≥ 85): result = 'hit' → block order
* TIER 3 — Moderate fuzzy match (score 70-84): result = 'possible_match' → manual review queue
* Below 70: result = 'clear' → proceed
*
* The list is cached in memory for CACHE_TTL_MS to avoid hammering Global Affairs Canada.
* Cache is refreshed on each server restart and on TTL expiry.
*
* This check runs BEFORE the order is saved to the database and BEFORE any payment
* is collected — no Stripe session is created for a sanctioned individual.
*/
import { pool } from "./db.js";
/** URL that fetchAndParse() downloads the CASL XML from. */
const CASL_XML_URL =
"https://www.international.gc.ca/world-monde/assets/office_docs/international_relations-relations_internationales/sanctions/sema-lmes.xml";
/** Maximum age, in milliseconds, at which getCache() still serves the in-memory list without re-fetching. */
const CACHE_TTL_MS = 6 * 60 * 60 * 1000; // 6 hours
/** Lowest best-match score that screenName() reports as "hit". */
const HIT_THRESHOLD = 85; // score ≥ 85 → definite hit, block
/** Lowest best-match score that screenName() reports as "possible_match" (and attaches match details for). */
const POSSIBLE_MATCH_THRESHOLD = 70; // score 70-84 → possible match, manual review
// ─── In-memory cache ──────────────────────────────────────────────────────────
/**
 * One named record parsed from the CASL XML (see parseRecord).
 * The name fields used for matching are stored normalized; the raw_* fields
 * keep the text exactly as extracted from the XML.
 */
interface CaslEntry {
last_name: string; // normalized (lowercase, no diacritics)
given_name: string; // normalized the same way as last_name
aliases: string[]; // <Aliases> text split on ";" or ",", each piece normalized, empty pieces dropped
country: string; // <Country> text as extracted
schedule: string; // <Schedule> text as extracted
item: string; // <Item> text as extracted
date_of_birth?: string; // <DateOfBirthOrShipBuildDate> text; undefined when that tag is missing or empty
date_of_listing: string; // <DateOfListing> text as extracted
// Raw for logging
raw_last_name: string;
raw_given_name: string;
}
/** A parsed copy of the list together with when it was downloaded. */
interface CaslCache {
entries: CaslEntry[]; // every record that parseRecord accepted
fetched_at: Date; // set when the download finished; compared against CACHE_TTL_MS in getCache
list_date: string; // list date string produced by parseXml
}
// The list currently held in memory; null when no list is held.
let _cache: CaslCache | null = null;
// True while a getCache() call is awaiting fetchAndParse().
let _fetching = false;
// Resolve callbacks of getCache() callers that are waiting on the in-flight fetch.
let _fetchQueue: Array<(c: CaslCache) => void> = [];
// ─── XML parsing ─────────────────────────────────────────────────────────────
/**
 * Return the text content of the first `<tag>…</tag>` element found in `xml`.
 *
 * The content is entity-decoded and then trimmed: the five predefined XML
 * entities (`&amp;` `&lt;` `&gt;` `&quot;` `&apos;`) and decimal / hexadecimal
 * character references (`&#233;`, `&#xE9;`) are replaced by the characters they
 * stand for. Without this, a name such as `O&apos;Brien` would later be
 * normalized to "o apos brien" and no longer match "O'Brien".
 *
 * @param xml XML fragment to search.
 * @param tag Element name. It is interpolated into a regular expression as-is and
 *            only matches an opening tag written without attributes.
 * @returns The decoded, trimmed text, or "" when the element is not present.
 */
function extractXmlTag(xml: string, tag: string): string {
  const m = new RegExp(`<${tag}>(.*?)</${tag}>`, "s").exec(xml);
  if (!m) return "";

  const named: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  const decoded = m[1].replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (whole: string, ref: string): string => {
      if (ref[0] !== "#") return named[ref] ?? whole;
      const code = ref[1] === "x" ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing a RangeError.
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    },
  );
  return decoded.trim();
}
/**
 * Collect the inner XML of every `<record>…</record>` element in `xml`,
 * in the order the elements appear.
 *
 * @param xml Full XML document text.
 * @returns One string per record (the text between the tags); empty when none are found.
 */
function extractAllRecords(xml: string): string[] {
  const recordPattern = /<record>([\s\S]*?)<\/record>/g;
  return Array.from(xml.matchAll(recordPattern), found => found[1]);
}
/**
 * Lowercase Latin letters that NFD does not decompose into "base letter + combining mark".
 * Stripping combining marks leaves them intact, so they need an explicit ASCII replacement.
 */
const LATIN_FOLDS: Record<string, string> = {
  "\u00df": "ss", // ß
  "\u00e6": "ae", // æ
  "\u0153": "oe", // œ
  "\u00f8": "o", // ø
  "\u0111": "d", // đ
  "\u00f0": "d", // ð
  "\u00fe": "th", // þ
  "\u0142": "l", // ł
  "\u0131": "i", // ı (dotless i)
};

/**
 * Remove diacritics and normalize to ASCII lowercase for comparison.
 *
 * Steps: NFD-decompose, drop combining marks (U+0300–U+036F), lowercase, fold the
 * non-decomposing Latin letters listed in LATIN_FOLDS, turn every remaining character
 * outside `[a-z0-9]` and whitespace into a space, collapse whitespace runs and trim.
 *
 * @param s Any string (a name from the list or from the order form).
 * @returns The comparison key; "" when nothing usable remains.
 */
function normalize(s: string): string {
  return s
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    // Must run after toLowerCase so the uppercase forms (Ł, Ø, Æ, …) are covered too.
    .replace(/[\u00df\u00e6\u0153\u00f8\u0111\u00f0\u00fe\u0142\u0131]/g, ch => LATIN_FOLDS[ch] ?? ch)
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Turn the inner XML of one `<record>` into a CaslEntry.
 *
 * @param recordXml Text between `<record>` and `</record>`.
 * @returns The parsed entry, or null when the record has neither a LastName nor a
 *          GivenName value (such records are not screened against).
 */
function parseRecord(recordXml: string): CaslEntry | null {
  const field = (tag: string): string => extractXmlTag(recordXml, tag);

  const rawLast = field("LastName");
  const rawGiven = field("GivenName");
  if (rawLast === "" && rawGiven === "") return null;

  // Aliases arrive as one string separated by ";" or ","; keep only non-empty normalized pieces.
  const aliases: string[] = [];
  const aliasText = field("Aliases");
  if (aliasText) {
    for (const piece of aliasText.split(/[;,]/)) {
      const cleaned = normalize(piece.trim());
      if (cleaned) aliases.push(cleaned);
    }
  }

  const birth = field("DateOfBirthOrShipBuildDate");

  return {
    last_name: normalize(rawLast),
    given_name: normalize(rawGiven),
    aliases,
    country: field("Country"),
    schedule: field("Schedule"),
    item: field("Item"),
    date_of_birth: birth === "" ? undefined : birth,
    date_of_listing: field("DateOfListing"),
    raw_last_name: rawLast,
    raw_given_name: rawGiven,
  };
}
/**
 * Parse the whole CASL document into entries plus a list date.
 *
 * The list date is the `<Month> <day>, <year>` text following the phrase "updated on"
 * anywhere in the document. When that phrase is absent, today's date in UTC
 * (`YYYY-MM-DD`) is used instead.
 *
 * @param xml Full XML document text.
 * @returns The accepted entries (records rejected by parseRecord are dropped) and the list date.
 */
function parseXml(xml: string): { entries: CaslEntry[]; list_date: string } {
  const entries = extractAllRecords(xml)
    .map(recordXml => parseRecord(recordXml))
    .filter((parsed): parsed is CaslEntry => Boolean(parsed));

  const stamp = /updated on (\w+ \d+, \d{4})/.exec(xml);
  const list_date = stamp === null ? new Date().toISOString().slice(0, 10) : stamp[1];

  return { entries, list_date };
}
// ─── Cache management ─────────────────────────────────────────────────────────
/**
 * Download the CASL XML and parse it into a fresh cache object.
 *
 * The request is aborted after 30 seconds. This function does not touch the
 * module-level cache; the caller decides what to do with the result.
 *
 * @returns The parsed entries, the list date, and the time the download finished.
 * @throws Error when the HTTP status is not OK, or when the body parses to zero
 *         entries. An empty list would make every name screen as "clear", so it is
 *         treated as a failed download rather than a valid (empty) list.
 *         Network and timeout errors from `fetch` propagate unchanged.
 */
async function fetchAndParse(): Promise<CaslCache> {
  console.log("[sanctions] Fetching CASL XML from Global Affairs Canada…");
  const resp = await fetch(CASL_XML_URL, {
    headers: { "Accept": "application/xml, text/xml", "User-Agent": "PerformanceWest-ComplianceCheck/1.0" },
    signal: AbortSignal.timeout(30_000),
  });
  if (!resp.ok) throw new Error(`CASL XML fetch failed: ${resp.status}`);

  const xml = await resp.text();
  const { entries, list_date } = parseXml(xml);
  if (entries.length === 0) {
    // e.g. an HTML error page served with status 200, or a change in the XML schema
    throw new Error("CASL XML parsed to zero entries — refusing to use an empty list");
  }

  console.log(`[sanctions] Loaded ${entries.length} CASL entries (list date: ${list_date})`);
  return { entries, fetched_at: new Date(), list_date };
}
/** Reject callbacks of the callers parked in `_fetchQueue`; filled and cleared together with it. */
let _fetchRejectQueue: Array<(reason: unknown) => void> = [];

/**
 * Return the sanctions list, downloading it when no copy younger than CACHE_TTL_MS is held.
 *
 * Only one download runs at a time. Callers that arrive while it is in flight are
 * parked and settled with the same outcome as the caller that started it:
 *   - download succeeded → everyone receives the fresh list;
 *   - download failed but an older list is still held → everyone receives that stale list;
 *   - download failed and no list is held → everyone is rejected with the download error.
 * Parked callers are always settled; none are left pending after a failed download.
 *
 * @returns The fresh list, or the stale one when the refresh failed.
 * @throws Whatever fetchAndParse() threw, when there is no list to fall back on.
 */
async function getCache(): Promise<CaslCache> {
  if (_cache && Date.now() - _cache.fetched_at.getTime() < CACHE_TTL_MS) {
    return _cache;
  }

  if (_fetching) {
    // Another request is already fetching — wait for it to settle us either way.
    return new Promise<CaslCache>((resolve, reject) => {
      _fetchQueue.push(resolve);
      _fetchRejectQueue.push(reject);
    });
  }

  _fetching = true;
  try {
    const cache = await fetchAndParse();
    _cache = cache;
    const resolvers = _fetchQueue;
    _fetchQueue = [];
    _fetchRejectQueue = [];
    for (const resolve of resolvers) resolve(cache);
    return cache;
  } catch (err) {
    const stale = _cache;
    if (stale) {
      console.error("[sanctions] CASL refresh failed, using stale cache:", err);
      const resolvers = _fetchQueue;
      _fetchQueue = [];
      _fetchRejectQueue = [];
      for (const resolve of resolvers) resolve(stale);
      return stale;
    }
    const rejecters = _fetchRejectQueue;
    _fetchQueue = [];
    _fetchRejectQueue = [];
    for (const reject of rejecters) reject(err);
    throw err;
  } finally {
    _fetching = false;
  }
}
// ─── Fuzzy matching ──────────────────────────────────────────────────────────
/**
 * Levenshtein edit distance: the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `a` into `b`.
 *
 * Only two rows of the distance table are kept at any time.
 */
function levenshtein(a: string, b: string): number {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  const cols = b.length;
  // previous[j] = distance between the first i-1 chars of a and the first j chars of b
  let previous: number[] = Array.from({ length: cols + 1 }, (_, j) => j);
  let current: number[] = new Array<number>(cols + 1).fill(0);

  for (let i = 1; i <= a.length; i++) {
    current[0] = i;
    const ch = a[i - 1];
    for (let j = 1; j <= cols; j++) {
      if (ch === b[j - 1]) {
        current[j] = previous[j - 1];
      } else {
        current[j] = 1 + Math.min(previous[j - 1], current[j - 1], previous[j]);
      }
    }
    [previous, current] = [current, previous];
  }
  return previous[cols];
}
/**
 * Similarity of two strings as an integer percentage: 100 means identical,
 * 0 means nothing in common. Computed as 1 − (edit distance ÷ longer length).
 * Two empty strings score 100; one empty string against a non-empty one scores 0.
 */
function similarity(a: string, b: string): number {
  const aEmpty = a.length === 0;
  const bEmpty = b.length === 0;
  if (aEmpty || bEmpty) return aEmpty && bEmpty ? 100 : 0;

  const longest = Math.max(a.length, b.length);
  const edits = levenshtein(a, b);
  return Math.round((1 - edits / longest) * 100);
}
/**
 * Best similarity score (0-100) between a normalized query name and one CASL entry.
 *
 * The result is the maximum over:
 *   - the whole query against "given last", "last given" and "last, given";
 *   - the whole query against each alias;
 *   - a surname-based score, where the query's final token is taken as the surname:
 *     if it is ≥ 90 similar to the entry's last name and the query has earlier tokens,
 *     the score is 60% surname + 40% given-name similarity; otherwise it is the
 *     surname similarity scaled by 0.7.
 *
 * @param queryNorm Query name, already passed through normalize().
 * @param entry     List entry to compare against.
 */
function scoreEntry(queryNorm: string, entry: CaslEntry): number {
  const { given_name: given, last_name: last } = entry;

  const tokens = queryNorm.split(" ").filter(token => token.length > 0);
  const surname = tokens[tokens.length - 1] ?? "";
  const forenames = tokens.slice(0, -1).join(" ");

  // Whole-name comparisons: the three orderings of the entry's name, then every alias.
  const wholeNameTargets = [
    `${given} ${last}`.trim(),
    `${last} ${given}`.trim(),
    `${last}, ${given}`.trim(),
    ...entry.aliases,
  ];
  let best = -Infinity;
  for (const target of wholeNameTargets) {
    best = Math.max(best, similarity(queryNorm, target));
  }

  // Surname-driven comparison (surnames are more distinctive than given names).
  if (surname !== "" && last !== "") {
    const surnameScore = similarity(surname, last);
    const blended =
      surnameScore >= 90 && forenames !== ""
        ? Math.round(surnameScore * 0.6 + similarity(forenames, given) * 0.4)
        : Math.round(surnameScore * 0.7); // surname alone is penalized
    best = Math.max(best, blended);
  }

  return best;
}
// ─── Public screening API ────────────────────────────────────────────────────
/**
 * Details of the best-scoring list entry, as reported by screenName() when the
 * score is at least POSSIBLE_MATCH_THRESHOLD.
 */
export interface ScreeningMatch {
score: number; // best score found for the screened name (0-100)
last_name: string; // entry's last name as it appeared in the XML (not normalized)
given_name: string; // entry's given name as it appeared in the XML (not normalized)
country: string;
schedule: string;
item: string;
date_of_birth?: string; // absent when the entry carried no date
date_of_listing: string;
}
/**
 * Outcome of screenName().
 *
 * `result` is "hit" at or above HIT_THRESHOLD, "possible_match" at or above
 * POSSIBLE_MATCH_THRESHOLD, "clear" below that, and "error" when the name was too
 * short to screen or the list could not be loaded.
 */
export interface ScreeningResult {
result: "clear" | "hit" | "possible_match" | "error";
score: number | null; // best match score (null if clear/error)
match: ScreeningMatch | null; // null if clear/error
list_date: string; // list date of the cache that was screened against; "" on error
screened_name: string; // the name exactly as passed to screenName()
error?: string; // human-readable reason, set only when result is "error"
}
/**
 * Screen a full name against the CASL.
 *
 * The name is normalized, scored against every cached entry, and classified by the
 * best score: "hit" (≥ HIT_THRESHOLD), "possible_match" (≥ POSSIBLE_MATCH_THRESHOLD)
 * or "clear". Match details are attached only for the first two.
 *
 * Never throws for list-loading problems: if the list cannot be obtained the
 * function returns result "error" (it fails open) and logs the cause, leaving the
 * decision to the caller. A name that normalizes to fewer than two characters also
 * yields "error".
 *
 * @param fullName Director's full legal name as entered on the order form.
 * @returns ScreeningResult with result tier, best match, and list date.
 */
export async function screenName(fullName: string): Promise<ScreeningResult> {
  const query = normalize(fullName);
  if (query.length < 2) {
    return { result: "error", score: null, match: null, list_date: "", screened_name: fullName, error: "Name too short to screen" };
  }

  let list: CaslCache;
  try {
    list = await getCache();
  } catch (loadError) {
    console.error("[sanctions] Could not load CASL list:", loadError);
    // FAIL OPEN with logging — the order is not blocked when the list is unreachable,
    // but the failure is logged prominently so an admin can review it.
    return { result: "error", score: null, match: null, list_date: "", screened_name: fullName, error: "CASL list unavailable" };
  }

  // Find the highest-scoring entry; the first entry to reach a given score wins ties.
  let topScore = 0;
  let topEntry: CaslEntry | null = null;
  for (const candidate of list.entries) {
    const candidateScore = scoreEntry(query, candidate);
    if (!(candidateScore > topScore)) continue;
    topScore = candidateScore;
    topEntry = candidate;
    if (topScore === 100) break; // exact match — nothing can beat it
  }

  let match: ScreeningMatch | null = null;
  if (topEntry && topScore >= POSSIBLE_MATCH_THRESHOLD) {
    match = {
      score: topScore,
      last_name: topEntry.raw_last_name,
      given_name: topEntry.raw_given_name,
      country: topEntry.country,
      schedule: topEntry.schedule,
      item: topEntry.item,
      date_of_birth: topEntry.date_of_birth,
      date_of_listing: topEntry.date_of_listing,
    };
  }

  const result: ScreeningResult["result"] =
    topScore >= HIT_THRESHOLD
      ? "hit"
      : topScore >= POSSIBLE_MATCH_THRESHOLD
        ? "possible_match"
        : "clear";

  return {
    result,
    score: match === null ? null : topScore,
    match,
    list_date: list.list_date,
    screened_name: fullName,
  };
}
/**
 * Write one screening outcome to the `sanctions_screenings` table for audit purposes.
 *
 * Best-effort: any failure (building the row or running the INSERT) is logged to
 * the console and swallowed, so a logging problem never breaks the order flow.
 *
 * @param result Outcome returned by screenName().
 * @param opts   Optional request context stored alongside the result; missing values are stored as NULL.
 */
export async function logScreening(
  result: ScreeningResult,
  opts: { order_number?: string; ip_address?: string; user_agent?: string },
): Promise<void> {
  try {
    // The statement text is kept exactly as-is, including its line breaks.
    const insertSql = `INSERT INTO sanctions_screenings
(screened_name, order_number, result, match_score, matched_entry, list_date, ip_address, user_agent)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`;

    const matchedEntry = result.match ? JSON.stringify(result.match) : null;
    const row = [
      result.screened_name,
      opts.order_number ?? null,
      result.result,
      result.score ?? null,
      matchedEntry,
      result.list_date || null, // "" (error results) is stored as NULL
      opts.ip_address ?? null,
      opts.user_agent ?? null,
    ];

    await pool.query(insertSql, row);
  } catch (err) {
    // Non-blocking — log failure but don't break the order flow
    console.error("[sanctions] Failed to log screening result:", err);
  }
}
/**
 * Force-refresh the CASL cache (e.g. from an admin endpoint).
 *
 * The list is downloaded first and the in-memory copy is replaced only after the
 * download and parse have succeeded. The currently held list is never cleared up
 * front, so a failed refresh leaves it in place and screenings running during the
 * refresh keep using it.
 *
 * @returns Number of entries and list date of the newly loaded list.
 * @throws Whatever fetchAndParse() throws when the download or parse fails; the
 *         previously held list is left untouched in that case.
 */
export async function refreshCache(): Promise<{ entries: number; list_date: string }> {
  const fresh = await fetchAndParse();
  _cache = fresh;
  return { entries: fresh.entries.length, list_date: fresh.list_date };
}