phishguard-api / background.js
prashanth135's picture
Upload 38 files
bebe233 verified
// ============================================================
// PhishGuard AI - background.js
// MV3 Service Worker with feedback, retraining triggers, and
// model version polling.
//
// State (chrome.storage.local):
// phishguard_feedback_queue: FeedbackRecord[] (max 500, FIFO)
// scan_count: int (reset to 0 after a successful retrain)
// feedback_count: int (labeled samples since last retrain)
// last_retrain_ts: ISO8601
// model_version: int
// session_id: UUIDv4
//
// Triggers:
// 1. scan_count >= 50 AND at least 10 labeled records in the feedback queue
// 2. chrome.alarms "retrain_alarm" (24h) AND at least 10 labeled records in the feedback queue
// ============================================================
// ── Backend endpoints ────────────────────────────────────────────────
// Every endpoint is derived from this single base address.
const BACKEND_URL = "https://phishguard-api-z2wj.onrender.com";
const ANALYZE_URL = BACKEND_URL + "/analyze";
const RETRAIN_URL = BACKEND_URL + "/retrain";
const MODEL_VERSION_URL = BACKEND_URL + "/model_version";

// ── Tunables ─────────────────────────────────────────────────────────
const CACHE_TTL_MS = 1000 * 60 * 30; // cached verdicts expire after 30 minutes
const MAX_QUEUE_SIZE = 500; // cap on stored feedback records
const RETRAIN_URL_THRESHOLD = 50; // scans required for a count-triggered retrain
const MIN_LABELED_SAMPLES = 10; // labeled records required for any retrain

// ── In-memory state ──────────────────────────────────────────────────
const urlCache = new Map(); // url -> { result, ts }
const tabResultCache = new Map(); // tabId -> latest verdict shown for that tab
const pageSignals = new Map(); // tabId -> { title, snippet, signals } from content.js
// ── TIER 1: Whitelist (O(1) Set lookup) ──────────────────────────────
// Root domains that skip every later tier. Entries are compared against the
// value returned by getRootDomain(), so they carry no "www." or sub-domain.
const WHITELIST = new Set([
  "google.com", "youtube.com", "facebook.com",
  "amazon.com", "wikipedia.org", "twitter.com",
  "instagram.com", "linkedin.com", "microsoft.com",
  "apple.com", "github.com", "stackoverflow.com",
  "reddit.com", "netflix.com", "paypal.com",
  "bankofamerica.com", "chase.com", "wellsfargo.com",
  "yahoo.com", "bing.com", "outlook.com",
  "office.com", "live.com", "adobe.com",
  "dropbox.com", "zoom.us", "slack.com",
  "spotify.com", "twitch.tv", "ebay.com",
  "walmart.com", "target.com", "bestbuy.com",
  "airbnb.com", "x.com", "tiktok.com",
  "pinterest.com", "quora.com", "medium.com",
]);
// Common public suffixes that span two labels. Without them "shop.example.co.uk"
// would collapse to "co.uk". This is a small built-in list, not the full
// Public Suffix List.
const MULTI_LABEL_SUFFIXES = new Set([
  "co.uk", "org.uk", "ac.uk", "gov.uk",
  "com.au", "net.au", "org.au",
  "co.nz", "co.jp", "co.in", "co.kr", "co.za",
  "com.br", "com.mx", "com.cn", "com.tr", "com.sg", "com.hk", "com.tw",
]);

/**
 * Reduce a URL to the domain used for whitelist and brand checks.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string|null} The last two host labels (three when the host ends in
 *   a known two-label suffix), the full address for IPv4 hosts, or null when
 *   `url` cannot be parsed.
 */
function getRootDomain(url) {
  let host;
  try {
    host = new URL(url).hostname;
  } catch {
    return null;
  }

  // A trailing dot is the same DNS name; drop it so "google.com." is not
  // reduced to "com.".
  host = host.replace(/\.$/, "").replace(/^www\./, "");

  // IPv4 literals have no registrable domain; slicing them would give "3.4".
  if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host)) return host;

  const labels = host.split(".");
  const lastTwo = labels.slice(-2).join(".");
  if (labels.length > 2 && MULTI_LABEL_SUFFIXES.has(lastTwo)) {
    return labels.slice(-3).join(".");
  }
  return lastTwo;
}
// ── TIER 2: Local heuristic scoring ──────────────────────────────────
/**
 * Score a URL with local heuristics.
 *
 * Host-based checks (IP literal, TLD, sub-domain count, hyphens, port) use the
 * parsed hostname, so text in the path or query cannot trigger them. They are
 * skipped when `url` cannot be parsed. Keyword and brand checks still scan the
 * whole lower-cased URL.
 *
 * @param {string} url - URL to score.
 * @returns {{score: number, signals: string[]}} Score capped at 100 plus a
 *   human-readable description of every rule that fired.
 */
function heuristicScore(url) {
  let score = 0;
  const signals = [];
  const u = url.toLowerCase();

  // Parse once; every host-based rule below reuses the result.
  let parsed = null;
  try {
    parsed = new URL(url);
  } catch {
    // Unparseable input: only the string-based rules apply.
  }
  const host = parsed ? parsed.hostname.replace(/\.$/, "") : "";

  // IP as hostname (25 pts): tested on the host itself so "1.2.3.4.example.com"
  // is not flagged and "user@1.2.3.4" is.
  if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host)) {
    score += 25;
    signals.push("IP as hostname");
  }

  // Suspicious TLD (20 pts): must be the end of the hostname, not a substring.
  const badTLDs = [".xyz", ".tk", ".ml", ".ga", ".cf", ".gq", ".pw", ".top", ".click"];
  const badTld = badTLDs.find((tld) => host.endsWith(tld));
  if (badTld) {
    score += 20;
    signals.push(`Suspicious TLD (${badTld})`);
  }

  // Phishing keywords (15 pts, counted once however many match)
  const keywords = ["login", "verify", "secure", "update", "account", "banking",
    "signin", "reset", "confirm", "suspend", "webscr", "cmd", "payment", "alert"];
  const kwHits = keywords.filter((kw) => u.includes(kw));
  if (kwHits.length > 0) {
    score += 15;
    signals.push(`Keywords: ${kwHits.join(", ")}`);
  }

  // Brand spoofing (15 pts): brand appears in the URL but the root domain does
  // not start with it.
  const brands = ["paypal", "google", "apple", "microsoft", "amazon", "netflix",
    "facebook", "instagram", "chase", "wellsfargo", "bankofamerica"];
  const domain = getRootDomain(url);
  if (domain) {
    const spoofed = brands.find((brand) => u.includes(brand) && !domain.startsWith(brand));
    if (spoofed) {
      score += 15;
      signals.push(`Brand spoofing: ${spoofed}`);
    }
  }

  // Excessive subdomains (10 pts)
  const subCount = host.split(".").length - 2;
  if (subCount >= 3) {
    score += 10;
    signals.push(`${subCount} subdomains`);
  }

  // URL length (5 pts)
  if (url.length > 100) {
    score += 5;
    signals.push(`Long URL (${url.length} chars)`);
  }

  // Hyphens (5 pts)
  const hyphens = (host.match(/-/g) || []).length;
  if (hyphens >= 3) {
    score += 5;
    signals.push(`${hyphens} hyphens in domain`);
  }

  // Non-standard port (5 pts); URL.port is "" for the scheme's default port.
  const port = parsed ? parsed.port : "";
  if (port && port !== "80" && port !== "443") {
    score += 5;
    signals.push(`Non-standard port :${port}`);
  }

  return { score: Math.min(score, 100), signals };
}
// ── URL Cache ────────────────────────────────────────────────────────
/**
 * Look up a cached verdict for `url`.
 *
 * @param {string} url - Exact URL used as the cache key.
 * @returns {object|null} The stored result, or null when nothing is cached or
 *   the entry is older than CACHE_TTL_MS (the stale entry is removed).
 */
function getCached(url) {
  const hit = urlCache.get(url);
  if (hit) {
    const ageMs = Date.now() - hit.ts;
    const expired = ageMs > CACHE_TTL_MS;
    if (!expired) {
      return hit.result;
    }
    urlCache.delete(url);
  }
  return null;
}
/**
 * Store a verdict for `url`, keeping at most 500 entries.
 *
 * The entry is removed before being re-inserted so that a refreshed URL moves
 * to the newest position; otherwise it would keep its old insertion slot and
 * could be the first one evicted.
 *
 * @param {string} url - Cache key.
 * @param {object} result - Verdict to remember.
 */
function setCache(url, result) {
  const MAX_ENTRIES = 500;
  urlCache.delete(url);
  urlCache.set(url, { result, ts: Date.now() });
  if (urlCache.size > MAX_ENTRIES) {
    // Map iterates in insertion order, so the first key is the oldest.
    const oldestKey = urlCache.keys().next().value;
    urlCache.delete(oldestKey);
  }
}
// ── Badge ────────────────────────────────────────────────────────────
/**
 * Paint the toolbar badge for one tab.
 *
 * @param {number} tabId - Tab whose badge is updated.
 * @param {string} status - One of "safe", "blocked", "warn", "loading",
 *   "none"; anything else uses the "none" colour.
 * @param {string} [text] - Badge text; a falsy value clears it.
 */
function setBadge(tabId, status, text) {
  const palette = {
    safe: "#22C55E",
    blocked: "#EF4444",
    warn: "#F59E0B",
    loading: "#534AB7",
    none: "#888888",
  };
  const color = palette[status] || palette.none;
  const label = text || "";
  chrome.action.setBadgeBackgroundColor({ color, tabId });
  chrome.action.setBadgeText({ text: label, tabId });
}
// ── Backend fetch with retry ─────────────────────────────────────────
/**
 * POST `payload` as JSON to `url` and return the parsed JSON response.
 *
 * Each attempt is aborted after 15 s. The abort timer covers the body read as
 * well as the request and is always cleared, whether the attempt succeeds or
 * fails. Any failure (network error, abort, non-2xx status, bad JSON) is
 * retried after a 2 s pause while `retryCount` is positive.
 *
 * @param {string} url - Endpoint to call.
 * @param {object} payload - JSON-serialisable request body.
 * @param {number} [retryCount=1] - Remaining retries after this attempt.
 * @returns {Promise<any>} Parsed response body.
 * @throws {Error} The last failure once retries are exhausted.
 */
async function fetchBackend(url, payload, retryCount = 1) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 15000);
  try {
    const response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
      signal: controller.signal,
    });
    if (!response.ok) throw new Error(`Server ${response.status}`);
    return await response.json();
  } catch (err) {
    if (retryCount <= 0) throw err;
  } finally {
    // Runs on every path so a failed attempt never leaves a pending timer.
    clearTimeout(timeout);
  }

  // Only reached when the attempt failed and a retry is still available.
  await new Promise((resolve) => setTimeout(resolve, 2000));
  return fetchBackend(url, payload, retryCount - 1);
}
// ── SHA256 hash ──────────────────────────────────────────────────────
/**
 * Hash a string with SHA-256.
 *
 * @param {string} text - Input, encoded with TextEncoder before hashing.
 * @returns {Promise<string>} Lower-case hex digest (64 characters).
 */
async function sha256(text) {
  const bytes = new TextEncoder().encode(text);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));
  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
// ── Storage helpers ──────────────────────────────────────────────────
/**
 * Promise wrapper around chrome.storage.local.get.
 *
 * @param {string[]} keys - Keys to read.
 * @returns {Promise<object>} Whatever the storage callback receives.
 */
async function getStorage(keys) {
  return new Promise((resolve) => {
    chrome.storage.local.get(keys, (items) => resolve(items));
  });
}
/**
 * Promise wrapper around chrome.storage.local.set.
 *
 * @param {object} data - Key/value pairs to write.
 * @returns {Promise<void>} Settles when the storage callback runs.
 */
async function setStorage(data) {
  return new Promise((resolve) => {
    chrome.storage.local.set(data, (ack) => resolve(ack));
  });
}
/**
 * Read the feedback queue from storage.
 *
 * @returns {Promise<object[]>} Stored records, or an empty array when the key
 *   has never been written.
 */
async function getQueue() {
  const { phishguard_feedback_queue: stored } = await getStorage(["phishguard_feedback_queue"]);
  return stored || [];
}
/**
 * Persist the feedback queue, keeping only the newest MAX_QUEUE_SIZE records.
 *
 * @param {object[]} queue - Records ordered oldest to newest.
 * @returns {Promise<void>}
 */
async function setQueue(queue) {
  // FIFO eviction: anything beyond the cap is dropped from the front.
  const overflow = queue.length - MAX_QUEUE_SIZE;
  const trimmed = overflow > 0 ? queue.slice(overflow) : queue;
  await setStorage({ phishguard_feedback_queue: trimmed });
}
// ── ON INSTALL ───────────────────────────────────────────────────────
// onInstalled also fires for extension and browser updates. State is reset
// only on a first install; on any other reason just the keys that are missing
// from storage are filled in, so the feedback queue, counters, model version
// and session id survive an update.
chrome.runtime.onInstalled.addListener(async (details) => {
  const defaults = {
    session_id: crypto.randomUUID(),
    scan_count: 0,
    feedback_count: 0,
    last_retrain_ts: null,
    model_version: 0,
    phishguard_feedback_queue: [],
  };

  let sessionId = defaults.session_id;
  if (details?.reason === "install") {
    await setStorage(defaults);
  } else {
    const existing = await getStorage(Object.keys(defaults));
    const missing = Object.fromEntries(
      Object.entries(defaults).filter(([key]) => existing[key] === undefined),
    );
    if (Object.keys(missing).length > 0) {
      await setStorage(missing);
    }
    sessionId = existing.session_id ?? defaults.session_id;
  }

  // 24-hour retraining alarm
  chrome.alarms.create("retrain_alarm", { periodInMinutes: 1440 });
  // 30-minute model polling alarm
  chrome.alarms.create("model_poll_alarm", { periodInMinutes: 30 });
  console.log("[PhishGuard] Installed. Session:", sessionId);
});
// ── ALARM HANDLERS ───────────────────────────────────────────────────
// Dispatches the two periodic alarms by name: the retrain alarm runs a
// timer-triggered retrain check, the poll alarm checks the backend model
// version. Other alarm names are ignored.
chrome.alarms.onAlarm.addListener(async ({ name }) => {
  switch (name) {
    case "retrain_alarm":
      console.log("[PhishGuard] Retrain alarm fired");
      await checkRetrain("timer");
      break;
    case "model_poll_alarm":
      await pollModelVersion();
      break;
    default:
      break;
  }
});
// ── MAIN URL LISTENER ────────────────────────────────────────────────
// Runs the tiered check for every completed top-level http(s) navigation:
// whitelist → URL cache → local heuristic → backend (with heuristic fallback).
// Each outcome is written to storage as `lastResult`, cached per tab, shown on
// the badge, and blocked verdicts redirect the tab through blockPage().
chrome.webNavigation.onCompleted.addListener(async (details) => {
  if (details.frameId !== 0) return; // only the main frame is scanned
  const url = details.url;
  if (!url.startsWith("http")) return;
  const tabId = details.tabId;
  const domain = getRootDomain(url);
  if (!domain) return;

  setBadge(tabId, "loading", "…");

  // TIER 1: Whitelist
  if (WHITELIST.has(domain)) {
    const result = {
      url, status: "safe", tier: 1, method: "whitelist",
      confidence: 0, heuristic_score: 0, signals: [],
    };
    await setStorage({ lastResult: result });
    tabResultCache.set(tabId, result);
    setBadge(tabId, "safe", "✓");
    return;
  }

  // Cache check
  const cached = getCached(url);
  if (cached) {
    await setStorage({ lastResult: cached });
    tabResultCache.set(tabId, cached);
    setBadge(tabId, cached.status, cached.status === "blocked" ? "!" : "✓");
    if (cached.status === "blocked") blockPage(tabId, url, cached);
    return;
  }

  // TIER 2: Heuristic — a score of 80+ blocks without asking the backend.
  const hResult = heuristicScore(url);
  if (hResult.score >= 80) {
    const result = {
      url, status: "blocked", tier: 2, method: "heuristic",
      confidence: hResult.score / 100, heuristic_score: hResult.score,
      signals: hResult.signals, is_phishing: true,
    };
    setCache(url, result);
    await setStorage({ lastResult: result });
    tabResultCache.set(tabId, result);
    setBadge(tabId, "blocked", "!");
    blockPage(tabId, url, result);
    await storeFeedbackRecord(url, result);
    await incrementScanCount();
    return;
  }

  // TIER 3+4: Send to backend.
  // Only the network call is guarded: a failure in the bookkeeping that follows
  // must not be reported as "backend unreachable" or replace the verdict with
  // the heuristic fallback.
  const signals = pageSignals.get(tabId) || {};
  let apiResult = null;
  let backendError = null;
  try {
    apiResult = await fetchBackend(ANALYZE_URL, {
      url,
      heuristic_score: hResult.score,
      page_title: signals.title || "",
      page_snippet: signals.snippet || "",
    });
    if (apiResult === null || typeof apiResult !== "object") {
      throw new Error("Malformed backend response");
    }
  } catch (err) {
    backendError = err;
  }

  if (backendError === null) {
    const finalResult = {
      url,
      status: apiResult.is_phishing ? "blocked" : "safe",
      tier: apiResult.tier || 3,
      method: apiResult.method || "ensemble",
      confidence: apiResult.confidence || 0,
      heuristic_score: apiResult.heuristic_score || hResult.score,
      signals: apiResult.signals || hResult.signals,
      is_phishing: apiResult.is_phishing,
      details: apiResult.details || {},
    };
    setCache(url, finalResult);
    await setStorage({ lastResult: finalResult });
    tabResultCache.set(tabId, finalResult);
    if (finalResult.status === "blocked") {
      setBadge(tabId, "blocked", "!");
      blockPage(tabId, url, finalResult);
    } else if (finalResult.confidence >= 0.4) {
      setBadge(tabId, "warn", "?");
    } else {
      setBadge(tabId, "safe", "✓");
    }
    await storeFeedbackRecord(url, finalResult);
  } else {
    console.log("[PhishGuard] Backend unreachable:", backendError.message);
    // Without the backend the local score decides: 50+ blocks, 30+ warns.
    const fallback = {
      url,
      status: hResult.score >= 50 ? "blocked" : "safe",
      tier: 2,
      method: "heuristic-fallback",
      confidence: hResult.score / 100,
      heuristic_score: hResult.score,
      signals: hResult.signals,
      is_phishing: hResult.score >= 50,
      details: { backend_error: backendError.message },
    };
    setCache(url, fallback);
    await setStorage({ lastResult: fallback });
    tabResultCache.set(tabId, fallback);
    if (hResult.score >= 50) {
      setBadge(tabId, "blocked", "!");
      blockPage(tabId, url, fallback);
    } else if (hResult.score >= 30) {
      setBadge(tabId, "warn", "?");
    } else {
      setBadge(tabId, "none", "");
    }
    await storeFeedbackRecord(url, fallback);
  }

  await incrementScanCount();
  await checkRetrain("count");
  pageSignals.delete(tabId);
}, { url: [{ schemes: ["http", "https"] }] });
// ── Feedback Record Storage ──────────────────────────────────────────
/**
 * Append an unlabeled feedback record for a scanned URL to the queue.
 *
 * @param {string} url - Scanned URL; also hashed into `url_hash`.
 * @param {object} result - Verdict object; missing fields fall back to
 *   0 / [] / "safe".
 * @returns {Promise<void>}
 */
async function storeFeedbackRecord(url, result) {
  const digest = await sha256(url);
  const record = {
    url,
    verdict: result.is_phishing ? "phishing" : "safe",
    confidence: result.confidence || 0,
    tier_used: result.tier || 0,
    heuristic_score: result.heuristic_score || 0,
    signals: result.signals || [],
    user_feedback: null, // filled in later by a submit_feedback message
    timestamp: new Date().toISOString(),
    feedback_ts: null,
    url_hash: digest,
  };
  const stored = await getStorage(["session_id"]);
  record.session_id = stored.session_id || "";

  const pending = await getQueue();
  pending.push(record);
  await setQueue(pending);
}
/**
 * Add one to the persisted `scan_count` (treated as 0 when unset).
 *
 * @returns {Promise<void>}
 */
async function incrementScanCount() {
  const { scan_count: previous } = await getStorage(["scan_count"]);
  const next = (previous || 0) + 1;
  await setStorage({ scan_count: next });
}
// ── Block Page ───────────────────────────────────────────────────────
/**
 * Redirect a tab to the extension's block page.
 *
 * Writes the verdict (with status forced to "blocked") to storage as
 * `lastResult`, caches the original verdict for the tab, then navigates the
 * tab to popup.html with the URL, score and method in the query string.
 *
 * @param {number} tabId - Tab to redirect.
 * @param {string} url - URL that was blocked.
 * @param {object} result - Verdict; `confidence` (0..1) becomes a 0..100 score.
 */
function blockPage(tabId, url, result) {
  const blockedResult = { ...result, status: "blocked" };
  chrome.storage.local.set({ lastResult: blockedResult });
  tabResultCache.set(tabId, result);

  const percent = Math.round((result.confidence || 0) * 100);
  const base = chrome.runtime.getURL("popup.html");
  const query = [
    "blocked=1",
    `url=${encodeURIComponent(url)}`,
    `score=${percent}`,
    `method=${encodeURIComponent(result.method || "")}`,
  ].join("&");

  chrome.tabs.update(tabId, { url: `${base}?${query}` });
}
// ── Retrain Check ────────────────────────────────────────────────────
/**
 * Send a retrain request when enough labeled feedback exists and the trigger
 * condition holds.
 *
 * Nothing happens unless the queue holds at least MIN_LABELED_SAMPLES records
 * whose `user_feedback` is not null. Beyond that, a "timer" trigger always
 * proceeds; any other trigger needs `scan_count` >= RETRAIN_URL_THRESHOLD.
 *
 * @param {string} [trigger="count"] - "timer" or "count".
 * @returns {Promise<void>}
 */
async function checkRetrain(trigger = "count") {
  const labeled = (await getQueue()).filter((record) => record.user_feedback !== null);
  if (labeled.length < MIN_LABELED_SAMPLES) {
    console.log(`[PhishGuard] Not enough labeled samples (${labeled.length}/${MIN_LABELED_SAMPLES})`);
    return;
  }

  const { scan_count: storedScans } = await getStorage(["scan_count"]);
  const scanCount = storedScans || 0;
  const dueByTimer = trigger === "timer";
  const dueByCount = scanCount >= RETRAIN_URL_THRESHOLD;
  if (!dueByTimer && !dueByCount) {
    return;
  }

  console.log(`[PhishGuard] Triggering retrain: trigger=${trigger}, labeled=${labeled.length}, scans=${scanCount}`);
  await sendRetrainRequest(labeled, trigger);
}
/**
 * POST labeled samples to the retrain endpoint and, when the backend reports
 * success, reset the counters, drop the sent records from the queue and show
 * a notification. Any failure inside the request/cleanup sequence is logged
 * and swallowed.
 *
 * @param {object[]} samples - Labeled feedback records to send.
 * @param {string} trigger - What caused the retrain ("timer" or "count").
 * @returns {Promise<void>}
 */
async function sendRetrainRequest(samples, trigger) {
  const stored = await getStorage(["session_id"]);
  try {
    const payload = {
      samples,
      trigger,
      session_id: stored.session_id || "",
      extension_version: "3.0",
    };
    const outcome = await fetchBackend(RETRAIN_URL, payload);
    if (outcome.status !== "success") {
      return;
    }

    // Reset counters
    const now = new Date().toISOString();
    await setStorage({ scan_count: 0, feedback_count: 0, last_retrain_ts: now });

    // Remove sent records from queue (matched by url_hash)
    const current = await getQueue();
    const sentHashes = new Set(samples.map((sample) => sample.url_hash));
    const unsent = current.filter((record) => !sentHashes.has(record.url_hash));
    await setQueue(unsent);

    // Show notification
    showRetrainNotification(outcome.accuracy_delta || {});
    console.log("[PhishGuard] Retrain success:", outcome);
  } catch (err) {
    console.error("[PhishGuard] Retrain request failed:", err.message);
  }
}
/**
 * Show the "retrain complete" notification.
 *
 * Only finite, non-zero deltas are listed. The "Models improved!" wording is
 * used only when none of the listed deltas is negative; a regression is
 * reported as a neutral accuracy change.
 *
 * @param {{bert?: number, gnn?: number}} delta - Accuracy deltas as fractions
 *   (0.015 is shown as "1.5%").
 */
function showRetrainNotification(delta) {
  const source = delta || {};
  const reported = [["BERT", source.bert], ["GNN", source.gnn]]
    .map(([label, raw]) => [label, Number(raw)])
    .filter(([, value]) => Number.isFinite(value) && value !== 0);

  const parts = reported
    .map(([label, value]) => `${label}: ${(value * 100).toFixed(1)}%`)
    .join(", ");
  const regressed = reported.some(([, value]) => value < 0);

  let message = "Models updated with your feedback";
  if (parts && regressed) {
    message = `Models updated. ${parts} accuracy change from your feedback`;
  } else if (parts) {
    message = `Models improved! ${parts} accuracy from your feedback`;
  }

  chrome.notifications.create("retrain_complete", {
    type: "basic",
    iconUrl: "icons/icon48.png",
    title: "PhishGuard AI Updated",
    message,
  });
}
// ── Model Version Polling ────────────────────────────────────────────
/**
 * Ask the backend for its model version and, when it is newer than the stored
 * one, persist it, clear the URL cache and notify the user.
 *
 * Best-effort: failures are logged at debug level and never thrown. The 10 s
 * abort timer is cleared on every path so a failed poll leaves no pending
 * timer behind.
 *
 * @returns {Promise<void>}
 */
async function pollModelVersion() {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 10000);
  try {
    const resp = await fetch(MODEL_VERSION_URL, { signal: controller.signal });
    if (!resp.ok) return;
    const info = await resp.json();
    const stored = await getStorage(["model_version"]);
    if (info.version > (stored.model_version || 0)) {
      await setStorage({ model_version: info.version });
      // Clear URL cache (stale results)
      urlCache.clear();
      chrome.notifications.create("model_updated", {
        type: "basic",
        iconUrl: "icons/icon48.png",
        title: "PhishGuard Models Updated",
        message: `Model v${info.version} is now active`,
      });
    }
  } catch (err) {
    // Model polling is best-effort — record the failure but do not propagate it.
    console.debug("[PhishGuard] Model version poll failed:", err && err.message);
  } finally {
    clearTimeout(timeout);
  }
}
// ── Message Handler ──────────────────────────────────────────────────
// Routes runtime messages by `msg.type` (or `msg.action` for the Gmail
// bridge). Branches that answer asynchronously return true to keep the
// response channel open, and always call sendResponse, including on failure.
chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
  // Page signals from content.js
  if (msg.type === "page_signals") {
    if (sender.tab) {
      pageSignals.set(sender.tab.id, {
        title: msg.title || "",
        snippet: msg.snippet || "",
        signals: msg.signals || [],
      });
    }
  }

  // Submit feedback from popup.js / content.js
  if (msg.type === "submit_feedback") {
    (async () => {
      try {
        const queue = await getQueue();
        // The same URL can be queued more than once; feedback refers to the
        // most recent scan, so search from the end.
        const idx = queue.map((record) => record.url_hash).lastIndexOf(msg.url_hash);
        if (idx < 0) {
          sendResponse({ success: false, error: "Record not found" });
          return;
        }

        const firstLabel = queue[idx].user_feedback == null;
        queue[idx].user_feedback = msg.feedback; // "correct" or "incorrect"
        queue[idx].feedback_ts = new Date().toISOString();
        await setQueue(queue);

        // feedback_count tracks labeled samples, so changing the label on an
        // already-labeled record must not count twice.
        if (firstLabel) {
          const data = await getStorage(["feedback_count"]);
          await setStorage({ feedback_count: (data.feedback_count || 0) + 1 });
        }

        // Check if we should trigger retraining
        await checkRetrain("count");
        sendResponse({ success: true });
      } catch (err) {
        console.error("[PhishGuard] submit_feedback failed:", err);
        sendResponse({ success: false, error: err.message });
      }
    })();
    return true; // async response
  }

  // Get status for popup
  if (msg.type === "get_status") {
    (async () => {
      try {
        const data = await getStorage([
          "scan_count", "feedback_count", "last_retrain_ts",
          "model_version", "session_id",
        ]);
        const queue = await getQueue();
        const labeled = queue.filter((record) => record.user_feedback !== null).length;
        const DAY_MS = 24 * 60 * 60 * 1000;
        const lastRetrain = data.last_retrain_ts ? new Date(data.last_retrain_ts) : null;
        // Time left until 24 h have passed since the last retrain (a full day
        // when there has been none).
        const nextTimerMs = lastRetrain
          ? Math.max(0, DAY_MS - (Date.now() - lastRetrain.getTime()))
          : DAY_MS;
        sendResponse({
          scan_count: data.scan_count || 0,
          feedback_count: data.feedback_count || 0,
          labeled_count: labeled,
          last_retrain_ts: data.last_retrain_ts,
          model_version: data.model_version || 0,
          next_retrain_urls_remaining: Math.max(0, RETRAIN_URL_THRESHOLD - (data.scan_count || 0)),
          next_retrain_time_remaining_ms: nextTimerMs,
          min_labeled_needed: Math.max(0, MIN_LABELED_SAMPLES - labeled),
        });
      } catch (err) {
        console.error("[PhishGuard] get_status failed:", err);
        sendResponse({ error: err.message });
      }
    })();
    return true; // async response
  }

  // Per-tab result cache query from popup
  if (msg.type === "get_tab_result") {
    const result = tabResultCache.get(msg.tabId);
    sendResponse({ result: result || null });
    return false;
  }

  // User override (Proceed Anyway): cache a "safe" verdict for this URL so the
  // next navigation to it is served from the cache.
  if (msg.type === "whitelist_url") {
    const override = {
      url: msg.url, status: "safe", tier: 0,
      method: "user-override", confidence: 0,
    };
    setCache(msg.url, override);
    chrome.storage.local.set({ lastResult: override });
    sendResponse({ success: true });
  }

  // Gmail scanner bridge
  if (msg.action === "analyzeEmail") {
    const emailURL = ANALYZE_URL.replace(/\/analyze\/?$/, "/analyze/email");
    fetch(emailURL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(msg.data),
    })
      .then((r) => (r.ok ? r.json() : Promise.reject(new Error(`${r.status}`))))
      .then((data) => sendResponse(data))
      .catch((err) => {
        console.log("[PhishGuard] Email analysis failed:", err.message);
        sendResponse({
          status: "error",
          analysis: { isPhishing: false, probability: 0, reason: "Backend unreachable" },
        });
      });
    return true; // async response
  }
});
// ── Tab cleanup ──────────────────────────────────────────────────────
// A closed tab no longer needs its page signals or cached verdict.
chrome.tabs.onRemoved.addListener((closedTabId) => {
  for (const perTabStore of [pageSignals, tabResultCache]) {
    perTabStore.delete(closedTabId);
  }
});
// When a tab's URL changes, forget its previous verdict and clear the badge.
// The redirect that blockPage() issues to popup.html is skipped: it would
// otherwise delete the blocked verdict cached for the tab just before the
// redirect.
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
  if (!changeInfo.url) return;
  if (changeInfo.url.startsWith(chrome.runtime.getURL("popup.html"))) return;
  tabResultCache.delete(tabId);
  setBadge(tabId, "none", "");
});