"""OpenFDA API Provider for adverse events, drug labels, recalls, and more.""" import re import logging from typing import List, Dict, Any, Callable, Optional import httpx from functools import lru_cache from core.base_provider import BaseProvider from core.decorators import safe_json_return, with_retry from providers import register_provider logger = logging.getLogger(__name__) # OpenFDA API endpoints OPENFDA_DRUG_EVENT = "https://api.fda.gov/drug/event.json" OPENFDA_DRUG_LABEL = "https://api.fda.gov/drug/label.json" OPENFDA_DRUG_NDC = "https://api.fda.gov/drug/ndc.json" OPENFDA_DRUG_ENFORCEMENT = "https://api.fda.gov/drug/enforcement.json" OPENFDA_DRUG_DRUGSFDA = "https://api.fda.gov/drug/drugsfda.json" OPENFDA_DEVICE_EVENT = "https://api.fda.gov/device/event.json" OPENFDA_DEVICE_ENFORCEMENT = "https://api.fda.gov/device/enforcement.json" OPENFDA_DEVICE_CLASSIFICATION = "https://api.fda.gov/device/classification.json" OPENFDA_DEVICE_510K = "https://api.fda.gov/device/510k.json" OPENFDA_DEVICE_PMA = "https://api.fda.gov/device/pma.json" OPENFDA_FOOD_ENFORCEMENT = "https://api.fda.gov/food/enforcement.json" OPENFDA_FOOD_EVENT = "https://api.fda.gov/food/event.json" def normalize_drug_name(raw_name: str) -> str: """Extract base drug name from full medication name.""" if not raw_name: return "" cleaned = raw_name patterns = [ r'\s+\d+(?:\.\d+)?\s*(MG|MCG|ML|G|%)', r'\s+Oral\s+', r'\s+Tablet\s*', r'\s+Capsule\s*', r'\s+Injectable\s*', r'\s+Solution\s*', r'\s+Suspension\s*' ] for pattern in patterns: cleaned = re.split(pattern, cleaned, flags=re.IGNORECASE)[0] base_name = cleaned.strip().split()[0] if cleaned.strip() else cleaned.strip() return base_name @register_provider("openfda") class OpenFDAProvider(BaseProvider): """Provider for OpenFDA APIs.""" def __init__(self, client: httpx.AsyncClient, api_key: Optional[str] = None): super().__init__("openfda", client) self.api_key = api_key async def initialize(self) -> None: """Initialize OpenFDA provider.""" logger.info("OpenFDA provider initialized") def get_tools(self) -> List[Callable]: """Return all OpenFDA tools.""" return [ self.openfda_get_adverse_event_summary, self.openfda_fetch_adverse_events, self.openfda_top_reactions, self.openfda_search_drug_labels, self.openfda_search_ndc, self.openfda_search_drug_recalls, self.openfda_search_drugs_fda, self.openfda_search_device_events, self.openfda_search_device_recalls, self.openfda_search_device_classifications, self.openfda_search_510k, self.openfda_search_pma, self.openfda_search_food_recalls, self.openfda_search_food_events, ] def _build_params(self, search: str, limit: int = 10) -> Dict[str, Any]: """Build query parameters with optional API key.""" params = {"search": search, "limit": min(limit, 100)} if self.api_key: params["api_key"] = self.api_key return params @with_retry async def _fetch_fda_data(self, url: str, params: Dict[str, Any]) -> Dict[str, Any]: """Fetch data from OpenFDA API with retry logic.""" response = await self.client.get(url, params=params) response.raise_for_status() return response.json() # Drug Adverse Events @safe_json_return async def openfda_get_adverse_event_summary(self, drug_name: str) -> Dict[str, Any]: """ Get high-level adverse event summary for a medication from FAERS database. Args: drug_name: Name of the medication (generic or brand name) Returns: Summary with total reports, serious reports, and top reactions """ clean_name = normalize_drug_name(drug_name) logger.info(f"FDA adverse event query: '{drug_name}' -> '{clean_name}'") # Query for count and reactions search_query = f'patient.drug.medicinalproduct:"{clean_name}"' params = self._build_params(search_query, limit=1) params["count"] = "patient.reaction.reactionmeddrapt.exact" data = await self._fetch_fda_data(OPENFDA_DRUG_EVENT, params) reactions = data.get("results", [])[:5] top_reactions = [ {"reaction": r.get("term", "Unknown"), "count": r.get("count", 0)} for r in reactions ] # Calculate total from sum of all reaction counts (not just top 5) # When using count param, meta.results.total may not reflect actual total all_reactions = data.get("results", []) total_reports = sum(r.get("count", 0) for r in all_reactions) # Get serious event count serious_query = f'{search_query}+AND+serious:1' serious_params = self._build_params(serious_query, limit=1) try: serious_data = await self._fetch_fda_data(OPENFDA_DRUG_EVENT, serious_params) serious_reports = serious_data.get("meta", {}).get("results", {}).get("total", 0) except Exception: serious_reports = 0 return { "drug": clean_name, "total_reports": total_reports, "serious_reports": serious_reports, "top_reactions": top_reactions } @safe_json_return async def openfda_fetch_adverse_events( self, drug_name: str, limit: int = 25, max_pages: int = 1 ) -> Dict[str, Any]: """ Fetch raw adverse event reports from OpenFDA with pagination. Args: drug_name: Name of the medication limit: Number of events per page (max 100) max_pages: Number of pages to fetch Returns: List of adverse event reports with metadata """ clean_name = normalize_drug_name(drug_name) search_query = f'patient.drug.medicinalproduct:"{clean_name}"' all_events = [] for page in range(max_pages): params = self._build_params(search_query, limit=min(limit, 100)) params["skip"] = page * limit try: data = await self._fetch_fda_data(OPENFDA_DRUG_EVENT, params) results = data.get("results", []) for result in results: reactions = result.get("patient", {}).get("reaction", []) reaction_list = [r.get("reactionmeddrapt", "Unknown") for r in reactions] all_events.append({ "safety_report_id": result.get("safetyreportid", "Unknown"), "receive_date": result.get("receivedate", "Unknown"), "serious": result.get("serious", 0) == 1, "reactions": reaction_list[:5], "patient_age": result.get("patient", {}).get("patientonsetage", "Unknown"), "patient_sex": result.get("patient", {}).get("patientsex", "Unknown") }) if len(results) < limit: break except Exception as e: logger.warning(f"Error fetching page {page}: {e}") break return { "drug": clean_name, "events": all_events, "total_fetched": len(all_events) } @safe_json_return async def openfda_top_reactions(self, drug_name: str) -> Dict[str, Any]: """ Get top 5 most commonly reported adverse reactions for a medication. Args: drug_name: Name of the medication Returns: Top 5 reactions with counts """ clean_name = normalize_drug_name(drug_name) search_query = f'patient.drug.medicinalproduct:"{clean_name}"' params = self._build_params(search_query, limit=1) params["count"] = "patient.reaction.reactionmeddrapt.exact" data = await self._fetch_fda_data(OPENFDA_DRUG_EVENT, params) reactions = data.get("results", [])[:5] return { "drug": clean_name, "top_reactions": [ {"reaction": r.get("term", "Unknown"), "count": r.get("count", 0)} for r in reactions ] } # Drug Labels @safe_json_return async def openfda_search_drug_labels(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA drug labeling information (warnings, indications, dosage). Args: query: Drug name, active ingredient, or condition limit: Maximum results (max 100) Returns: Drug label information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DRUG_LABEL, params) results = data.get("results", []) labels = [] for result in results: openfda = result.get("openfda", {}) labels.append({ "brand_name": openfda.get("brand_name", ["Unknown"])[0], "generic_name": openfda.get("generic_name", ["Unknown"])[0], "manufacturer": openfda.get("manufacturer_name", ["Unknown"])[0], "purpose": result.get("purpose", ["Not specified"])[0] if result.get("purpose") else "Not specified", "warnings": result.get("warnings", ["Not specified"])[0][:500] if result.get("warnings") else "Not specified", "indications_and_usage": result.get("indications_and_usage", ["Not specified"])[0][:500] if result.get("indications_and_usage") else "Not specified" }) return {"results": labels, "total": len(labels)} # NDC Directory @safe_json_return async def openfda_search_ndc(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search National Drug Code (NDC) directory for drug product information. Args: query: Brand name, generic name, or NDC number limit: Maximum results (max 100) Returns: NDC product information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DRUG_NDC, params) results = data.get("results", []) products = [] for result in results: products.append({ "product_ndc": result.get("product_ndc", "Unknown"), "brand_name": result.get("brand_name", "Unknown"), "generic_name": result.get("generic_name", "Unknown"), "manufacturer": result.get("labeler_name", "Unknown"), "dosage_form": result.get("dosage_form", "Unknown"), "route": result.get("route", ["Unknown"])[0] if result.get("route") else "Unknown", "marketing_status": result.get("marketing_status", "Unknown") }) return {"results": products, "total": len(products)} # Drug Recalls @safe_json_return async def openfda_search_drug_recalls(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA drug recall and enforcement reports. Args: query: Drug name, manufacturer, or reason for recall limit: Maximum results (max 100) Returns: Drug recall information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DRUG_ENFORCEMENT, params) results = data.get("results", []) recalls = [] for result in results: recalls.append({ "product_description": result.get("product_description", "Unknown"), "reason_for_recall": result.get("reason_for_recall", "Unknown"), "classification": result.get("classification", "Unknown"), "status": result.get("status", "Unknown"), "recall_date": result.get("recall_initiation_date", "Unknown"), "recalling_firm": result.get("recalling_firm", "Unknown") }) return {"results": recalls, "total": len(recalls)} # Drugs@FDA @safe_json_return async def openfda_search_drugs_fda(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search Drugs@FDA database for approved drug products and applications. Args: query: Drug name or active ingredient limit: Maximum results (max 100) Returns: Approved drug information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DRUG_DRUGSFDA, params) results = data.get("results", []) drugs = [] for result in results: products = result.get("products", []) for product in products: drugs.append({ "application_number": result.get("application_number", "Unknown"), "sponsor_name": result.get("sponsor_name", "Unknown"), "brand_name": product.get("brand_name", "Unknown"), "active_ingredients": product.get("active_ingredients", []), "dosage_form": product.get("dosage_form", "Unknown"), "route": product.get("route", "Unknown"), "marketing_status": product.get("marketing_status", "Unknown") }) return {"results": drugs, "total": len(drugs)} # Device Events @safe_json_return async def openfda_search_device_events(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search medical device adverse event reports. Args: query: Device name or brand limit: Maximum results (max 100) Returns: Device adverse event information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DEVICE_EVENT, params) results = data.get("results", []) events = [] for result in results: device = result.get("device", [{}])[0] events.append({ "report_number": result.get("report_number", "Unknown"), "date_received": result.get("date_received", "Unknown"), "device_name": device.get("brand_name", "Unknown"), "manufacturer": device.get("manufacturer_d_name", "Unknown"), "event_type": result.get("event_type", "Unknown"), "device_problem": device.get("device_problem_codes", ["Unknown"])[0] if device.get("device_problem_codes") else "Unknown" }) return {"results": events, "total": len(events)} # Device Recalls @safe_json_return async def openfda_search_device_recalls(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search medical device recall and enforcement reports. Args: query: Device name, manufacturer, or reason limit: Maximum results (max 100) Returns: Device recall information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DEVICE_ENFORCEMENT, params) results = data.get("results", []) recalls = [] for result in results: recalls.append({ "product_description": result.get("product_description", "Unknown"), "reason_for_recall": result.get("reason_for_recall", "Unknown"), "classification": result.get("classification", "Unknown"), "status": result.get("status", "Unknown"), "recall_date": result.get("recall_initiation_date", "Unknown"), "recalling_firm": result.get("recalling_firm", "Unknown") }) return {"results": recalls, "total": len(recalls)} # Device Classifications @safe_json_return async def openfda_search_device_classifications(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search medical device classification database. Args: query: Device type or classification limit: Maximum results (max 100) Returns: Device classification information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DEVICE_CLASSIFICATION, params) results = data.get("results", []) classifications = [] for result in results: classifications.append({ "device_name": result.get("device_name", "Unknown"), "device_class": result.get("device_class", "Unknown"), "medical_specialty": result.get("medical_specialty_description", "Unknown"), "regulation_number": result.get("regulation_number", "Unknown"), "product_code": result.get("product_code", "Unknown") }) return {"results": classifications, "total": len(classifications)} # 510(k) Clearances @safe_json_return async def openfda_search_510k(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA 510(k) premarket clearance database. Args: query: Device name or manufacturer limit: Maximum results (max 100) Returns: 510(k) clearance information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DEVICE_510K, params) results = data.get("results", []) clearances = [] for result in results: clearances.append({ "k_number": result.get("k_number", "Unknown"), "device_name": result.get("device_name", "Unknown"), "applicant": result.get("applicant", "Unknown"), "clearance_date": result.get("date_received", "Unknown"), "decision_description": result.get("decision_description", "Unknown"), "product_code": result.get("product_code", "Unknown") }) return {"results": clearances, "total": len(clearances)} # PMA Approvals @safe_json_return async def openfda_search_pma(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA premarket approval (PMA) database. Args: query: Device name or manufacturer limit: Maximum results (max 100) Returns: PMA approval information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_DEVICE_PMA, params) results = data.get("results", []) approvals = [] for result in results: approvals.append({ "pma_number": result.get("pma_number", "Unknown"), "device_name": result.get("device_name", "Unknown"), "applicant": result.get("applicant", "Unknown"), "approval_date": result.get("date_received", "Unknown"), "decision_description": result.get("decision_description", "Unknown"), "product_code": result.get("product_code", "Unknown") }) return {"results": approvals, "total": len(approvals)} # Food Recalls @safe_json_return async def openfda_search_food_recalls(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA food recall and enforcement reports. Args: query: Food product or reason for recall limit: Maximum results (max 100) Returns: Food recall information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_FOOD_ENFORCEMENT, params) results = data.get("results", []) recalls = [] for result in results: recalls.append({ "product_description": result.get("product_description", "Unknown"), "reason_for_recall": result.get("reason_for_recall", "Unknown"), "classification": result.get("classification", "Unknown"), "status": result.get("status", "Unknown"), "recall_date": result.get("recall_initiation_date", "Unknown"), "recalling_firm": result.get("recalling_firm", "Unknown") }) return {"results": recalls, "total": len(recalls)} # Food Events @safe_json_return async def openfda_search_food_events(self, query: str, limit: int = 10) -> Dict[str, Any]: """ Search FDA food adverse event reports. Args: query: Food product or reaction limit: Maximum results (max 100) Returns: Food adverse event information """ params = self._build_params(query, limit) data = await self._fetch_fda_data(OPENFDA_FOOD_EVENT, params) results = data.get("results", []) events = [] for result in results: products = result.get("products", [{}]) reactions = result.get("reactions", []) events.append({ "report_number": result.get("report_number", "Unknown"), "date_started": result.get("date_started", "Unknown"), "products": [p.get("name_brand", "Unknown") for p in products], "reactions": [r.get("reaction", "Unknown") for r in reactions][:5], "outcomes": result.get("outcomes", ["Unknown"]) }) return {"results": events, "total": len(events)}