"""FHIR OCR API: application setup, shared components and request models."""

import os
import logging
import tempfile
from typing import Dict, Any, List, Optional, Union
from pathlib import Path
import json

import uvicorn
from fastapi import FastAPI, Depends, File, UploadFile, HTTPException, Form, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# Import modules
from ocr_module.ocr_processor import OCRProcessor
from ocr_module.fhir_mapper import FHIRMapper
from fhir_module.fhir_repository import FHIRRepository
from security_module.auth import auth_handler
from compliance_module.audit_logger import audit_logger, AuditMiddleware
from compliance_module.privacy_filter import privacy_filter

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Create FastAPI app
app = FastAPI(
    title="FHIR OCR API",
    description="API for processing healthcare documents with OCR and storing extracted data via FHIR",
    version="0.1.0"
)

# Browser origins allowed to call the API, given as a comma-separated list in
# the CORS_ALLOW_ORIGINS environment variable. Defaults to "*" (any origin);
# set explicit origins in production.
_cors_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allowed_origins,
    # A wildcard origin must not be combined with credentialed requests:
    # doing so lets any website send cookies/HTTP-auth on a user's behalf.
    # Bearer tokens sent in the Authorization header are unaffected by this
    # flag, so credentials are only enabled once origins are explicit.
    allow_credentials="*" not in _cors_allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add audit middleware
app.add_middleware(AuditMiddleware)

# Initialize components
ocr_processor = OCRProcessor()
fhir_mapper = FHIRMapper()
fhir_repository = FHIRRepository(storage_dir=os.path.join(os.getcwd(), "fhir_storage"))

# Security scheme for Swagger UI
security_scheme = HTTPBearer()


# Pydantic models for API requests/responses
class TokenRequest(BaseModel):
    """Credentials submitted to ``/auth/token``."""

    username: str
    password: str


class TokenResponse(BaseModel):
    """Access token returned by ``/auth/token``."""

    access_token: str
    token_type: str = "bearer"


class OCRRequest(BaseModel):
    """JSON description of an OCR job (not referenced by the routes in this file)."""

    image_url: Optional[str] = None
    process_as: Optional[str] = "auto"  # 'insurance_card', 'lab_result', etc.
class OCRResponse(BaseModel):
    """Result of processing one uploaded document."""

    raw_text: str
    structured_data: Dict[str, Any]
    confidence: float
    patient_id: Optional[str] = None
    observation_id: Optional[str] = None


class ResourceResponse(BaseModel):
    """Envelope around a stored FHIR resource."""

    resource_type: str
    id: str
    data: Dict[str, Any]


# API routes
@app.post("/auth/token", response_model=TokenResponse, tags=["Authentication"])
async def login_for_access_token(form_data: TokenRequest):
    """
    Get an access token for API authentication.

    Raises:
        HTTPException: 400 when the username is empty.
    """
    # This is a simplified authentication for the POC
    # In production, this would validate credentials against a user database
    # SECURITY(review): the password is never checked and the "admin" role is
    # granted purely by username, so any caller can obtain an admin token.
    # This must be replaced before the service handles real patient data.

    # For POC, we just check if the username is not empty
    if not form_data.username:
        raise HTTPException(status_code=400, detail="Invalid username")

    # Determine roles based on username (for demonstration)
    roles = ["user"]
    if form_data.username == "admin":
        roles.append("admin")

    # Create access token
    access_token = auth_handler.create_access_token(form_data.username, roles)

    # Log the authentication
    audit_logger.log_event(
        event_type="authentication",
        user_id=form_data.username,
        action="login",
        details={"roles": roles}
    )

    return {"access_token": access_token, "token_type": "bearer"}


@app.post("/ocr/process", response_model=OCRResponse, tags=["OCR"])
async def process_document(
    file: UploadFile = File(...),
    process_as: str = Form("auto"),
    user: Dict[str, Any] = Depends(auth_handler.get_current_user)
):
    """
    Process a document with OCR and extract healthcare data.

    Optionally specify how to process the document (insurance_card, lab_result, etc.)

    The upload is written to a temporary file, run through the OCR processor,
    mapped to a Patient resource (and an Observation for lab results and
    prescriptions) and stored in the FHIR repository. The temporary file is
    always removed, whether or not processing succeeds.

    Raises:
        HTTPException: 500 when any processing step fails.
    """
    temp_file_path = None
    try:
        # Save uploaded file to temporary location. The filename may be
        # missing on the upload, in which case no suffix is used.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so a failed write is still cleaned up.
            temp_file_path = temp_file.name
            temp_file.write(await file.read())

        # Process image with OCR
        # NOTE(review): this call is synchronous inside an async handler and
        # presumably blocks the event loop while OCR runs - confirm and
        # consider offloading to a worker thread.
        ocr_result = ocr_processor.process_image(temp_file_path)

        # If process_as is specified, override the detected document type
        if process_as != "auto":
            ocr_result["structured_data"]["document_type"] = process_as

        # Map OCR data to FHIR resources
        patient = fhir_mapper.map_to_patient(ocr_result)

        # Create patient resource in FHIR repository
        fhir_repository.create_resource(patient)

        document_type = ocr_result["structured_data"]["document_type"]
        user_id = user.get("user_id", "unknown")

        # Log the creation
        audit_logger.log_create(
            user_id=user_id,
            resource_type="Patient",
            resource_id=patient.id,
            details={"document_type": document_type}
        )

        # Map to observation if applicable
        observation_id = None
        if document_type in ["lab_result", "prescription"]:
            observation = fhir_mapper.map_to_observation(ocr_result, patient.id)
            if observation:
                fhir_repository.create_resource(observation)
                observation_id = observation.id

                # Log the creation
                audit_logger.log_create(
                    user_id=user_id,
                    resource_type="Observation",
                    resource_id=observation.id,
                    details={"document_type": document_type}
                )

        return {
            "raw_text": ocr_result["raw_text"],
            "structured_data": ocr_result["structured_data"],
            "confidence": ocr_result["confidence"],
            "patient_id": patient.id,
            "observation_id": observation_id
        }
    except HTTPException:
        # Preserve deliberate HTTP errors instead of masking them as 500s.
        raise
    except Exception as e:
        logger.exception("Error processing document: %s", e)
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e
    finally:
        # Clean up temporary file on every path; uploads may contain patient
        # data and must not be left behind on disk after a failure.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(
                    "Could not remove temporary file %s: %s",
                    temp_file_path,
                    cleanup_error,
                )


def _read_fhir_resource(
    resource_type: str,
    resource_id: str,
    mask_sensitive: bool,
    user: Dict[str, Any],
) -> Dict[str, Any]:
    """Read one stored resource, audit the access and optionally mask it.

    Shared implementation of the single-resource GET routes.

    Raises:
        HTTPException: 404 when a FileNotFoundError is raised while reading,
            500 for any other failure.
    """
    label = resource_type.lower()
    try:
        resource_data = fhir_repository.read_resource(resource_type, resource_id)

        # Log the access
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type=resource_type,
            resource_id=resource_id
        )

        # Apply privacy filter if requested
        if mask_sensitive:
            resource_data = privacy_filter.filter_resource(resource_data, resource_type)

        return {
            "resource_type": resource_type,
            "id": resource_id,
            "data": resource_data
        }
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"{resource_type} {resource_id} not found")
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Error retrieving %s: %s", label, e)
        raise HTTPException(status_code=500, detail=f"Error retrieving {label}: {str(e)}") from e


@app.get("/fhir/Patient/{patient_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_patient(
    patient_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Get a patient resource by ID.

    Raises:
        HTTPException: 404 when the patient is not found, 500 on other errors.
    """
    return _read_fhir_resource("Patient", patient_id, mask_sensitive, user)


@app.get("/fhir/Observation/{observation_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_observation(
    observation_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Get an observation resource by ID.

    Raises:
        HTTPException: 404 when the observation is not found, 500 on other errors.
    """
    return _read_fhir_resource("Observation", observation_id, mask_sensitive, user)


@app.get("/fhir/Patient", response_model=List[ResourceResponse], tags=["FHIR"])
async def search_patients(
    name: Optional[str] = None,
    gender: Optional[str] = None,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Search for patients.

    ``name`` is matched against the ``name.family`` search parameter and
    ``gender`` against ``gender``; omitted filters are not sent to the
    repository.

    Raises:
        HTTPException: 500 when the search fails.
    """
    try:
        # Build search parameters
        params = {}
        if name:
            params["name.family"] = name
        if gender:
            params["gender"] = gender

        # Search for patients
        patients = fhir_repository.search_resources("Patient", params)

        # Log the search
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            details={"search_params": params}
        )

        # Apply privacy filter if requested
        if mask_sensitive:
            patients = [privacy_filter.filter_resource(p, "Patient") for p in patients]

        return [
            {
                "resource_type": "Patient",
                "id": p.get("id", "unknown"),
                "data": p
            }
            for p in patients
        ]
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Error searching patients: %s", e)
        raise HTTPException(status_code=500, detail=f"Error searching patients: {str(e)}") from e


@app.delete("/fhir/Patient/{patient_id}", tags=["FHIR"])
async def delete_patient(
    patient_id: str,
    user: Dict[str, Any] = Depends(auth_handler.has_role(["admin"]))
):
    """
    Delete a patient resource (admin only).

    Raises:
        HTTPException: 404 when the repository reports nothing was deleted,
            500 on other errors.
    """
    try:
        # Delete patient
        success = fhir_repository.delete_resource("Patient", patient_id)

        if not success:
            raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")

        # Log the deletion
        audit_logger.log_delete(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            resource_id=patient_id
        )

        return {"detail": f"Patient {patient_id} deleted"}
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Error deleting patient: %s", e)
        raise HTTPException(status_code=500, detail=f"Error deleting patient: {str(e)}") from e


@app.get("/health", tags=["System"])
async def health_check():
    """
    Health check endpoint.
    """
    return {"status": "OK", "version": app.version}


if __name__ == "__main__":
    # Create FHIR storage directories
    for resource_type in ["Patient", "Observation"]:
        os.makedirs(os.path.join(os.getcwd(), "fhir_storage", resource_type), exist_ok=True)

    # Run server
    uvicorn.run(app, host="0.0.0.0", port=8000)