You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

343 lines
11 KiB
Python

import os
import logging
import tempfile
from typing import Dict, Any, List, Optional, Union
from pathlib import Path
import json
import uvicorn
from fastapi import FastAPI, Depends, File, UploadFile, HTTPException, Form, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.responses import JSONResponse
from pydantic import BaseModel
# Import modules
from ocr_module.ocr_processor import OCRProcessor
from ocr_module.fhir_mapper import FHIRMapper
from fhir_module.fhir_repository import FHIRRepository
from security_module.auth import auth_handler
from compliance_module.audit_logger import audit_logger, AuditMiddleware
from compliance_module.privacy_filter import privacy_filter
# Configure process-wide logging once at import time and expose a
# module-level logger for the handlers defined in this file.
_LOG_FORMAT = '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
_LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

logging.basicConfig(
    datefmt=_LOG_DATE_FORMAT,
    format=_LOG_FORMAT,
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Application object; the metadata below is what FastAPI publishes in the
# generated OpenAPI document, and `version` is echoed by the /health route.
app = FastAPI(
    version="0.1.0",
    title="FHIR OCR API",
    description="API for processing healthcare documents with OCR and storing extracted data via FHIR",
)
# Add CORS middleware.
#
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS
# environment variable and default to "*" (any origin) when it is unset or
# empty.  Credentialed cross-origin requests are only enabled when the
# origin list is explicit: combining a wildcard origin with
# allow_credentials=True lets any website issue credentialed requests
# against this API, and the FastAPI CORS documentation states that the two
# must not be used together.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add audit middleware (registered after CORS, same order as before).
app.add_middleware(AuditMiddleware)
# Module-level singletons shared by every request handler below.
ocr_processor = OCRProcessor()
fhir_mapper = FHIRMapper()

# The repository is pointed at "<current working directory>/fhir_storage".
fhir_storage_dir = os.path.join(os.getcwd(), "fhir_storage")
fhir_repository = FHIRRepository(storage_dir=fhir_storage_dir)

# Bearer-token security scheme (surfaces the "Authorize" flow in Swagger UI).
security_scheme = HTTPBearer()
# Pydantic models for API requests/responses
# Request body accepted by POST /auth/token.
class TokenRequest(BaseModel):
    # Account name; the login handler rejects an empty value.
    username: str
    # Required by the schema. NOTE(review): the login handler visible in
    # this file never reads this field.
    password: str
# Response body returned by POST /auth/token.
class TokenResponse(BaseModel):
    # Token string produced by auth_handler.create_access_token.
    access_token: str
    # Token scheme; defaults to "bearer".
    token_type: str = "bearer"
# OCR request described by URL rather than file upload.
# NOTE(review): no route visible in this file uses this model.
class OCRRequest(BaseModel):
    # Optional location of the image to process.
    image_url: Optional[str] = None
    # Document type hint, e.g. 'insurance_card', 'lab_result'; defaults to "auto".
    process_as: Optional[str] = "auto"  # 'insurance_card', 'lab_result', etc.
# Response body returned by POST /ocr/process.
class OCRResponse(BaseModel):
    # Copied from the OCR result's "raw_text" entry.
    raw_text: str
    # Copied from the OCR result's "structured_data" entry.
    structured_data: Dict[str, Any]
    # Copied from the OCR result's "confidence" entry.
    confidence: float
    # ID of the Patient resource created for the document.
    patient_id: Optional[str] = None
    # ID of the Observation resource, when one was created; otherwise None.
    observation_id: Optional[str] = None
# Envelope used by the FHIR read/search routes to return one stored resource.
class ResourceResponse(BaseModel):
    # Resource type name; the routes in this file set "Patient" or "Observation".
    resource_type: str
    # Identifier of the resource.
    id: str
    # The resource content as returned by the repository (optionally
    # passed through the privacy filter).
    data: Dict[str, Any]
# API routes
@app.post("/auth/token", response_model=TokenResponse, tags=["Authentication"])
async def login_for_access_token(form_data: TokenRequest):
    """
    Get an access token for API authentication.

    Simplified proof-of-concept login: the only check is that the username
    is non-empty.  Every caller receives the "user" role, and the username
    "admin" additionally receives the "admin" role.

    Raises:
        HTTPException: 400 when the username is empty.
    """
    # NOTE(review): form_data.password is never inspected here, so anyone who
    # submits the username "admin" is issued an admin token.  Real credential
    # validation is required before this leaves POC status.
    username = form_data.username
    if not username:
        raise HTTPException(status_code=400, detail="Invalid username")

    # Role assignment is derived purely from the username (demonstration only).
    roles = ["user", "admin"] if username == "admin" else ["user"]

    token = auth_handler.create_access_token(username, roles)

    # Record the login in the audit trail.
    audit_logger.log_event(
        event_type="authentication",
        user_id=username,
        action="login",
        details={"roles": roles},
    )

    return {"access_token": token, "token_type": "bearer"}
@app.post("/ocr/process", response_model=OCRResponse, tags=["OCR"])
async def process_document(
    file: UploadFile = File(...),
    process_as: str = Form("auto"),
    user: Dict[str, Any] = Depends(auth_handler.get_current_user)
):
    """
    Process a document with OCR and extract healthcare data.
    Optionally specify how to process the document (insurance_card, lab_result, etc.)

    The upload is written to a temporary file, run through the OCR
    processor, mapped to a Patient resource (and, for "lab_result" or
    "prescription" documents, an Observation resource), and the created
    resources are stored in the FHIR repository and audit-logged.

    Args:
        file: The uploaded document.
        process_as: Document type override; "auto" keeps the type detected
            by the OCR step.
        user: Authenticated user info supplied by the auth dependency.

    Returns:
        A dict with the raw OCR text, structured data, confidence, the new
        patient ID and, when applicable, the new observation ID.

    Raises:
        HTTPException: 500 when any step of the pipeline fails.
    """
    temp_file_path = None
    try:
        # The client may omit the filename; fall back to no suffix instead
        # of letting os.path.splitext fail on None.
        suffix = os.path.splitext(file.filename or "")[1]

        # Save uploaded file to temporary location. The path is recorded
        # before writing so a failed write can still be cleaned up.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(await file.read())

        # Process image with OCR
        ocr_result = ocr_processor.process_image(temp_file_path)
        structured_data = ocr_result["structured_data"]

        # If process_as is specified, override the detected document type
        if process_as != "auto":
            structured_data["document_type"] = process_as
        document_type = structured_data["document_type"]

        user_id = user.get("user_id", "unknown")

        # Map OCR data to a Patient resource and store it
        patient = fhir_mapper.map_to_patient(ocr_result)
        fhir_repository.create_resource(patient)
        audit_logger.log_create(
            user_id=user_id,
            resource_type="Patient",
            resource_id=patient.id,
            details={"document_type": document_type}
        )

        # Map to observation if applicable
        observation_id = None
        if document_type in ["lab_result", "prescription"]:
            observation = fhir_mapper.map_to_observation(ocr_result, patient.id)
            if observation:
                fhir_repository.create_resource(observation)
                observation_id = observation.id
                audit_logger.log_create(
                    user_id=user_id,
                    resource_type="Observation",
                    resource_id=observation.id,
                    details={"document_type": document_type}
                )

        return {
            "raw_text": ocr_result["raw_text"],
            "structured_data": structured_data,
            "confidence": ocr_result["confidence"],
            "patient_id": patient.id,
            "observation_id": observation_id
        }
    except Exception as e:
        logger.exception(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e
    finally:
        # Always remove the temporary copy of the upload, including on
        # failure, so document contents are not left behind on disk.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                # Cleanup is best-effort; do not mask the real outcome.
                logger.warning(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")
@app.get("/fhir/Patient/{patient_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_patient(
    patient_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Get a patient resource by ID.

    The read is audit-logged, and the resource is passed through the
    privacy filter when mask_sensitive is true.

    Raises:
        HTTPException: 404 when the repository raises FileNotFoundError,
            500 for any other failure.
    """
    try:
        resource = fhir_repository.read_resource("Patient", patient_id)

        # Record who accessed which resource.
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            resource_id=patient_id,
        )

        # Masking is opt-in via the query parameter.
        payload = (
            privacy_filter.filter_resource(resource, "Patient")
            if mask_sensitive
            else resource
        )
        return dict(resource_type="Patient", id=patient_id, data=payload)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")
    except Exception as e:
        message = f"Error retrieving patient: {str(e)}"
        logger.error(message)
        raise HTTPException(status_code=500, detail=message)
@app.get("/fhir/Observation/{observation_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_observation(
    observation_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Get an observation resource by ID.

    The read is audit-logged, and the resource is passed through the
    privacy filter when mask_sensitive is true.

    Raises:
        HTTPException: 404 when the repository raises FileNotFoundError,
            500 for any other failure.
    """
    try:
        resource = fhir_repository.read_resource("Observation", observation_id)

        # Record who accessed which resource.
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Observation",
            resource_id=observation_id,
        )

        # Masking is opt-in via the query parameter.
        payload = (
            privacy_filter.filter_resource(resource, "Observation")
            if mask_sensitive
            else resource
        )
        return dict(resource_type="Observation", id=observation_id, data=payload)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Observation {observation_id} not found")
    except Exception as e:
        message = f"Error retrieving observation: {str(e)}"
        logger.error(message)
        raise HTTPException(status_code=500, detail=message)
@app.get("/fhir/Patient", response_model=List[ResourceResponse], tags=["FHIR"])
async def search_patients(
    name: Optional[str] = None,
    gender: Optional[str] = None,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"]))
):
    """
    Search for patients.

    `name` is sent to the repository as the "name.family" search parameter
    and `gender` as "gender"; parameters that are missing or empty are left
    out.  The search is audit-logged, and each hit is passed through the
    privacy filter when mask_sensitive is true.

    Raises:
        HTTPException: 500 when the search fails.
    """
    try:
        # Keep only the filters the caller actually supplied.
        requested_filters = (("name.family", name), ("gender", gender))
        params = {key: value for key, value in requested_filters if value}

        matches = fhir_repository.search_resources("Patient", params)

        # Record the search and the parameters it was run with.
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            details={"search_params": params},
        )

        # Masking is opt-in via the query parameter.
        if mask_sensitive:
            matches = [
                privacy_filter.filter_resource(record, "Patient")
                for record in matches
            ]

        results = []
        for record in matches:
            results.append(
                dict(
                    resource_type="Patient",
                    id=record.get("id", "unknown"),
                    data=record,
                )
            )
        return results
    except Exception as e:
        message = f"Error searching patients: {str(e)}"
        logger.error(message)
        raise HTTPException(status_code=500, detail=message)
@app.delete("/fhir/Patient/{patient_id}", tags=["FHIR"])
async def delete_patient(
    patient_id: str,
    user: Dict[str, Any] = Depends(auth_handler.has_role(["admin"]))
):
    """
    Delete a patient resource (admin only).

    A successful deletion is audit-logged before the confirmation is
    returned.

    Raises:
        HTTPException: 404 when the repository reports nothing was deleted,
            500 for any other failure.
    """
    try:
        if fhir_repository.delete_resource("Patient", patient_id):
            # Only deletions that actually happened reach the audit trail.
            audit_logger.log_delete(
                user_id=user.get("user_id", "unknown"),
                resource_type="Patient",
                resource_id=patient_id,
            )
            return {"detail": f"Patient {patient_id} deleted"}

        raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")
    except HTTPException:
        # Let the 404 above pass through instead of turning it into a 500.
        raise
    except Exception as e:
        message = f"Error deleting patient: {str(e)}"
        logger.error(message)
        raise HTTPException(status_code=500, detail=message)
@app.get("/health", tags=["System"])
async def health_check():
    """
    Health check endpoint.

    Returns a fixed "OK" status together with the application version.
    """
    return dict(status="OK", version=app.version)
if __name__ == "__main__":
    # Make sure one storage directory per resource type exists under
    # "<current working directory>/fhir_storage" before serving.
    storage_root = os.path.join(os.getcwd(), "fhir_storage")
    for resource_type in ("Patient", "Observation"):
        os.makedirs(os.path.join(storage_root, resource_type), exist_ok=True)

    # Serve on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)