You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
343 lines
11 KiB
Python
343 lines
11 KiB
Python
import os
|
|
import logging
|
|
import tempfile
|
|
from typing import Dict, Any, List, Optional, Union
|
|
from pathlib import Path
|
|
import json
|
|
import uvicorn
|
|
|
|
from fastapi import FastAPI, Depends, File, UploadFile, HTTPException, Form, Query
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
|
from fastapi.responses import JSONResponse
|
|
from pydantic import BaseModel
|
|
|
|
# Import modules
|
|
from ocr_module.ocr_processor import OCRProcessor
|
|
from ocr_module.fhir_mapper import FHIRMapper
|
|
from fhir_module.fhir_repository import FHIRRepository
|
|
from security_module.auth import auth_handler
|
|
from compliance_module.audit_logger import audit_logger, AuditMiddleware
|
|
from compliance_module.privacy_filter import privacy_filter
|
|
|
|
# Logging setup: INFO level, "<timestamp> [LEVEL] <logger name>: <message>" records.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
# Create FastAPI app
app = FastAPI(
    title="FHIR OCR API",
    description="API for processing healthcare documents with OCR and storing extracted data via FHIR",
    version="0.1.0",
)

# CORS configuration.
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable; when it is unset or empty, every origin is allowed
# (the previous hard-coded default).
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed cross-origin requests are enabled only for an explicit
    # origin list: combining them with a wildcard origin would allow any web
    # site to make credentialed calls to this API.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add audit middleware (registered after CORS, same order as before)
app.add_middleware(AuditMiddleware)
|
|
|
|
# Shared component instances, created once at import time.
ocr_processor = OCRProcessor()
fhir_mapper = FHIRMapper()
# The repository is handed "<current working directory>/fhir_storage" as its
# storage directory.
fhir_repository = FHIRRepository(
    storage_dir=os.path.join(os.getcwd(), "fhir_storage"),
)

# Bearer-token security scheme for Swagger UI
security_scheme = HTTPBearer()
|
|
|
|
# Pydantic models for API requests/responses
|
|
class TokenRequest(BaseModel):
    """Request body for the token endpoint: a username plus a password."""

    username: str
    password: str
|
|
|
|
class TokenResponse(BaseModel):
    """Response body of the token endpoint."""

    # The issued token string.
    access_token: str
    # Token scheme; defaults to "bearer".
    token_type: str = "bearer"
|
|
|
|
class OCRRequest(BaseModel):
    """Optional parameters describing an OCR request."""

    # Location of the image to process, if any.
    image_url: Optional[str] = None
    # Document type to process as: 'insurance_card', 'lab_result', etc.
    process_as: Optional[str] = "auto"
|
|
|
|
class OCRResponse(BaseModel):
    """Result of processing a document with OCR."""

    # Text and structured fields extracted from the document.
    raw_text: str
    structured_data: Dict[str, Any]
    confidence: float
    # IDs of the FHIR resources created from the document, when available.
    patient_id: Optional[str] = None
    observation_id: Optional[str] = None
|
|
|
|
class ResourceResponse(BaseModel):
    """Envelope returned for a single FHIR resource."""

    # Resource type name, e.g. "Patient" or "Observation".
    resource_type: str
    # Resource identifier.
    id: str
    # The resource content itself.
    data: Dict[str, Any]
|
|
|
|
# API routes
|
|
@app.post("/auth/token", response_model=TokenResponse, tags=["Authentication"])
async def login_for_access_token(form_data: TokenRequest):
    """
    Issue an access token for API authentication.

    Simplified proof-of-concept authentication: the only check is that the
    username is non-empty, and the password is not inspected. In production
    the credentials would be validated against a user database.

    Raises:
        HTTPException: 400 when the username is empty.
    """
    username = form_data.username
    if not username:
        raise HTTPException(status_code=400, detail="Invalid username")

    # Demonstration-only role assignment: "admin" also receives the admin role.
    roles = ["user", "admin"] if username == "admin" else ["user"]

    token = auth_handler.create_access_token(username, roles)

    # Record the login in the audit log.
    audit_logger.log_event(
        event_type="authentication",
        user_id=username,
        action="login",
        details={"roles": roles},
    )

    return dict(access_token=token, token_type="bearer")
|
|
|
|
@app.post("/ocr/process", response_model=OCRResponse, tags=["OCR"])
async def process_document(
    file: UploadFile = File(...),
    process_as: str = Form("auto"),
    user: Dict[str, Any] = Depends(auth_handler.get_current_user),
):
    """
    Process a document with OCR and extract healthcare data.

    The upload is written to a temporary file and passed to the OCR processor.
    The result is mapped to a Patient resource, and additionally to an
    Observation when the document type is "lab_result" or "prescription".
    Each resource is stored through the FHIR repository and its creation is
    recorded in the audit log. The temporary file is removed whether or not
    processing succeeds.

    Args:
        file: The uploaded document.
        process_as: Document type to force (insurance_card, lab_result, etc.);
            "auto" keeps the type found in the OCR result.
        user: Value supplied by the ``auth_handler.get_current_user``
            dependency; read as a dict with an optional "user_id" key.

    Returns:
        Dict with ``raw_text``, ``structured_data``, ``confidence``,
        ``patient_id`` and ``observation_id`` (``None`` when no observation
        was created).

    Raises:
        HTTPException: 500 when any processing step fails; an HTTPException
            raised by a processing step is passed through unchanged.
    """
    temp_file_path = None
    try:
        # The upload may carry no filename; fall back to an empty suffix.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Remember the path before writing so a failed write is still cleaned up.
            temp_file_path = temp_file.name
            temp_file.write(await file.read())

        # Process image with OCR
        ocr_result = ocr_processor.process_image(temp_file_path)

        # If process_as is specified, override the detected document type
        if process_as != "auto":
            ocr_result["structured_data"]["document_type"] = process_as
        document_type = ocr_result["structured_data"]["document_type"]
        user_id = user.get("user_id", "unknown")

        # Map OCR data to a Patient resource and store it
        patient = fhir_mapper.map_to_patient(ocr_result)
        fhir_repository.create_resource(patient)
        audit_logger.log_create(
            user_id=user_id,
            resource_type="Patient",
            resource_id=patient.id,
            details={"document_type": document_type},
        )

        # Map to observation if applicable
        observation_id = None
        if document_type in ["lab_result", "prescription"]:
            observation = fhir_mapper.map_to_observation(ocr_result, patient.id)
            if observation:
                fhir_repository.create_resource(observation)
                observation_id = observation.id
                audit_logger.log_create(
                    user_id=user_id,
                    resource_type="Observation",
                    resource_id=observation.id,
                    details={"document_type": document_type},
                )

        return {
            "raw_text": ocr_result["raw_text"],
            "structured_data": ocr_result["structured_data"],
            "confidence": ocr_result["confidence"],
            "patient_id": patient.id,
            "observation_id": observation_id,
        }

    except HTTPException:
        # Same convention as delete_patient: do not rewrap HTTP errors as 500.
        raise
    except Exception as e:
        logger.exception(f"Error processing document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e
    finally:
        # Always remove the temporary copy of the upload, even after a failure.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                # Best effort: a cleanup failure must not mask the real outcome.
                logger.warning(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")
|
|
|
|
@app.get("/fhir/Patient/{patient_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_patient(
    patient_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"])),
):
    """
    Fetch a single Patient resource by its ID.

    The read is recorded in the audit log. When ``mask_sensitive`` is true the
    resource is passed through the privacy filter before being returned.

    Raises:
        HTTPException: 404 when the repository raises FileNotFoundError,
            500 for any other failure.
    """
    try:
        resource = fhir_repository.read_resource("Patient", patient_id)

        # Audit the access before returning anything.
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            resource_id=patient_id,
        )

        # Mask sensitive fields only on request.
        payload = (
            privacy_filter.filter_resource(resource, "Patient")
            if mask_sensitive
            else resource
        )

        return dict(resource_type="Patient", id=patient_id, data=payload)

    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")
    except Exception as e:
        logger.error(f"Error retrieving patient: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving patient: {e}")
|
|
|
|
@app.get("/fhir/Observation/{observation_id}", response_model=ResourceResponse, tags=["FHIR"])
async def get_observation(
    observation_id: str,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"])),
):
    """
    Fetch a single Observation resource by its ID.

    The read is recorded in the audit log. When ``mask_sensitive`` is true the
    resource is passed through the privacy filter before being returned.

    Raises:
        HTTPException: 404 when the repository raises FileNotFoundError,
            500 for any other failure.
    """
    try:
        resource = fhir_repository.read_resource("Observation", observation_id)

        # Audit the access before returning anything.
        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Observation",
            resource_id=observation_id,
        )

        # Mask sensitive fields only on request.
        payload = (
            privacy_filter.filter_resource(resource, "Observation")
            if mask_sensitive
            else resource
        )

        return dict(resource_type="Observation", id=observation_id, data=payload)

    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Observation {observation_id} not found")
    except Exception as e:
        logger.error(f"Error retrieving observation: {e}")
        raise HTTPException(status_code=500, detail=f"Error retrieving observation: {e}")
|
|
|
|
@app.get("/fhir/Patient", response_model=List[ResourceResponse], tags=["FHIR"])
async def search_patients(
    name: Optional[str] = None,
    gender: Optional[str] = None,
    mask_sensitive: bool = Query(False, description="Whether to mask sensitive information"),
    user: Dict[str, Any] = Depends(auth_handler.has_role(["user", "admin"])),
):
    """
    Search for Patient resources.

    ``name`` is sent to the repository as the "name.family" search parameter
    and ``gender`` as "gender"; empty or missing values are left out. The
    search is recorded in the audit log together with the parameters used.
    When ``mask_sensitive`` is true every hit is passed through the privacy
    filter.

    Raises:
        HTTPException: 500 when the search fails.
    """
    try:
        # Keep only the criteria the caller actually supplied.
        params = {
            key: value
            for key, value in (("name.family", name), ("gender", gender))
            if value
        }

        matches = fhir_repository.search_resources("Patient", params)

        audit_logger.log_access(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            details={"search_params": params},
        )

        if mask_sensitive:
            matches = [privacy_filter.filter_resource(hit, "Patient") for hit in matches]

        # Wrap each hit in the response envelope.
        results = []
        for hit in matches:
            results.append(
                {
                    "resource_type": "Patient",
                    "id": hit.get("id", "unknown"),
                    "data": hit,
                }
            )
        return results

    except Exception as e:
        logger.error(f"Error searching patients: {e}")
        raise HTTPException(status_code=500, detail=f"Error searching patients: {e}")
|
|
|
|
@app.delete("/fhir/Patient/{patient_id}", tags=["FHIR"])
async def delete_patient(
    patient_id: str,
    user: Dict[str, Any] = Depends(auth_handler.has_role(["admin"])),
):
    """
    Delete a Patient resource (admin only).

    A successful deletion is recorded in the audit log.

    Raises:
        HTTPException: 404 when the repository reports that nothing was
            deleted, 500 for any other failure.
    """
    try:
        deleted = fhir_repository.delete_resource("Patient", patient_id)
        if not deleted:
            raise HTTPException(status_code=404, detail=f"Patient {patient_id} not found")

        audit_logger.log_delete(
            user_id=user.get("user_id", "unknown"),
            resource_type="Patient",
            resource_id=patient_id,
        )

        return dict(detail=f"Patient {patient_id} deleted")

    except HTTPException:
        # Let the 404 above through instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error deleting patient: {e}")
        raise HTTPException(status_code=500, detail=f"Error deleting patient: {e}")
|
|
|
|
@app.get("/health", tags=["System"])
async def health_check():
    """
    Report that the service is up, along with the application version.
    """
    return dict(status="OK", version=app.version)
|
|
|
|
if __name__ == "__main__":
    # Make sure a storage directory exists for each FHIR resource type.
    storage_root = os.path.join(os.getcwd(), "fhir_storage")
    for kind in ("Patient", "Observation"):
        os.makedirs(os.path.join(storage_root, kind), exist_ok=True)

    # Serve the app on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)