You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
NORM/test_ocr_flow.py

110 lines
4.2 KiB
Python

#!/usr/bin/env python
"""
Test script for OCR to FHIR flow.
This script demonstrates the full flow of OCR processing and FHIR resource creation,
using the local implementation without requiring the API to be running.
"""
import os
import sys
import json
import logging
import argparse
from pathlib import Path
# Set up root logging once at import time: INFO and above, with a timestamped
# "[LEVEL] logger-name: message" layout.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)
def main():
    """Run the OCR to FHIR test flow.

    Command line arguments:
        --image      Path to the image file to process (required).
        --output     Directory to store results (default: ``test_results``).
        --tesseract  Optional path to the Tesseract executable.

    Steps performed:
        1. Run ``OCRProcessor.process_image`` on the image and write the
           result to ``<output>/ocr_result.json``.
        2. Map the OCR result to a Patient via ``FHIRMapper`` and store it in
           a ``FHIRRepository`` whose ``storage_dir`` is the output directory.
        3. For ``lab_result`` / ``prescription`` documents, also map and store
           an Observation (skipped if the mapper returns a falsy value).
        4. Read the Patient back from the repository and write it to
           ``<output>/patient_resource.json``.

    Exits the process with status 1 if the image path is not an existing
    file or if any step of the flow raises an exception.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Test OCR to FHIR flow')
    parser.add_argument('--image', required=True, help='Path to the image file to process')
    parser.add_argument('--output', default='test_results', help='Directory to store results')
    parser.add_argument('--tesseract', help='Path to Tesseract executable')
    args = parser.parse_args()

    # Use isfile rather than exists: a directory path must be rejected here
    # instead of failing later inside the OCR step with a confusing error.
    if not os.path.isfile(args.image):
        logger.error(f"Image file not found: {args.image}")
        sys.exit(1)

    try:
        # Create output directory (inside the try so a bad path is logged
        # and reported like any other failure of the flow).
        os.makedirs(args.output, exist_ok=True)

        # Make the project packages importable when the script is launched
        # from a different working directory; avoid duplicate entries.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        if script_dir not in sys.path:
            sys.path.append(script_dir)

        # Imported lazily so that a missing project module is reported through
        # the same error path as any other failure.
        from ocr_module.ocr_processor import OCRProcessor
        from ocr_module.fhir_mapper import FHIRMapper
        from fhir_module.fhir_repository import FHIRRepository

        # Initialize components
        logger.info("Initializing components...")
        ocr_processor = OCRProcessor(tesseract_cmd=args.tesseract)
        fhir_mapper = FHIRMapper()
        fhir_repository = FHIRRepository(storage_dir=args.output)

        # Process image with OCR
        logger.info(f"Processing image: {args.image}")
        ocr_result = ocr_processor.process_image(args.image)

        # Save OCR results to file before inspecting them, so the raw output
        # is available even if a later step fails.
        ocr_output_file = os.path.join(args.output, 'ocr_result.json')
        with open(ocr_output_file, 'w', encoding='utf-8') as f:
            json.dump(ocr_result, f, indent=2)
        logger.info(f"OCR results saved to: {ocr_output_file}")

        structured_data = ocr_result['structured_data']
        document_type = structured_data['document_type']
        logger.info(f"Document type detected: {document_type}")
        logger.info(f"OCR confidence: {ocr_result['confidence']}%")

        # Map OCR data to FHIR Patient resource
        logger.info("Mapping OCR data to FHIR Patient resource...")
        patient = fhir_mapper.map_to_patient(ocr_result)

        # Create patient resource in FHIR repository
        fhir_repository.create_resource(patient)
        logger.info(f"Patient resource created with ID: {patient.id}")

        # Map to observation if applicable
        if document_type in ("lab_result", "prescription"):
            logger.info("Mapping OCR data to FHIR Observation resource...")
            observation = fhir_mapper.map_to_observation(ocr_result, patient.id)
            if observation:
                fhir_repository.create_resource(observation)
                logger.info(f"Observation resource created with ID: {observation.id}")

        # Print patient information (only fields with a truthy value)
        logger.info("\nExtracted Patient Information:")
        for key, value in structured_data["patient"].items():
            if value:
                logger.info(f" {key.capitalize()}: {value}")

        # Test reading the patient from the repository
        retrieved_patient = fhir_repository.read_resource("Patient", patient.id)

        # Save retrieved patient to file
        patient_output_file = os.path.join(args.output, 'patient_resource.json')
        with open(patient_output_file, 'w', encoding='utf-8') as f:
            json.dump(retrieved_patient, f, indent=2)
        logger.info(f"Patient resource saved to: {patient_output_file}")

        logger.info("Test completed successfully!")
    except Exception as e:
        # Top-level boundary: log the message together with the traceback,
        # then signal failure to the caller through the exit status.
        logger.exception(f"Error in OCR to FHIR flow: {e}")
        sys.exit(1)
# Script entry point: run the flow only when executed directly, not on import.
if __name__ == "__main__":
    main()