# NORM/test_ocr_flow.py

#!/usr/bin/env python
"""
Test script for OCR to FHIR flow.

This script demonstrates the full flow of OCR processing and FHIR resource creation,
using the local implementation without requiring the API to be running.
"""

import os
import sys
import json
import logging
import argparse
from pathlib import Path

# Set up root logging once for the script: INFO threshold, timestamped
# "<time> [<level>] <logger name>: <message>" records.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    level=logging.INFO,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

def main():
    """Run the OCR to FHIR test flow from the command line.

    Command-line arguments (read from ``sys.argv``):
        --image      Path to the image file to process (required).
        --output     Directory to store results (default: ``test_results``).
        --tesseract  Optional path to the Tesseract executable, passed to
                     ``OCRProcessor`` as ``tesseract_cmd``.

    The flow runs OCR on the image, writes the OCR result to
    ``ocr_result.json`` in the output directory, maps the result to a FHIR
    Patient (and, when the detected document type is ``lab_result`` or
    ``prescription``, to an Observation), creates those resources through a
    ``FHIRRepository`` constructed with ``storage_dir`` set to the output
    directory, then reads the patient back and writes it to
    ``patient_resource.json``.

    Exits the process with status 1 when the image path is not an existing
    regular file, or when any step inside the flow raises an exception.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Test OCR to FHIR flow')
    parser.add_argument('--image', required=True, help='Path to the image file to process')
    parser.add_argument('--output', default='test_results', help='Directory to store results')
    parser.add_argument('--tesseract', help='Path to Tesseract executable')
    args = parser.parse_args()

    # Verify the image is an existing regular file. isfile() is used rather
    # than exists() because a directory path would pass a plain existence
    # check even though it is not an image file.
    if not os.path.isfile(args.image):
        logger.error(f"Image file not found: {args.image}")
        sys.exit(1)

    # Create output directory
    os.makedirs(args.output, exist_ok=True)

    try:
        # Make the sibling packages importable when the script is launched
        # from a different working directory; skip if already on the path.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        if script_dir not in sys.path:
            sys.path.append(script_dir)

        # Imported lazily so that a missing/broken local module is reported
        # through the same error path as any other failure in the flow.
        from ocr_module.ocr_processor import OCRProcessor
        from ocr_module.fhir_mapper import FHIRMapper
        from fhir_module.fhir_repository import FHIRRepository

        # Initialize components
        logger.info("Initializing components...")
        ocr_processor = OCRProcessor(tesseract_cmd=args.tesseract)
        fhir_mapper = FHIRMapper()
        fhir_repository = FHIRRepository(storage_dir=args.output)

        # Process image with OCR
        logger.info(f"Processing image: {args.image}")
        ocr_result = ocr_processor.process_image(args.image)

        # Save OCR results to file (explicit encoding keeps the output
        # independent of the platform's default locale encoding).
        ocr_output_file = os.path.join(args.output, 'ocr_result.json')
        with open(ocr_output_file, 'w', encoding='utf-8') as f:
            json.dump(ocr_result, f, indent=2)

        structured_data = ocr_result['structured_data']
        document_type = structured_data['document_type']

        logger.info(f"OCR results saved to: {ocr_output_file}")
        logger.info(f"Document type detected: {document_type}")
        logger.info(f"OCR confidence: {ocr_result['confidence']}%")

        # Map OCR data to FHIR Patient resource
        logger.info("Mapping OCR data to FHIR Patient resource...")
        patient = fhir_mapper.map_to_patient(ocr_result)

        # Create patient resource in FHIR repository
        fhir_repository.create_resource(patient)

        logger.info(f"Patient resource created with ID: {patient.id}")

        # Map to observation if applicable
        if document_type in ["lab_result", "prescription"]:
            logger.info("Mapping OCR data to FHIR Observation resource...")
            observation = fhir_mapper.map_to_observation(ocr_result, patient.id)

            # The mapper may return a falsy value; only store a real resource.
            if observation:
                fhir_repository.create_resource(observation)
                logger.info(f"Observation resource created with ID: {observation.id}")

        # Print patient information (only fields with a truthy value)
        logger.info("\nExtracted Patient Information:")
        patient_info = structured_data["patient"]
        for key, value in patient_info.items():
            if value:
                logger.info(f"  {key.capitalize()}: {value}")

        # Test reading the patient from the repository
        retrieved_patient = fhir_repository.read_resource("Patient", patient.id)

        # Save retrieved patient to file
        patient_output_file = os.path.join(args.output, 'patient_resource.json')
        with open(patient_output_file, 'w', encoding='utf-8') as f:
            json.dump(retrieved_patient, f, indent=2)

        logger.info(f"Patient resource saved to: {patient_output_file}")
        logger.info("Test completed successfully!")

    except Exception as e:
        # Top-level boundary of the script: log the message together with the
        # traceback so the failing step can be identified, then exit non-zero.
        logger.exception(f"Error in OCR to FHIR flow: {e}")
        sys.exit(1)

# Script entry point: run the flow only when executed directly, not on import.
if __name__ == '__main__':
    main()