# NORM/FHIR_OCR_POC/ocr_module/fhir_mapper.py

import uuid
import datetime
from typing import Dict, Any, Optional
from fhir.resources.patient import Patient
from fhir.resources.humanname import HumanName
from fhir.resources.identifier import Identifier
from fhir.resources.observation import Observation
from fhir.resources.codeableconcept import CodeableConcept
from fhir.resources.coding import Coding
from fhir.resources.reference import Reference

class FHIRMapper:
    """
    Maps OCR extracted data to FHIR resources.

    The mapper reads the ``structured_data`` section (and ``raw_text``) of an
    OCR result dictionary and builds ``fhir.resources`` model instances.
    OCR output is noisy, so every field is treated as optional: values that
    cannot be normalized are left off the resource rather than raising.
    """

    # Date layouts tried in order; the first one that parses wins, so an
    # ambiguous value such as "01/02/2000" is read month-first.
    _DOB_FORMATS = ('%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%B %d, %Y')

    # OCR gender spellings -> FHIR administrative gender codes.
    _GENDER_MAP = {
        'male': 'male',
        'm': 'male',
        'female': 'female',
        'f': 'female',
        'other': 'other',
        'unknown': 'unknown',
    }

    # Document type -> (human readable label, observation category or None).
    # Only the document types listed here produce an Observation.
    # NOTE(review): "medication" does not appear to be a code defined by the
    # observation-category code system - confirm the intended category.
    _OBSERVATION_DOCUMENTS = {
        'lab_result': ('Laboratory result', ('laboratory', 'Laboratory')),
        'prescription': ('Prescription', ('medication', 'Medication')),
        'clinical_note': ('Clinical note', None),
    }

    _CATEGORY_SYSTEM = "http://terminology.hl7.org/CodeSystem/observation-category"
    _MRN_SYSTEM = "http://example.org/fhir/ocr-extracted-mrn"

    # Number of raw OCR characters copied into the observation note.
    _RAW_TEXT_PREVIEW_CHARS = 200

    def __init__(self):
        """Initialize the FHIR mapper."""

    @staticmethod
    def _dict_at(container: Any, key: str) -> Dict[str, Any]:
        """Return ``container[key]`` when it is a dict, otherwise ``{}``."""
        value = container.get(key) if isinstance(container, dict) else None
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _split_name(raw_name: Any) -> Optional[tuple]:
        """
        Split a free-text name into ``(given, family)``.

        The last whitespace-separated token is taken as the family name and
        everything before it as given names. A single token is returned as a
        given name with ``family`` set to None. Returns None when there is no
        usable name (missing, non-string, or only whitespace).
        """
        if not isinstance(raw_name, str):
            return None
        parts = raw_name.split()
        if not parts:
            return None
        if len(parts) == 1:
            return parts, None
        return parts[:-1], parts[-1]

    @classmethod
    def _parse_birth_date(cls, raw_dob: Any) -> Optional[str]:
        """
        Normalize a date of birth to ``YYYY-MM-DD``.

        Returns None when the value is not a string or matches none of the
        supported layouts; an unparseable value is never passed through.
        """
        if not isinstance(raw_dob, str):
            return None
        text = raw_dob.strip()
        for fmt in cls._DOB_FORMATS:
            try:
                parsed = datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
            # isoformat() always zero-pads the year, unlike strftime('%Y').
            return parsed.date().isoformat()
        return None

    @classmethod
    def _normalize_gender(cls, raw_gender: Any) -> str:
        """Map an OCR gender value to a FHIR gender code ('unknown' if unrecognized)."""
        key = str(raw_gender).strip().lower()
        return cls._GENDER_MAP.get(key, 'unknown')

    def map_to_patient(self, ocr_data: Dict[str, Any]) -> "Patient":
        """
        Map OCR extracted data to a FHIR Patient resource.

        Args:
            ocr_data: Dictionary with OCR extracted data. Patient fields are
                read from ``ocr_data['structured_data']['patient']`` using the
                keys ``id``, ``name``, ``dob`` and ``gender``; all are optional.

        Returns:
            FHIR Patient resource with a freshly generated UUID as its id.
        """
        structured = self._dict_at(ocr_data, 'structured_data')
        patient_data = self._dict_at(structured, 'patient')

        # Create a unique ID for the patient
        patient_id = str(uuid.uuid4())

        # Fall back to a generated identifier when the OCR id is missing,
        # None, or blank (not only when the key is absent).
        raw_mrn = patient_data.get('id')
        mrn = str(raw_mrn).strip() if raw_mrn is not None else ""
        if not mrn:
            mrn = f"OCR-{patient_id}"

        patient = Patient(
            id=patient_id,
            identifier=[Identifier(system=self._MRN_SYSTEM, value=mrn)],
            active=True,
        )

        # Simple parsing - in production would need more sophisticated name parsing
        name_parts = self._split_name(patient_data.get('name'))
        if name_parts is not None:
            given, family = name_parts
            name_fields = {"given": given, "use": "official"}
            # FHIR string values may not be empty, so omit family when absent.
            if family:
                name_fields["family"] = family
            patient.name = [HumanName(**name_fields)]

        birth_date = self._parse_birth_date(patient_data.get('dob'))
        if birth_date:
            patient.birthDate = birth_date

        if patient_data.get('gender'):
            patient.gender = self._normalize_gender(patient_data['gender'])

        return patient

    def map_to_observation(self, ocr_data: Dict[str, Any], patient_id: str) -> "Optional[Observation]":
        """
        Map OCR extracted data to a FHIR Observation resource.

        This is a simplified example that would need to be expanded based on
        the specific type of document being processed: no lab values or
        medication details are extracted, only a preview of the raw text.

        Args:
            ocr_data: Dictionary with OCR extracted data. The document type is
                read from ``ocr_data['structured_data']['document_type']`` and
                the note text from ``ocr_data['raw_text']``.
            patient_id: ID of the associated patient

        Returns:
            FHIR Observation resource, or None when the document type is not
            'lab_result', 'prescription' or 'clinical_note'.
        """
        structured = self._dict_at(ocr_data, 'structured_data')
        document_type = structured.get('document_type')

        # Only process certain document types for observations
        if not isinstance(document_type, str) or document_type not in self._OBSERVATION_DOCUMENTS:
            return None

        label, category = self._OBSERVATION_DOCUMENTS[document_type]

        observation = Observation(
            id=str(uuid.uuid4()),
            status="final",
            # Observation.code is mandatory in FHIR. No coded value can be
            # derived from the OCR data here, so a text-only concept is used.
            code=CodeableConcept(text=f"{label} (OCR extracted)"),
            subject=Reference(reference=f"Patient/{patient_id}"),
            # A FHIR dateTime that includes a time needs a timezone offset.
            effectiveDateTime=datetime.datetime.now(datetime.timezone.utc).isoformat(),
        )

        # Set category based on document type ('clinical_note' has none)
        if category is not None:
            category_code, category_display = category
            observation.category = [
                CodeableConcept(
                    coding=[
                        Coding(
                            system=self._CATEGORY_SYSTEM,
                            code=category_code,
                            display=category_display,
                        )
                    ],
                    text=category_display,
                )
            ]

        # Example: store raw text in note for demonstration purposes
        raw_text = ocr_data.get('raw_text') if isinstance(ocr_data, dict) else None
        preview = str(raw_text or '')[:self._RAW_TEXT_PREVIEW_CHARS]
        observation.note = [{
            "text": f"OCR extracted text: {preview}..."
        }]

        return observation