You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

172 lines
6.0 KiB
Python

import uuid
import datetime
from typing import Dict, Any, Optional
from fhir.resources.patient import Patient
from fhir.resources.humanname import HumanName
from fhir.resources.identifier import Identifier
from fhir.resources.observation import Observation
from fhir.resources.codeableconcept import CodeableConcept
from fhir.resources.coding import Coding
from fhir.resources.reference import Reference
class FHIRMapper:
    """
    Maps OCR extracted data to FHIR resources.

    The mapper reads the ``structured_data`` and ``raw_text`` entries of an
    OCR result dictionary and builds ``Patient`` / ``Observation`` resources
    from them. OCR output is noisy, so every field is treated as optional
    and values that cannot be interpreted are omitted rather than passed
    through unvalidated.
    """

    # Date formats tried, in order, when parsing a date of birth.
    # NOTE: '%m/%d/%Y' is tried before '%d/%m/%Y', so an ambiguous value
    # such as "03/04/1990" is always read month-first.
    _DOB_FORMATS = ('%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%B %d, %Y')

    # Accepted OCR gender spellings -> FHIR administrative gender codes.
    _GENDER_MAP = {
        'male': 'male',
        'm': 'male',
        'female': 'female',
        'f': 'female',
        'other': 'other',
        'unknown': 'unknown',
    }

    # Document types for which an Observation is produced.
    _OBSERVATION_DOCUMENT_TYPES = ('lab_result', 'prescription', 'clinical_note')

    _CATEGORY_SYSTEM = "http://terminology.hl7.org/CodeSystem/observation-category"

    # Document type -> (category code, display text). 'clinical_note' has no
    # category on purpose, matching the original behaviour.
    # NOTE(review): 'medication' does not appear to be a code defined by the
    # HL7 observation-category code system - confirm before relying on it.
    _OBSERVATION_CATEGORIES = {
        'lab_result': ('laboratory', 'Laboratory'),
        'prescription': ('medication', 'Medication'),
    }

    def __init__(self):
        """Initialize the FHIR mapper."""
        pass

    @staticmethod
    def _split_name(raw_name: Any) -> tuple:
        """
        Split a free-text name into ``(given_names, family_name)``.

        The last whitespace-separated token is treated as the family name.
        A single-token name is returned as a given name with ``family`` set
        to ``None`` (FHIR strings must not be empty, so ``""`` is never
        returned). Empty or whitespace-only input yields ``([], None)``.
        """
        parts = str(raw_name).split() if raw_name else []
        if len(parts) > 1:
            return parts[:-1], parts[-1]
        return parts, None

    @classmethod
    def _parse_birth_date(cls, raw_dob: Any) -> Optional[str]:
        """
        Normalize a date of birth to ``YYYY-MM-DD``.

        Returns ``None`` when the value is missing or matches none of the
        known formats, so an unparseable OCR string is never stored as a
        birth date.
        """
        if not raw_dob:
            return None
        # Already-parsed values are passed through in ISO form.
        if isinstance(raw_dob, datetime.datetime):
            return raw_dob.date().isoformat()
        if isinstance(raw_dob, datetime.date):
            return raw_dob.isoformat()

        text = str(raw_dob).strip()
        for fmt in cls._DOB_FORMATS:
            try:
                parsed = datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
            # isoformat() always zero-pads the year, unlike strftime('%Y')
            # on some platforms.
            return parsed.date().isoformat()
        return None

    @classmethod
    def _normalize_gender(cls, raw_gender: Any) -> str:
        """
        Map an OCR gender value to a FHIR administrative gender code.

        Matching ignores case and surrounding whitespace; anything not
        recognised maps to ``'unknown'``.
        """
        return cls._GENDER_MAP.get(str(raw_gender).strip().lower(), 'unknown')

    def map_to_patient(self, ocr_data: Dict[str, Any]) -> "Patient":
        """
        Map OCR extracted data to a FHIR Patient resource.

        Args:
            ocr_data: Dictionary with OCR extracted data. Patient fields are
                read from ``ocr_data['structured_data']['patient']`` using
                the keys ``id``, ``name``, ``dob`` and ``gender``; all of
                them are optional.

        Returns:
            FHIR Patient resource with a freshly generated UUID as its id.
        """
        # Tolerate missing keys as well as keys explicitly set to None.
        structured = ocr_data.get('structured_data') or {}
        patient_data = structured.get('patient') or {}

        # Create a unique ID for the patient
        patient_id = str(uuid.uuid4())

        # Fall back to a synthetic identifier when the OCR id is absent OR
        # empty; dict.get's default only covers the "absent" case.
        mrn = patient_data.get('id') or f"OCR-{patient_id}"

        patient = Patient(
            id=patient_id,
            identifier=[
                Identifier(
                    system="http://example.org/fhir/ocr-extracted-mrn",
                    value=str(mrn),
                )
            ],
            active=True,
        )

        # Simple parsing - in production would need more sophisticated
        # name parsing.
        given, family = self._split_name(patient_data.get('name'))
        if given:
            patient.name = [HumanName(given=given, family=family, use="official")]

        birth_date = self._parse_birth_date(patient_data.get('dob'))
        if birth_date:
            patient.birthDate = birth_date

        if patient_data.get('gender'):
            patient.gender = self._normalize_gender(patient_data['gender'])

        return patient

    def map_to_observation(self, ocr_data: Dict[str, Any], patient_id: str) -> Optional["Observation"]:
        """
        Map OCR extracted data to a FHIR Observation resource.

        This is a simplified example that would need to be expanded based on
        the specific type of document being processed: no individual lab
        values or medication details are extracted, only the first 200
        characters of the raw OCR text are attached as a note.

        Args:
            ocr_data: Dictionary with OCR extracted data. The document type
                is read from ``ocr_data['structured_data']['document_type']``
                and the note text from ``ocr_data['raw_text']``.
            patient_id: ID of the associated patient

        Returns:
            FHIR Observation resource, or None when the document type is not
            one of 'lab_result', 'prescription' or 'clinical_note'.
        """
        structured = ocr_data.get('structured_data') or {}
        document_type = structured.get('document_type')

        # Only process certain document types for observations
        if document_type not in self._OBSERVATION_DOCUMENT_TYPES:
            return None

        observation = Observation(
            id=str(uuid.uuid4()),
            status="final",
            # Observation.code is mandatory (1..1) in FHIR. Until real codes
            # are extracted from the document, use a text-only concept that
            # describes where the observation came from.
            code=CodeableConcept(
                text=f"OCR extracted {document_type.replace('_', ' ')}"
            ),
            subject=Reference(reference=f"Patient/{patient_id}"),
            # A FHIR dateTime that includes a time must carry a timezone
            # offset, so use an aware UTC timestamp rather than a naive one.
            effectiveDateTime=datetime.datetime.now(datetime.timezone.utc).isoformat(),
        )

        # Set category based on document type
        category = self._OBSERVATION_CATEGORIES.get(document_type)
        if category is not None:
            category_code, category_display = category
            observation.category = [
                CodeableConcept(
                    coding=[
                        Coding(
                            system=self._CATEGORY_SYSTEM,
                            code=category_code,
                            display=category_display,
                        )
                    ],
                    text=category_display,
                )
            ]

        # Example: store raw text in note for demonstration purposes.
        # `or ''` also covers raw_text being present but None.
        raw_text = str(ocr_data.get('raw_text') or '')
        observation.note = [{
            "text": f"OCR extracted text: {raw_text[:200]}..."
        }]

        return observation