You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
172 lines
6.0 KiB
Python
172 lines
6.0 KiB
Python
import uuid
|
|
import datetime
|
|
from typing import Dict, Any, Optional
|
|
from fhir.resources.patient import Patient
|
|
from fhir.resources.humanname import HumanName
|
|
from fhir.resources.identifier import Identifier
|
|
from fhir.resources.observation import Observation
|
|
from fhir.resources.codeableconcept import CodeableConcept
|
|
from fhir.resources.coding import Coding
|
|
from fhir.resources.reference import Reference
|
|
|
|
class FHIRMapper:
    """
    Maps OCR extracted data to FHIR resources.

    The mapper is stateless: every ``map_to_*`` call reads the dictionary it
    is given and returns a freshly built resource with a new random UUID.

    Expected input shape (all keys optional)::

        {
            "structured_data": {
                "document_type": "lab_result",
                "patient": {"id": ..., "name": ..., "dob": ..., "gender": ...},
            },
            "raw_text": "...",
        }
    """

    # Date formats tried in order; the first one that parses wins.
    # NOTE: '%m/%d/%Y' is tried before '%d/%m/%Y', so an ambiguous value such
    # as "03/04/2000" is read as March 4th.
    _DOB_FORMATS = ('%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%B %d, %Y')

    # Free-text gender values (lower-cased) -> FHIR administrative gender.
    _GENDER_MAP = {
        'male': 'male',
        'm': 'male',
        'female': 'female',
        'f': 'female',
        'other': 'other',
        'unknown': 'unknown',
    }

    # Only these document types produce an Observation.
    _OBSERVATION_DOCUMENT_TYPES = ('lab_result', 'prescription', 'clinical_note')

    _CATEGORY_SYSTEM = "http://terminology.hl7.org/CodeSystem/observation-category"

    # document_type -> (category code, display text).
    # NOTE(review): 'medication' does not appear to be a code defined by the
    # observation-category code system; it is kept unchanged for backward
    # compatibility -- confirm the intended category with the data owners.
    _CATEGORY_BY_DOCUMENT_TYPE = {
        'lab_result': ('laboratory', 'Laboratory'),
        'prescription': ('medication', 'Medication'),
    }

    # Maximum number of raw OCR characters copied into the observation note.
    _NOTE_PREVIEW_CHARS = 200

    def __init__(self):
        """Initialize the FHIR mapper."""

    @staticmethod
    def _split_name(raw_name):
        """
        Split a full name into ``(given_names, family_name)``.

        Simple whitespace parsing: the last token is the family name and
        everything before it is treated as given names. A single-token name
        has no family name (``None``) because FHIR does not allow empty
        strings. A missing or blank name yields ``([], None)``.
        """
        parts = str(raw_name).split() if raw_name else []
        if len(parts) < 2:
            return parts, None
        return parts[:-1], parts[-1]

    @classmethod
    def _parse_birth_date(cls, raw_dob):
        """
        Convert an OCR date of birth to an ISO ``YYYY-MM-DD`` string.

        Returns ``None`` when the value is missing, is not a string/date, or
        matches none of the supported formats, so that an invalid value is
        never written into ``Patient.birthDate``.
        """
        if isinstance(raw_dob, datetime.datetime):
            return raw_dob.date().isoformat()
        if isinstance(raw_dob, datetime.date):
            return raw_dob.isoformat()
        if not isinstance(raw_dob, str):
            return None

        candidate = raw_dob.strip()
        if not candidate:
            return None

        for fmt in cls._DOB_FORMATS:
            try:
                parsed = datetime.datetime.strptime(candidate, fmt)
            except ValueError:
                continue
            return parsed.strftime('%Y-%m-%d')
        return None

    @classmethod
    def _normalize_gender(cls, raw_gender):
        """
        Map a free-text gender to a FHIR administrative gender code.

        Returns ``None`` when no gender was extracted and ``'unknown'`` when
        the value is present but not recognised.
        """
        if not raw_gender:
            return None
        return cls._GENDER_MAP.get(str(raw_gender).strip().lower(), 'unknown')

    def map_to_patient(self, ocr_data: Dict[str, Any]) -> "Patient":
        """
        Map OCR extracted data to a FHIR Patient resource.

        Args:
            ocr_data: Dictionary with OCR extracted data. Patient fields are
                read from ``ocr_data['structured_data']['patient']``
                (``id``, ``name``, ``dob``, ``gender``); all are optional.

        Returns:
            FHIR Patient resource with a newly generated UUID as its id.
            Name, birth date and gender are only set when usable values were
            extracted.
        """
        # `or {}` also covers keys that are present but explicitly None.
        structured = ocr_data.get('structured_data') or {}
        patient_data = structured.get('patient') or {}

        # Create a unique ID for the patient
        patient_id = str(uuid.uuid4())

        # Fall back to a generated identifier when the OCR id is missing,
        # None or blank (an empty identifier value is not valid FHIR).
        raw_identifier = patient_data.get('id')
        identifier_value = (
            str(raw_identifier).strip() if raw_identifier is not None else ""
        )

        fields = {
            'id': patient_id,
            'identifier': [
                Identifier(
                    system="http://example.org/fhir/ocr-extracted-mrn",
                    value=identifier_value or f"OCR-{patient_id}",
                )
            ],
            'active': True,
        }

        # Simple parsing - in production would need more sophisticated name parsing
        given, family = self._split_name(patient_data.get('name'))
        if given:
            name_fields = {'given': given, 'use': "official"}
            if family:
                name_fields['family'] = family
            fields['name'] = [HumanName(**name_fields)]

        birth_date = self._parse_birth_date(patient_data.get('dob'))
        if birth_date:
            fields['birthDate'] = birth_date

        gender = self._normalize_gender(patient_data.get('gender'))
        if gender:
            fields['gender'] = gender

        # Build the resource in one call so every field is validated together.
        return Patient(**fields)

    def map_to_observation(self, ocr_data: Dict[str, Any], patient_id: str) -> Optional["Observation"]:
        """
        Map OCR extracted data to a FHIR Observation resource.

        This is a simplified example that would need to be expanded based on
        the specific type of document being processed: it does not extract
        individual lab values or medication details, it only stores a preview
        of the raw OCR text in the observation note.

        Args:
            ocr_data: Dictionary with OCR extracted data. The document type is
                read from ``ocr_data['structured_data']['document_type']`` and
                the note text from ``ocr_data['raw_text']``.
            patient_id: ID of the associated patient

        Returns:
            FHIR Observation resource or None if no observation data found
            (i.e. the document type is not 'lab_result', 'prescription' or
            'clinical_note').
        """
        structured = ocr_data.get('structured_data') or {}
        document_type = structured.get('document_type')

        # Only process certain document types for observations
        if document_type not in self._OBSERVATION_DOCUMENT_TYPES:
            return None

        # Store at most a short preview of the raw text; add an ellipsis only
        # when something was actually cut off.
        raw_text = ocr_data.get('raw_text') or ''
        if not isinstance(raw_text, str):
            raw_text = str(raw_text)
        preview = raw_text[:self._NOTE_PREVIEW_CHARS]
        if len(raw_text) > self._NOTE_PREVIEW_CHARS:
            preview += "..."

        fields = {
            'id': str(uuid.uuid4()),
            'status': "final",
            # Observation.code is mandatory in FHIR; no coded value is
            # extracted yet, so describe the content with text only.
            'code': CodeableConcept(text="OCR extracted document text"),
            'subject': Reference(reference=f"Patient/{patient_id}"),
            # FHIR dateTime values that carry a time must carry a timezone.
            'effectiveDateTime': datetime.datetime.now(
                datetime.timezone.utc
            ).isoformat(),
            # Example: store raw text in note for demonstration purposes
            'note': [{"text": f"OCR extracted text: {preview}"}],
        }

        # Set category based on document type ('clinical_note' has none).
        category = self._CATEGORY_BY_DOCUMENT_TYPE.get(document_type)
        if category:
            code, display = category
            fields['category'] = [
                CodeableConcept(
                    coding=[
                        Coding(
                            system=self._CATEGORY_SYSTEM,
                            code=code,
                            display=display,
                        )
                    ],
                    text=display,
                )
            ]

        return Observation(**fields)