# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 137 lines
# 4.9 KiB
# Python

import os
import unittest
import logging
from unittest.mock import patch, MagicMock
import json
from .ocr_processor import OCRProcessor
from .fhir_mapper import FHIRMapper
# Set up logging for tests.
# basicConfig() runs at import time and configures the root logger at INFO
# level; `logger` is this module's own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class TestOCRModule(unittest.TestCase):
    """Test cases for OCR module.

    Exercises ``OCRProcessor.process_image`` and the ``FHIRMapper`` mapping
    methods. The pytesseract entry points are patched in ``setUp`` so the
    tests do not depend on an actual Tesseract installation.
    """

    def _start_patcher(self, patcher):
        """Start *patcher* and guarantee that it is stopped again.

        The stop is registered with ``addCleanup`` rather than performed in
        ``tearDown``: unittest skips ``tearDown`` when ``setUp`` raises, which
        would otherwise leave already-started patches active for later tests.

        Returns:
            The mock object produced by ``patcher.start()``.
        """
        mock = patcher.start()
        # Register only after a successful start; stopping a patcher that
        # never started is an error on some Python versions.
        self.addCleanup(patcher.stop)
        return mock

    def setUp(self):
        """Set up test fixtures."""
        # Mock Tesseract to avoid dependency on actual installation for tests.
        self.tesseract_patcher = patch('pytesseract.pytesseract.image_to_string')
        self.mock_image_to_string = self._start_patcher(self.tesseract_patcher)
        # Canned OCR text; the leading and trailing newlines are part of it.
        self.mock_image_to_string.return_value = (
            "\n"
            "Patient: John Doe\n"
            "DOB: 01/15/1980\n"
            "Sex: Male\n"
            "MRN: 12345678\n"
            "Insurance: HealthCorp\n"
            "Policy #: HC987654321\n"
        )

        # Mock image_to_data to return confidence scores.
        self.data_patcher = patch('pytesseract.pytesseract.image_to_data')
        self.mock_image_to_data = self._start_patcher(self.data_patcher)
        self.mock_image_to_data.return_value = {
            'conf': [90, 95, 85, 92, 88],
        }

        # Mock Tesseract version.
        self.version_patcher = patch('pytesseract.pytesseract.get_tesseract_version')
        self.mock_get_version = self._start_patcher(self.version_patcher)
        self.mock_get_version.return_value = '4.1.1'

        # Create OCR processor with mocks.
        self.ocr = OCRProcessor()
        # Create FHIR mapper.
        self.mapper = FHIRMapper()

    # Decorators are applied bottom-up, so the mock arguments arrive in the
    # reverse of the order written here: os.path.exists first, cv2.imread last.
    @patch('cv2.imread')
    @patch('cv2.cvtColor')
    @patch('cv2.threshold')
    @patch('os.path.exists')
    def test_process_image(self, mock_exists, mock_threshold, mock_cvtcolor, mock_imread):
        """Test image processing and OCR extraction."""
        # Set up mocks: the file "exists" and every OpenCV step yields a stub.
        mock_exists.return_value = True
        mock_imread.return_value = MagicMock()
        mock_cvtcolor.return_value = MagicMock()
        # A 2-tuple whose second element stands in for the thresholded image.
        mock_threshold.return_value = (None, MagicMock())

        # Process a mock image.
        result = self.ocr.process_image("test_image.jpg")

        # Verify the result carries every expected top-level key.
        self.assertIsNotNone(result)
        for key in ("raw_text", "structured_data", "confidence", "metadata"):
            self.assertIn(key, result)

        # Check structured data extraction.
        patient_data = result["structured_data"]["patient"]
        self.assertEqual(patient_data["name"], "John Doe")
        self.assertEqual(patient_data["dob"], "01/15/1980")
        self.assertEqual(patient_data["gender"], "Male")
        self.assertEqual(patient_data["id"], "12345678")

        # Check document type detection.
        self.assertEqual(result["structured_data"]["document_type"], "insurance_card")

    def test_map_to_fhir_patient(self):
        """Test mapping OCR data to FHIR Patient resource."""
        # Create sample OCR data.
        ocr_data = {
            "raw_text": "Patient: John Doe\nDOB: 01/15/1980\nSex: Male\nMRN: 12345678",
            "structured_data": {
                "patient": {
                    "name": "John Doe",
                    "dob": "01/15/1980",
                    "gender": "Male",
                    "id": "12345678",
                },
                "document_type": "insurance_card",
            },
        }

        # Map to FHIR Patient.
        patient = self.mapper.map_to_patient(ocr_data)

        # Verify FHIR resource: gender lower-cased, name split into
        # given/family, and date of birth in ISO form.
        self.assertIsNotNone(patient)
        self.assertEqual(patient.gender, "male")
        self.assertEqual(patient.name[0].family, "Doe")
        self.assertEqual(patient.name[0].given[0], "John")
        self.assertEqual(patient.birthDate, "1980-01-15")
        self.assertEqual(patient.identifier[0].value, "12345678")

    def test_map_to_fhir_observation(self):
        """Test mapping OCR data to FHIR Observation resource."""
        # Create sample OCR data for a lab result.
        ocr_data = {
            "raw_text": "Lab Result\nPatient: John Doe\nTest: Blood Glucose\nResult: 120 mg/dL",
            "structured_data": {
                "patient": {
                    "name": "John Doe",
                    "id": "12345678",
                },
                "document_type": "lab_result",
            },
        }

        # Map to FHIR Observation for the given patient id.
        observation = self.mapper.map_to_observation(ocr_data, "patient-123")

        # Verify FHIR resource.
        self.assertIsNotNone(observation)
        self.assertEqual(observation.status, "final")
        self.assertEqual(observation.subject.reference, "Patient/patient-123")
        self.assertEqual(observation.category[0].coding[0].code, "laboratory")
# Run the test suite when this module is executed as the main program.
# NOTE(review): the module uses relative imports, so it presumably has to be
# launched with `python -m <package>.<module>` rather than by file path.
if __name__ == "__main__":
    unittest.main()