"""Unit tests for OCRProcessor extraction and FHIRMapper resource mapping."""

import json
import logging
import os
import unittest
from unittest.mock import MagicMock, patch

from .fhir_mapper import FHIRMapper
from .ocr_processor import OCRProcessor

# Set up logging for tests
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class TestOCRModule(unittest.TestCase):
    """Test cases for OCR module."""

    def _start_patch(self, target):
        """Start a patch on ``target`` and guarantee it is undone.

        The stop is registered with ``addCleanup`` right after ``start()`` so
        the patch is removed even if a later step of ``setUp`` raises; in that
        situation unittest does not call ``tearDown``.

        Args:
            target: Dotted path of the attribute to replace with a mock.

        Returns:
            A ``(patcher, mock)`` tuple for the started patch.
        """
        patcher = patch(target)
        mock = patcher.start()
        self.addCleanup(patcher.stop)
        return patcher, mock

    def setUp(self):
        """Set up test fixtures.

        Replaces the Tesseract entry points with mocks so the tests do not
        depend on a real Tesseract installation, then builds the
        ``OCRProcessor`` and ``FHIRMapper`` under test.
        """
        # NOTE(review): these targets patch attributes of the
        # ``pytesseract.pytesseract`` submodule. If OCRProcessor calls the
        # functions through the package namespace (``pytesseract.image_to_string``)
        # the mocks may not intercept the calls -- confirm against ocr_processor.

        # Mock Tesseract to avoid dependency on actual installation for tests
        self.tesseract_patcher, self.mock_image_to_string = self._start_patch(
            'pytesseract.pytesseract.image_to_string'
        )
        # One field per line, mirroring the layout of the raw_text fixtures
        # used by the mapping tests below.
        self.mock_image_to_string.return_value = """
        Patient: John Doe
        DOB: 01/15/1980
        Sex: Male
        MRN: 12345678
        Insurance: HealthCorp
        Policy #: HC987654321
        """

        # Mock image_to_data to return confidence scores
        self.data_patcher, self.mock_image_to_data = self._start_patch(
            'pytesseract.pytesseract.image_to_data'
        )
        self.mock_image_to_data.return_value = {
            'conf': [90, 95, 85, 92, 88]
        }

        # Mock Tesseract version
        self.version_patcher, self.mock_get_version = self._start_patch(
            'pytesseract.pytesseract.get_tesseract_version'
        )
        self.mock_get_version.return_value = '4.1.1'

        # Create OCR processor with mocks
        self.ocr = OCRProcessor()

        # Create FHIR mapper
        self.mapper = FHIRMapper()

    @patch('cv2.imread')
    @patch('cv2.cvtColor')
    @patch('cv2.threshold')
    @patch('os.path.exists')
    def test_process_image(self, mock_exists, mock_threshold, mock_cvtcolor, mock_imread):
        """Test image processing and OCR extraction."""
        # Set up mocks. Decorators are applied bottom-up, so the mock
        # parameters arrive in the reverse order of the @patch lines.
        mock_exists.return_value = True
        mock_imread.return_value = MagicMock()
        mock_cvtcolor.return_value = MagicMock()
        # Two-element tuple; the second element stands in for the image.
        mock_threshold.return_value = (None, MagicMock())

        # Process a mock image
        result = self.ocr.process_image("test_image.jpg")

        # Verify results
        self.assertIsNotNone(result)
        self.assertIn("raw_text", result)
        self.assertIn("structured_data", result)
        self.assertIn("confidence", result)
        self.assertIn("metadata", result)

        # Check structured data extraction
        patient_data = result["structured_data"]["patient"]
        self.assertEqual(patient_data["name"], "John Doe")
        self.assertEqual(patient_data["dob"], "01/15/1980")
        self.assertEqual(patient_data["gender"], "Male")
        self.assertEqual(patient_data["id"], "12345678")

        # Check document type detection
        self.assertEqual(result["structured_data"]["document_type"], "insurance_card")

    def test_map_to_fhir_patient(self):
        """Test mapping OCR data to FHIR Patient resource."""
        # Create sample OCR data
        ocr_data = {
            "raw_text": "Patient: John Doe\nDOB: 01/15/1980\nSex: Male\nMRN: 12345678",
            "structured_data": {
                "patient": {
                    "name": "John Doe",
                    "dob": "01/15/1980",
                    "gender": "Male",
                    "id": "12345678"
                },
                "document_type": "insurance_card"
            }
        }

        # Map to FHIR Patient
        patient = self.mapper.map_to_patient(ocr_data)

        # Verify FHIR resource
        self.assertIsNotNone(patient)
        self.assertEqual(patient.gender, "male")
        self.assertEqual(patient.name[0].family, "Doe")
        self.assertEqual(patient.name[0].given[0], "John")
        self.assertEqual(patient.birthDate, "1980-01-15")
        self.assertEqual(patient.identifier[0].value, "12345678")

    def test_map_to_fhir_observation(self):
        """Test mapping OCR data to FHIR Observation resource."""
        # Create sample OCR data for a lab result
        ocr_data = {
            "raw_text": "Lab Result\nPatient: John Doe\nTest: Blood Glucose\nResult: 120 mg/dL",
            "structured_data": {
                "patient": {
                    "name": "John Doe",
                    "id": "12345678"
                },
                "document_type": "lab_result"
            }
        }

        # Map to FHIR Observation
        observation = self.mapper.map_to_observation(ocr_data, "patient-123")

        # Verify FHIR resource
        self.assertIsNotNone(observation)
        self.assertEqual(observation.status, "final")
        self.assertEqual(observation.subject.reference, "Patient/patient-123")
        self.assertEqual(observation.category[0].coding[0].code, "laboratory")


if __name__ == "__main__":
    unittest.main()