diff --git a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt index f125a02..b573150 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt @@ -22,6 +22,13 @@ class DocumentExtractionService( fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult { val results = linkedMapOf() + val ocrResult = paddleOcrClient.extract(document.storagePath) + if (ocrResult?.rejected == true) { + results["docType"] = "REJECTED" + results["rejectReason"] = "LOW_OCR_SCORE" + results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN" + return ExtractionResult(results, false) + } val detections = listOf( Detection( detect = { @@ -89,7 +96,7 @@ class DocumentExtractionService( for ((key, question) in aadharFrontQuestions) { results[key] = llamaClient.ask(localImageUrl, question) } - ensureAadhaarId(localImageUrl, publicImageUrl, document, results) + ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult) } } ), @@ -278,7 +285,8 @@ class DocumentExtractionService( localImageUrl: String, publicImageUrl: String, document: GuestDocument, - results: MutableMap + results: MutableMap, + ocrResult: PaddleOcrResult? ) { val key = DocumentPrompts.ID_NUMBER.first val current = cleanedValue(results[key]) @@ -296,7 +304,6 @@ class DocumentExtractionService( results[key] = formatAadhaar(retryNormalized) return } - val ocrResult = paddleOcrClient.extract(document.storagePath) if (ocrResult != null) { val ocrCandidate = ocrResult.aadhaar if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) { diff --git a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt index c96deb6..3648652 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt @@ -22,7 +22,9 @@ class PaddleOcrClient( @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}") private val baseUrl: String, @Value("\${ocr.paddle.minScore:0.9}") - private val minScore: Double + private val minScore: Double, + @Value("\${ocr.paddle.minAverageScore:0.8}") + private val minAverageScore: Double ) { private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java) @@ -33,9 +35,11 @@ class PaddleOcrClient( return try { val output = callOcr(path) + val average = averageScore(output.scores) val filtered = filterByScore(output.texts, output.scores, minScore) val aadhaar = extractAadhaar(filtered) - PaddleOcrResult(filtered, aadhaar) + val rejected = average != null && average < minAverageScore + PaddleOcrResult(filtered, aadhaar, average, rejected) } catch (ex: Exception) { logger.warn("PaddleOCR failed: {}", ex.message) null @@ -71,6 +75,11 @@ class PaddleOcrClient( } } + private fun averageScore(scores: List): Double? { + if (scores.isEmpty()) return null + return scores.sum() / scores.size + } + private fun extractAadhaar(texts: List): String? { val joined = texts.joinToString(" ") val candidates = mutableListOf() @@ -88,7 +97,9 @@ class PaddleOcrClient( data class PaddleOcrResult( val texts: List, - val aadhaar: String? + val aadhaar: String?, + val averageScore: Double?, + val rejected: Boolean ) private data class OcrPayload( diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index ea62716..907994a 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -29,4 +29,6 @@ ai.llama.minP=0.2 ai.llama.repeatPenalty=1.0 ai.llama.topK=40 ocr.paddle.enabled=true +ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/ ocr.paddle.minScore=0.9 +ocr.paddle.minAverageScore=0.8