diff --git a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt index b573150..005a2ef 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt @@ -29,15 +29,18 @@ class DocumentExtractionService( results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN" return ExtractionResult(results, false) } + val ocrText = ocrResult?.texts?.takeIf { it.isNotEmpty() }?.joinToString("\n") val detections = listOf( Detection( detect = { - results["isVehiclePhoto"] = llamaClient.ask( + results["isVehiclePhoto"] = askWithContext( + ocrText, localImageUrl, "IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only." ) if (!isYes(results["isVehiclePhoto"])) return@Detection false - val candidate = llamaClient.ask( + val candidate = askWithContext( + ocrText, localImageUrl, "VEHICLE NUMBER PLATE? Reply only number or NONE." ) @@ -55,11 +58,13 @@ class DocumentExtractionService( ), Detection( detect = { - results["hasAadhar"] = llamaClient.ask( + results["hasAadhar"] = askWithContext( + ocrText, localImageUrl, "CONTAINS AADHAAR? Answer YES or NO only." ) - results["hasUidai"] = llamaClient.ask( + results["hasUidai"] = askWithContext( + ocrText, localImageUrl, "CONTAINS UIDAI? Answer YES or NO only." ) @@ -72,7 +77,7 @@ class DocumentExtractionService( "hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO." ) for ((key, question) in aadharQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } val hasAddress = isYes(results["hasAddress"]) if (hasAddress) { @@ -81,7 +86,7 @@ class DocumentExtractionService( DocumentPrompts.ADDRESS ) for ((key, question) in addressQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } val hasDob = isYes(results["hasDob"]) @@ -94,19 +99,21 @@ class DocumentExtractionService( DocumentPrompts.GENDER ) for ((key, question) in aadharFrontQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } - ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult) + ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult, ocrText) } } ), Detection( detect = { - results["hasDrivingLicence"] = llamaClient.ask( + results["hasDrivingLicence"] = askWithContext( + ocrText, localImageUrl, "CONTAINS DRIVING LICENCE? Answer YES or NO only." ) - results["hasTransportDept"] = llamaClient.ask( + results["hasTransportDept"] = askWithContext( + ocrText, localImageUrl, "CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only." ) @@ -124,13 +131,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in drivingQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } ), Detection( detect = { - results["hasElectionCommission"] = llamaClient.ask( + results["hasElectionCommission"] = askWithContext( + ocrText, localImageUrl, "CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only." ) @@ -148,13 +156,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in voterQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } ), Detection( detect = { - results["hasIncomeTaxDept"] = llamaClient.ask( + results["hasIncomeTaxDept"] = askWithContext( + ocrText, localImageUrl, "CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only." ) @@ -172,13 +181,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in panQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } ), Detection( detect = { - results["hasPassport"] = llamaClient.ask( + results["hasPassport"] = askWithContext( + ocrText, localImageUrl, "CONTAINS PASSPORT? Answer YES or NO only." ) @@ -196,7 +206,7 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in passportQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } ) @@ -224,11 +234,12 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in generalQuestions) { - results[key] = llamaClient.ask(localImageUrl, question) + results[key] = askWithContext(ocrText, localImageUrl, question) } } normalizePinCode(results) + normalizeIdNumber(results) results["docType"] = computeDocType(results, handled) applyGuestUpdates(document, propertyId, results) return ExtractionResult(results, handled) @@ -257,6 +268,15 @@ class DocumentExtractionService( results[pinKey] = chosen ?: "NONE" } + private fun normalizeIdNumber(results: MutableMap) { + val idKey = DocumentPrompts.ID_NUMBER.first + val raw = cleanedValue(results[idKey]) + val digits = normalizeDigits(raw) + if (digits != null && isValidAadhaar(digits)) { + results[idKey] = digits + } + } + private fun computeDocType(results: Map, handled: Boolean): String { if (!handled) return "GENERAL" return when { @@ -286,39 +306,45 @@ class DocumentExtractionService( publicImageUrl: String, document: GuestDocument, results: MutableMap, - ocrResult: PaddleOcrResult? + ocrResult: PaddleOcrResult?, + ocrText: String? ) { val key = DocumentPrompts.ID_NUMBER.first val current = cleanedValue(results[key]) val normalized = normalizeDigits(current) if (normalized != null && isValidAadhaar(normalized)) { - results[key] = formatAadhaar(normalized) + results[key] = normalized return } - val retry = llamaClient.ask( + val retry = askWithContext( + ocrText, localImageUrl, "AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE." ) val retryNormalized = normalizeDigits(cleanedValue(retry)) if (retryNormalized != null && isValidAadhaar(retryNormalized)) { - results[key] = formatAadhaar(retryNormalized) + results[key] = retryNormalized return } if (ocrResult != null) { val ocrCandidate = ocrResult.aadhaar - if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) { - results[key] = ocrCandidate - return + if (ocrCandidate != null) { + val ocrDigits = ocrCandidate.replace(" ", "") + if (isValidAadhaar(ocrDigits)) { + results[key] = ocrDigits + return + } } if (ocrResult.texts.isNotEmpty()) { val ocrText = ocrResult.texts.joinToString("\n") - val ocrAsk = llamaClient.askText( + val ocrAsk = askWithContext( ocrText, + localImageUrl, "AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE." ) val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk)) if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) { - results[key] = formatAadhaar(ocrAskNormalized) + results[key] = ocrAskNormalized return } } @@ -328,6 +354,14 @@ class DocumentExtractionService( results[key] = "NONE" } + private fun askWithContext(ocrText: String?, imageUrl: String, question: String): String { + return if (ocrText != null) { + llamaClient.askWithOcr(imageUrl, ocrText, question) + } else { + llamaClient.ask(imageUrl, question) + } + } + private fun applyGuestUpdates( document: GuestDocument, propertyId: UUID, diff --git a/src/main/kotlin/com/android/trisolarisserver/component/LlamaClient.kt b/src/main/kotlin/com/android/trisolarisserver/component/LlamaClient.kt index 9aca0c6..4cfe5c3 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/LlamaClient.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/LlamaClient.kt @@ -56,6 +56,34 @@ class LlamaClient( return post(payload) } + fun askWithOcr(imageUrl: String, ocrText: String, question: String): String { + val payload = mapOf( + "model" to "qwen", + "temperature" to temperature, + "top_p" to topP, + "min_p" to minP, + "repeat_penalty" to repeatPenalty, + "top_k" to topK, + "messages" to listOf( + mapOf( + "role" to "system", + "content" to systemPrompt + ), + mapOf( + "role" to "user", + "content" to listOf( + mapOf( + "type" to "text", + "text" to "${question}\n\nOCR:\n${ocrText}" + ), + mapOf("type" to "image_url", "image_url" to mapOf("url" to imageUrl)) + ) + ) + ) + ) + return post(payload) + } + fun askText(content: String, question: String): String { val payload = mapOf( "model" to "qwen", diff --git a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt index 3648652..db65ed2 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/PaddleOcrClient.kt @@ -24,7 +24,9 @@ class PaddleOcrClient( @Value("\${ocr.paddle.minScore:0.9}") private val minScore: Double, @Value("\${ocr.paddle.minAverageScore:0.8}") - private val minAverageScore: Double + private val minAverageScore: Double, + @Value("\${ocr.paddle.minTextLength:4}") + private val minTextLength: Int ) { private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java) @@ -36,7 +38,7 @@ class PaddleOcrClient( return try { val output = callOcr(path) val average = averageScore(output.scores) - val filtered = filterByScore(output.texts, output.scores, minScore) + val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength) val aadhaar = extractAadhaar(filtered) val rejected = average != null && average < minAverageScore PaddleOcrResult(filtered, aadhaar, average, rejected) @@ -68,10 +70,10 @@ class PaddleOcrClient( return OcrPayload(parsedTexts, parsedScores) } - private fun filterByScore(texts: List, scores: List, min: Double): List { + private fun filterByScore(texts: List, scores: List, min: Double, minLen: Int): List { if (scores.size != texts.size || scores.isEmpty()) return texts return texts.mapIndexedNotNull { index, text -> - if (scores[index] >= min) text else null + if (scores[index] >= min && text.trim().length >= minLen) text else null } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 907994a..8d673bb 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -32,3 +32,4 @@ ocr.paddle.enabled=true ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/ ocr.paddle.minScore=0.9 ocr.paddle.minAverageScore=0.8 +ocr.paddle.minTextLength=4