extractor: reject document when average OCR score is below 80%
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s

This commit is contained in:
androidlover5842
2026-01-31 09:15:46 +05:30
parent ced92c34f8
commit ab5a8c0154
3 changed files with 26 additions and 6 deletions

View File

@@ -22,6 +22,13 @@ class DocumentExtractionService(
fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult { fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult {
val results = linkedMapOf<String, String>() val results = linkedMapOf<String, String>()
val ocrResult = paddleOcrClient.extract(document.storagePath)
if (ocrResult?.rejected == true) {
results["docType"] = "REJECTED"
results["rejectReason"] = "LOW_OCR_SCORE"
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
return ExtractionResult(results, false)
}
val detections = listOf( val detections = listOf(
Detection( Detection(
detect = { detect = {
@@ -89,7 +96,7 @@ class DocumentExtractionService(
for ((key, question) in aadharFrontQuestions) { for ((key, question) in aadharFrontQuestions) {
results[key] = llamaClient.ask(localImageUrl, question) results[key] = llamaClient.ask(localImageUrl, question)
} }
ensureAadhaarId(localImageUrl, publicImageUrl, document, results) ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
} }
} }
), ),
@@ -278,7 +285,8 @@ class DocumentExtractionService(
localImageUrl: String, localImageUrl: String,
publicImageUrl: String, publicImageUrl: String,
document: GuestDocument, document: GuestDocument,
results: MutableMap<String, String> results: MutableMap<String, String>,
ocrResult: PaddleOcrResult?
) { ) {
val key = DocumentPrompts.ID_NUMBER.first val key = DocumentPrompts.ID_NUMBER.first
val current = cleanedValue(results[key]) val current = cleanedValue(results[key])
@@ -296,7 +304,6 @@ class DocumentExtractionService(
results[key] = formatAadhaar(retryNormalized) results[key] = formatAadhaar(retryNormalized)
return return
} }
val ocrResult = paddleOcrClient.extract(document.storagePath)
if (ocrResult != null) { if (ocrResult != null) {
val ocrCandidate = ocrResult.aadhaar val ocrCandidate = ocrResult.aadhaar
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) { if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {

View File

@@ -22,7 +22,9 @@ class PaddleOcrClient(
@Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}") @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
private val baseUrl: String, private val baseUrl: String,
@Value("\${ocr.paddle.minScore:0.9}") @Value("\${ocr.paddle.minScore:0.9}")
private val minScore: Double private val minScore: Double,
@Value("\${ocr.paddle.minAverageScore:0.8}")
private val minAverageScore: Double
) { ) {
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java) private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
@@ -33,9 +35,11 @@ class PaddleOcrClient(
return try { return try {
val output = callOcr(path) val output = callOcr(path)
val average = averageScore(output.scores)
val filtered = filterByScore(output.texts, output.scores, minScore) val filtered = filterByScore(output.texts, output.scores, minScore)
val aadhaar = extractAadhaar(filtered) val aadhaar = extractAadhaar(filtered)
PaddleOcrResult(filtered, aadhaar) val rejected = average != null && average < minAverageScore
PaddleOcrResult(filtered, aadhaar, average, rejected)
} catch (ex: Exception) { } catch (ex: Exception) {
logger.warn("PaddleOCR failed: {}", ex.message) logger.warn("PaddleOCR failed: {}", ex.message)
null null
@@ -71,6 +75,11 @@ class PaddleOcrClient(
} }
} }
/**
 * Computes the arithmetic mean of the given scores.
 *
 * @param scores the values to average.
 * @return the sum of [scores] divided by their count, or `null` when the list is empty.
 */
private fun averageScore(scores: List<Double>): Double? =
    if (scores.isNotEmpty()) scores.sum() / scores.size else null
private fun extractAadhaar(texts: List<String>): String? { private fun extractAadhaar(texts: List<String>): String? {
val joined = texts.joinToString(" ") val joined = texts.joinToString(" ")
val candidates = mutableListOf<String>() val candidates = mutableListOf<String>()
@@ -88,7 +97,9 @@ class PaddleOcrClient(
data class PaddleOcrResult( data class PaddleOcrResult(
val texts: List<String>, val texts: List<String>,
val aadhaar: String? val aadhaar: String?,
val averageScore: Double?,
val rejected: Boolean
) )
private data class OcrPayload( private data class OcrPayload(

View File

@@ -29,4 +29,6 @@ ai.llama.minP=0.2
ai.llama.repeatPenalty=1.0 ai.llama.repeatPenalty=1.0
ai.llama.topK=40 ai.llama.topK=40
ocr.paddle.enabled=true ocr.paddle.enabled=true
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
ocr.paddle.minScore=0.9 ocr.paddle.minScore=0.9
ocr.paddle.minAverageScore=0.8