extractor: reject the document when the average OCR score is below 80%
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s

This commit is contained in:
androidlover5842
2026-01-31 09:15:46 +05:30
parent ced92c34f8
commit ab5a8c0154
3 changed files with 26 additions and 6 deletions

View File

@@ -22,6 +22,13 @@ class DocumentExtractionService(
fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult {
val results = linkedMapOf<String, String>()
val ocrResult = paddleOcrClient.extract(document.storagePath)
if (ocrResult?.rejected == true) {
results["docType"] = "REJECTED"
results["rejectReason"] = "LOW_OCR_SCORE"
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
return ExtractionResult(results, false)
}
val detections = listOf(
Detection(
detect = {
@@ -89,7 +96,7 @@ class DocumentExtractionService(
for ((key, question) in aadharFrontQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
}
ensureAadhaarId(localImageUrl, publicImageUrl, document, results)
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
}
}
),
@@ -278,7 +285,8 @@ class DocumentExtractionService(
localImageUrl: String,
publicImageUrl: String,
document: GuestDocument,
results: MutableMap<String, String>
results: MutableMap<String, String>,
ocrResult: PaddleOcrResult?
) {
val key = DocumentPrompts.ID_NUMBER.first
val current = cleanedValue(results[key])
@@ -296,7 +304,6 @@ class DocumentExtractionService(
results[key] = formatAadhaar(retryNormalized)
return
}
val ocrResult = paddleOcrClient.extract(document.storagePath)
if (ocrResult != null) {
val ocrCandidate = ocrResult.aadhaar
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {

View File

@@ -22,7 +22,9 @@ class PaddleOcrClient(
@Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
private val baseUrl: String,
@Value("\${ocr.paddle.minScore:0.9}")
private val minScore: Double
private val minScore: Double,
@Value("\${ocr.paddle.minAverageScore:0.8}")
private val minAverageScore: Double
) {
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
@@ -33,9 +35,11 @@ class PaddleOcrClient(
return try {
val output = callOcr(path)
val average = averageScore(output.scores)
val filtered = filterByScore(output.texts, output.scores, minScore)
val aadhaar = extractAadhaar(filtered)
PaddleOcrResult(filtered, aadhaar)
val rejected = average != null && average < minAverageScore
PaddleOcrResult(filtered, aadhaar, average, rejected)
} catch (ex: Exception) {
logger.warn("PaddleOCR failed: {}", ex.message)
null
@@ -71,6 +75,11 @@ class PaddleOcrClient(
}
}
/**
 * Computes the arithmetic mean of the given OCR scores.
 *
 * @param scores the scores to average.
 * @return the mean of [scores], or `null` when the list is empty.
 */
private fun averageScore(scores: List<Double>): Double? =
    scores.takeIf { it.isNotEmpty() }?.average()
private fun extractAadhaar(texts: List<String>): String? {
val joined = texts.joinToString(" ")
val candidates = mutableListOf<String>()
@@ -88,7 +97,9 @@ class PaddleOcrClient(
/**
 * Result of a single PaddleOCR extraction as built by `PaddleOcrClient.extract`.
 *
 * NOTE(review): this span is shown as a diff hunk, so the two `aadhaar` lines below
 * are the before/after versions of the same parameter, not two parameters.
 *
 * - `texts`: the OCR texts that remain after filtering by the configured minimum score.
 * - `aadhaar`: the Aadhaar candidate extracted from the filtered texts, or `null` if none.
 * - `averageScore`: mean of all scores returned by the OCR call; `null` when there are no scores.
 * - `rejected`: `true` when `averageScore` is non-null and below the configured minimum average score.
 */
data class PaddleOcrResult(
val texts: List<String>,
val aadhaar: String?
val aadhaar: String?,
val averageScore: Double?,
val rejected: Boolean
)
private data class OcrPayload(

View File

@@ -29,4 +29,6 @@ ai.llama.minP=0.2
ai.llama.repeatPenalty=1.0
ai.llama.topK=40
ocr.paddle.enabled=true
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
ocr.paddle.minScore=0.9
ocr.paddle.minAverageScore=0.8