extractor: reject documents whose average OCR score is below 80%
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s
This commit is contained in:
@@ -22,6 +22,13 @@ class DocumentExtractionService(
|
||||
|
||||
fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult {
|
||||
val results = linkedMapOf<String, String>()
|
||||
val ocrResult = paddleOcrClient.extract(document.storagePath)
|
||||
if (ocrResult?.rejected == true) {
|
||||
results["docType"] = "REJECTED"
|
||||
results["rejectReason"] = "LOW_OCR_SCORE"
|
||||
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
|
||||
return ExtractionResult(results, false)
|
||||
}
|
||||
val detections = listOf(
|
||||
Detection(
|
||||
detect = {
|
||||
@@ -89,7 +96,7 @@ class DocumentExtractionService(
|
||||
for ((key, question) in aadharFrontQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
}
|
||||
ensureAadhaarId(localImageUrl, publicImageUrl, document, results)
|
||||
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
|
||||
}
|
||||
}
|
||||
),
|
||||
@@ -278,7 +285,8 @@ class DocumentExtractionService(
|
||||
localImageUrl: String,
|
||||
publicImageUrl: String,
|
||||
document: GuestDocument,
|
||||
results: MutableMap<String, String>
|
||||
results: MutableMap<String, String>,
|
||||
ocrResult: PaddleOcrResult?
|
||||
) {
|
||||
val key = DocumentPrompts.ID_NUMBER.first
|
||||
val current = cleanedValue(results[key])
|
||||
@@ -296,7 +304,6 @@ class DocumentExtractionService(
|
||||
results[key] = formatAadhaar(retryNormalized)
|
||||
return
|
||||
}
|
||||
val ocrResult = paddleOcrClient.extract(document.storagePath)
|
||||
if (ocrResult != null) {
|
||||
val ocrCandidate = ocrResult.aadhaar
|
||||
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {
|
||||
|
||||
@@ -22,7 +22,9 @@ class PaddleOcrClient(
|
||||
@Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
|
||||
private val baseUrl: String,
|
||||
@Value("\${ocr.paddle.minScore:0.9}")
|
||||
private val minScore: Double
|
||||
private val minScore: Double,
|
||||
@Value("\${ocr.paddle.minAverageScore:0.8}")
|
||||
private val minAverageScore: Double
|
||||
) {
|
||||
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
||||
|
||||
@@ -33,9 +35,11 @@ class PaddleOcrClient(
|
||||
|
||||
return try {
|
||||
val output = callOcr(path)
|
||||
val average = averageScore(output.scores)
|
||||
val filtered = filterByScore(output.texts, output.scores, minScore)
|
||||
val aadhaar = extractAadhaar(filtered)
|
||||
PaddleOcrResult(filtered, aadhaar)
|
||||
val rejected = average != null && average < minAverageScore
|
||||
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
||||
} catch (ex: Exception) {
|
||||
logger.warn("PaddleOCR failed: {}", ex.message)
|
||||
null
|
||||
@@ -71,6 +75,11 @@ class PaddleOcrClient(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Computes the arithmetic mean of [scores].
 *
 * @param scores the values to average
 * @return the mean of all values, or `null` when [scores] is empty
 */
private fun averageScore(scores: List<Double>): Double? =
    // Guard the empty case explicitly so the caller receives null rather than NaN.
    scores.takeIf { it.isNotEmpty() }?.average()
|
||||
|
||||
private fun extractAadhaar(texts: List<String>): String? {
|
||||
val joined = texts.joinToString(" ")
|
||||
val candidates = mutableListOf<String>()
|
||||
@@ -88,7 +97,9 @@ class PaddleOcrClient(
|
||||
|
||||
data class PaddleOcrResult(
|
||||
val texts: List<String>,
|
||||
val aadhaar: String?
|
||||
val aadhaar: String?,
|
||||
val averageScore: Double?,
|
||||
val rejected: Boolean
|
||||
)
|
||||
|
||||
private data class OcrPayload(
|
||||
|
||||
@@ -29,4 +29,6 @@ ai.llama.minP=0.2
|
||||
ai.llama.repeatPenalty=1.0
|
||||
ai.llama.topK=40
|
||||
ocr.paddle.enabled=true
|
||||
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
|
||||
ocr.paddle.minScore=0.9
|
||||
ocr.paddle.minAverageScore=0.8
|
||||
|
||||
Reference in New Issue
Block a user