ocr: attach high confidence stuff to all images
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s

This commit is contained in:
androidlover5842
2026-01-31 09:49:37 +05:30
parent b1efb2828f
commit c2d786e4e0
4 changed files with 96 additions and 31 deletions

View File

@@ -29,15 +29,18 @@ class DocumentExtractionService(
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
return ExtractionResult(results, false)
}
val ocrText = ocrResult?.texts?.takeIf { it.isNotEmpty() }?.joinToString("\n")
val detections = listOf(
Detection(
detect = {
results["isVehiclePhoto"] = llamaClient.ask(
results["isVehiclePhoto"] = askWithContext(
ocrText,
localImageUrl,
"IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
)
if (!isYes(results["isVehiclePhoto"])) return@Detection false
val candidate = llamaClient.ask(
val candidate = askWithContext(
ocrText,
localImageUrl,
"VEHICLE NUMBER PLATE? Reply only number or NONE."
)
@@ -55,11 +58,13 @@ class DocumentExtractionService(
),
Detection(
detect = {
results["hasAadhar"] = llamaClient.ask(
results["hasAadhar"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS AADHAAR? Answer YES or NO only."
)
results["hasUidai"] = llamaClient.ask(
results["hasUidai"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS UIDAI? Answer YES or NO only."
)
@@ -72,7 +77,7 @@ class DocumentExtractionService(
"hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
)
for ((key, question) in aadharQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
val hasAddress = isYes(results["hasAddress"])
if (hasAddress) {
@@ -81,7 +86,7 @@ class DocumentExtractionService(
DocumentPrompts.ADDRESS
)
for ((key, question) in addressQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
val hasDob = isYes(results["hasDob"])
@@ -94,19 +99,21 @@ class DocumentExtractionService(
DocumentPrompts.GENDER
)
for ((key, question) in aadharFrontQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult, ocrText)
}
}
),
Detection(
detect = {
results["hasDrivingLicence"] = llamaClient.ask(
results["hasDrivingLicence"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS DRIVING LICENCE? Answer YES or NO only."
)
results["hasTransportDept"] = llamaClient.ask(
results["hasTransportDept"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
)
@@ -124,13 +131,14 @@ class DocumentExtractionService(
DocumentPrompts.NATIONALITY
)
for ((key, question) in drivingQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
),
Detection(
detect = {
results["hasElectionCommission"] = llamaClient.ask(
results["hasElectionCommission"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
)
@@ -148,13 +156,14 @@ class DocumentExtractionService(
DocumentPrompts.NATIONALITY
)
for ((key, question) in voterQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
),
Detection(
detect = {
results["hasIncomeTaxDept"] = llamaClient.ask(
results["hasIncomeTaxDept"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
)
@@ -172,13 +181,14 @@ class DocumentExtractionService(
DocumentPrompts.NATIONALITY
)
for ((key, question) in panQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
),
Detection(
detect = {
results["hasPassport"] = llamaClient.ask(
results["hasPassport"] = askWithContext(
ocrText,
localImageUrl,
"CONTAINS PASSPORT? Answer YES or NO only."
)
@@ -196,7 +206,7 @@ class DocumentExtractionService(
DocumentPrompts.NATIONALITY
)
for ((key, question) in passportQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
)
@@ -224,11 +234,12 @@ class DocumentExtractionService(
DocumentPrompts.NATIONALITY
)
for ((key, question) in generalQuestions) {
results[key] = llamaClient.ask(localImageUrl, question)
results[key] = askWithContext(ocrText, localImageUrl, question)
}
}
normalizePinCode(results)
normalizeIdNumber(results)
results["docType"] = computeDocType(results, handled)
applyGuestUpdates(document, propertyId, results)
return ExtractionResult(results, handled)
@@ -257,6 +268,15 @@ class DocumentExtractionService(
results[pinKey] = chosen ?: "NONE"
}
/**
 * Rewrites the ID-number entry of [results] to its digits-only form when that
 * form passes [isValidAadhaar].
 *
 * The entry is left untouched when [normalizeDigits] yields `null` or when the
 * digits are not a valid Aadhaar number, so other ID formats keep whatever
 * value was stored.
 *
 * @param results extraction results, updated in place.
 */
private fun normalizeIdNumber(results: MutableMap<String, String>) {
    val key = DocumentPrompts.ID_NUMBER.first
    // Nothing usable to normalise: keep the stored value as it is.
    val candidate = normalizeDigits(cleanedValue(results[key])) ?: return
    if (!isValidAadhaar(candidate)) return
    results[key] = candidate
}
private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
if (!handled) return "GENERAL"
return when {
@@ -286,39 +306,45 @@ class DocumentExtractionService(
publicImageUrl: String,
document: GuestDocument,
results: MutableMap<String, String>,
ocrResult: PaddleOcrResult?
ocrResult: PaddleOcrResult?,
ocrText: String?
) {
val key = DocumentPrompts.ID_NUMBER.first
val current = cleanedValue(results[key])
val normalized = normalizeDigits(current)
if (normalized != null && isValidAadhaar(normalized)) {
results[key] = formatAadhaar(normalized)
results[key] = normalized
return
}
val retry = llamaClient.ask(
val retry = askWithContext(
ocrText,
localImageUrl,
"AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
)
val retryNormalized = normalizeDigits(cleanedValue(retry))
if (retryNormalized != null && isValidAadhaar(retryNormalized)) {
results[key] = formatAadhaar(retryNormalized)
results[key] = retryNormalized
return
}
if (ocrResult != null) {
val ocrCandidate = ocrResult.aadhaar
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {
results[key] = ocrCandidate
return
if (ocrCandidate != null) {
val ocrDigits = ocrCandidate.replace(" ", "")
if (isValidAadhaar(ocrDigits)) {
results[key] = ocrDigits
return
}
}
if (ocrResult.texts.isNotEmpty()) {
val ocrText = ocrResult.texts.joinToString("\n")
val ocrAsk = llamaClient.askText(
val ocrAsk = askWithContext(
ocrText,
localImageUrl,
"AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
)
val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk))
if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) {
results[key] = formatAadhaar(ocrAskNormalized)
results[key] = ocrAskNormalized
return
}
}
@@ -328,6 +354,14 @@ class DocumentExtractionService(
results[key] = "NONE"
}
/**
 * Asks the model [question] about the image at [imageUrl], attaching OCR text
 * when it is available.
 *
 * @param ocrText OCR output to send with the question, or `null` to ask with
 *   the image alone.
 * @param imageUrl URL of the image passed on to the client.
 * @param question prompt forwarded unchanged to the client.
 * @return the reply from `llamaClient.askWithOcr` when [ocrText] is non-null,
 *   otherwise the reply from `llamaClient.ask`.
 */
private fun askWithContext(ocrText: String?, imageUrl: String, question: String): String {
    // No OCR text at all: fall back to the image-only request.
    if (ocrText == null) {
        return llamaClient.ask(imageUrl, question)
    }
    return llamaClient.askWithOcr(imageUrl, ocrText, question)
}
private fun applyGuestUpdates(
document: GuestDocument,
propertyId: UUID,

View File

@@ -56,6 +56,34 @@ class LlamaClient(
return post(payload)
}
/**
 * Asks [question] about the image at [imageUrl] with [ocrText] appended to the
 * prompt.
 *
 * The request carries the system prompt followed by one user message with two
 * parts, in this order: a text part (the question, a blank line, an `OCR:`
 * label, then the OCR text) and an `image_url` part.
 *
 * @param imageUrl URL placed in the `image_url` part of the user message.
 * @param ocrText OCR output appended after the `OCR:` label.
 * @param question prompt placed at the start of the text part.
 * @return whatever `post` returns for the assembled payload.
 */
fun askWithOcr(imageUrl: String, ocrText: String, question: String): String {
    // Text part first, image part second.
    val textPart = mapOf(
        "type" to "text",
        "text" to "${question}\n\nOCR:\n${ocrText}"
    )
    val imagePart = mapOf(
        "type" to "image_url",
        "image_url" to mapOf("url" to imageUrl)
    )

    val systemMessage = mapOf(
        "role" to "system",
        "content" to systemPrompt
    )
    val userMessage = mapOf(
        "role" to "user",
        "content" to listOf(textPart, imagePart)
    )

    val payload = mapOf(
        "model" to "qwen",
        "temperature" to temperature,
        "top_p" to topP,
        "min_p" to minP,
        "repeat_penalty" to repeatPenalty,
        "top_k" to topK,
        "messages" to listOf(systemMessage, userMessage)
    )
    return post(payload)
}
fun askText(content: String, question: String): String {
val payload = mapOf(
"model" to "qwen",

View File

@@ -24,7 +24,9 @@ class PaddleOcrClient(
@Value("\${ocr.paddle.minScore:0.9}")
private val minScore: Double,
@Value("\${ocr.paddle.minAverageScore:0.8}")
private val minAverageScore: Double
private val minAverageScore: Double,
@Value("\${ocr.paddle.minTextLength:4}")
private val minTextLength: Int
) {
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
@@ -36,7 +38,7 @@ class PaddleOcrClient(
return try {
val output = callOcr(path)
val average = averageScore(output.scores)
val filtered = filterByScore(output.texts, output.scores, minScore)
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
val aadhaar = extractAadhaar(filtered)
val rejected = average != null && average < minAverageScore
PaddleOcrResult(filtered, aadhaar, average, rejected)
@@ -68,10 +70,10 @@ class PaddleOcrClient(
return OcrPayload(parsedTexts, parsedScores)
}
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
if (scores.size != texts.size || scores.isEmpty()) return texts
return texts.mapIndexedNotNull { index, text ->
if (scores[index] >= min) text else null
if (scores[index] >= min && text.trim().length >= minLen) text else null
}
}

View File

@@ -32,3 +32,4 @@ ocr.paddle.enabled=true
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
ocr.paddle.minScore=0.9
ocr.paddle.minAverageScore=0.8
ocr.paddle.minTextLength=4