ocr: attach high confidence stuff to all images
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
This commit is contained in:
@@ -29,15 +29,18 @@ class DocumentExtractionService(
|
||||
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
|
||||
return ExtractionResult(results, false)
|
||||
}
|
||||
val ocrText = ocrResult?.texts?.takeIf { it.isNotEmpty() }?.joinToString("\n")
|
||||
val detections = listOf(
|
||||
Detection(
|
||||
detect = {
|
||||
results["isVehiclePhoto"] = llamaClient.ask(
|
||||
results["isVehiclePhoto"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
|
||||
)
|
||||
if (!isYes(results["isVehiclePhoto"])) return@Detection false
|
||||
val candidate = llamaClient.ask(
|
||||
val candidate = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"VEHICLE NUMBER PLATE? Reply only number or NONE."
|
||||
)
|
||||
@@ -55,11 +58,13 @@ class DocumentExtractionService(
|
||||
),
|
||||
Detection(
|
||||
detect = {
|
||||
results["hasAadhar"] = llamaClient.ask(
|
||||
results["hasAadhar"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS AADHAAR? Answer YES or NO only."
|
||||
)
|
||||
results["hasUidai"] = llamaClient.ask(
|
||||
results["hasUidai"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS UIDAI? Answer YES or NO only."
|
||||
)
|
||||
@@ -72,7 +77,7 @@ class DocumentExtractionService(
|
||||
"hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
|
||||
)
|
||||
for ((key, question) in aadharQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
val hasAddress = isYes(results["hasAddress"])
|
||||
if (hasAddress) {
|
||||
@@ -81,7 +86,7 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.ADDRESS
|
||||
)
|
||||
for ((key, question) in addressQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
val hasDob = isYes(results["hasDob"])
|
||||
@@ -94,19 +99,21 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.GENDER
|
||||
)
|
||||
for ((key, question) in aadharFrontQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
|
||||
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult, ocrText)
|
||||
}
|
||||
}
|
||||
),
|
||||
Detection(
|
||||
detect = {
|
||||
results["hasDrivingLicence"] = llamaClient.ask(
|
||||
results["hasDrivingLicence"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS DRIVING LICENCE? Answer YES or NO only."
|
||||
)
|
||||
results["hasTransportDept"] = llamaClient.ask(
|
||||
results["hasTransportDept"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
|
||||
)
|
||||
@@ -124,13 +131,14 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.NATIONALITY
|
||||
)
|
||||
for ((key, question) in drivingQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
),
|
||||
Detection(
|
||||
detect = {
|
||||
results["hasElectionCommission"] = llamaClient.ask(
|
||||
results["hasElectionCommission"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
|
||||
)
|
||||
@@ -148,13 +156,14 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.NATIONALITY
|
||||
)
|
||||
for ((key, question) in voterQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
),
|
||||
Detection(
|
||||
detect = {
|
||||
results["hasIncomeTaxDept"] = llamaClient.ask(
|
||||
results["hasIncomeTaxDept"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
|
||||
)
|
||||
@@ -172,13 +181,14 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.NATIONALITY
|
||||
)
|
||||
for ((key, question) in panQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
),
|
||||
Detection(
|
||||
detect = {
|
||||
results["hasPassport"] = llamaClient.ask(
|
||||
results["hasPassport"] = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"CONTAINS PASSPORT? Answer YES or NO only."
|
||||
)
|
||||
@@ -196,7 +206,7 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.NATIONALITY
|
||||
)
|
||||
for ((key, question) in passportQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
)
|
||||
@@ -224,11 +234,12 @@ class DocumentExtractionService(
|
||||
DocumentPrompts.NATIONALITY
|
||||
)
|
||||
for ((key, question) in generalQuestions) {
|
||||
results[key] = llamaClient.ask(localImageUrl, question)
|
||||
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||
}
|
||||
}
|
||||
|
||||
normalizePinCode(results)
|
||||
normalizeIdNumber(results)
|
||||
results["docType"] = computeDocType(results, handled)
|
||||
applyGuestUpdates(document, propertyId, results)
|
||||
return ExtractionResult(results, handled)
|
||||
@@ -257,6 +268,15 @@ class DocumentExtractionService(
|
||||
results[pinKey] = chosen ?: "NONE"
|
||||
}
|
||||
|
||||
private fun normalizeIdNumber(results: MutableMap<String, String>) {
|
||||
val idKey = DocumentPrompts.ID_NUMBER.first
|
||||
val raw = cleanedValue(results[idKey])
|
||||
val digits = normalizeDigits(raw)
|
||||
if (digits != null && isValidAadhaar(digits)) {
|
||||
results[idKey] = digits
|
||||
}
|
||||
}
|
||||
|
||||
private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
|
||||
if (!handled) return "GENERAL"
|
||||
return when {
|
||||
@@ -286,39 +306,45 @@ class DocumentExtractionService(
|
||||
publicImageUrl: String,
|
||||
document: GuestDocument,
|
||||
results: MutableMap<String, String>,
|
||||
ocrResult: PaddleOcrResult?
|
||||
ocrResult: PaddleOcrResult?,
|
||||
ocrText: String?
|
||||
) {
|
||||
val key = DocumentPrompts.ID_NUMBER.first
|
||||
val current = cleanedValue(results[key])
|
||||
val normalized = normalizeDigits(current)
|
||||
if (normalized != null && isValidAadhaar(normalized)) {
|
||||
results[key] = formatAadhaar(normalized)
|
||||
results[key] = normalized
|
||||
return
|
||||
}
|
||||
val retry = llamaClient.ask(
|
||||
val retry = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
|
||||
)
|
||||
val retryNormalized = normalizeDigits(cleanedValue(retry))
|
||||
if (retryNormalized != null && isValidAadhaar(retryNormalized)) {
|
||||
results[key] = formatAadhaar(retryNormalized)
|
||||
results[key] = retryNormalized
|
||||
return
|
||||
}
|
||||
if (ocrResult != null) {
|
||||
val ocrCandidate = ocrResult.aadhaar
|
||||
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {
|
||||
results[key] = ocrCandidate
|
||||
return
|
||||
if (ocrCandidate != null) {
|
||||
val ocrDigits = ocrCandidate.replace(" ", "")
|
||||
if (isValidAadhaar(ocrDigits)) {
|
||||
results[key] = ocrDigits
|
||||
return
|
||||
}
|
||||
}
|
||||
if (ocrResult.texts.isNotEmpty()) {
|
||||
val ocrText = ocrResult.texts.joinToString("\n")
|
||||
val ocrAsk = llamaClient.askText(
|
||||
val ocrAsk = askWithContext(
|
||||
ocrText,
|
||||
localImageUrl,
|
||||
"AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
|
||||
)
|
||||
val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk))
|
||||
if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) {
|
||||
results[key] = formatAadhaar(ocrAskNormalized)
|
||||
results[key] = ocrAskNormalized
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -328,6 +354,14 @@ class DocumentExtractionService(
|
||||
results[key] = "NONE"
|
||||
}
|
||||
|
||||
private fun askWithContext(ocrText: String?, imageUrl: String, question: String): String {
|
||||
return if (ocrText != null) {
|
||||
llamaClient.askWithOcr(imageUrl, ocrText, question)
|
||||
} else {
|
||||
llamaClient.ask(imageUrl, question)
|
||||
}
|
||||
}
|
||||
|
||||
private fun applyGuestUpdates(
|
||||
document: GuestDocument,
|
||||
propertyId: UUID,
|
||||
|
||||
@@ -56,6 +56,34 @@ class LlamaClient(
|
||||
return post(payload)
|
||||
}
|
||||
|
||||
fun askWithOcr(imageUrl: String, ocrText: String, question: String): String {
|
||||
val payload = mapOf(
|
||||
"model" to "qwen",
|
||||
"temperature" to temperature,
|
||||
"top_p" to topP,
|
||||
"min_p" to minP,
|
||||
"repeat_penalty" to repeatPenalty,
|
||||
"top_k" to topK,
|
||||
"messages" to listOf(
|
||||
mapOf(
|
||||
"role" to "system",
|
||||
"content" to systemPrompt
|
||||
),
|
||||
mapOf(
|
||||
"role" to "user",
|
||||
"content" to listOf(
|
||||
mapOf(
|
||||
"type" to "text",
|
||||
"text" to "${question}\n\nOCR:\n${ocrText}"
|
||||
),
|
||||
mapOf("type" to "image_url", "image_url" to mapOf("url" to imageUrl))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
return post(payload)
|
||||
}
|
||||
|
||||
fun askText(content: String, question: String): String {
|
||||
val payload = mapOf(
|
||||
"model" to "qwen",
|
||||
|
||||
@@ -24,7 +24,9 @@ class PaddleOcrClient(
|
||||
@Value("\${ocr.paddle.minScore:0.9}")
|
||||
private val minScore: Double,
|
||||
@Value("\${ocr.paddle.minAverageScore:0.8}")
|
||||
private val minAverageScore: Double
|
||||
private val minAverageScore: Double,
|
||||
@Value("\${ocr.paddle.minTextLength:4}")
|
||||
private val minTextLength: Int
|
||||
) {
|
||||
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
||||
|
||||
@@ -36,7 +38,7 @@ class PaddleOcrClient(
|
||||
return try {
|
||||
val output = callOcr(path)
|
||||
val average = averageScore(output.scores)
|
||||
val filtered = filterByScore(output.texts, output.scores, minScore)
|
||||
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
|
||||
val aadhaar = extractAadhaar(filtered)
|
||||
val rejected = average != null && average < minAverageScore
|
||||
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
||||
@@ -68,10 +70,10 @@ class PaddleOcrClient(
|
||||
return OcrPayload(parsedTexts, parsedScores)
|
||||
}
|
||||
|
||||
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
|
||||
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
|
||||
if (scores.size != texts.size || scores.isEmpty()) return texts
|
||||
return texts.mapIndexedNotNull { index, text ->
|
||||
if (scores[index] >= min) text else null
|
||||
if (scores[index] >= min && text.trim().length >= minLen) text else null
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,3 +32,4 @@ ocr.paddle.enabled=true
|
||||
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
|
||||
ocr.paddle.minScore=0.9
|
||||
ocr.paddle.minAverageScore=0.8
|
||||
ocr.paddle.minTextLength=4
|
||||
|
||||
Reference in New Issue
Block a user