ocr: attach high confidence stuff to all images
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 32s
This commit is contained in:
@@ -29,15 +29,18 @@ class DocumentExtractionService(
|
|||||||
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
|
results["ocrAverage"] = ocrResult.averageScore?.toString() ?: "UNKNOWN"
|
||||||
return ExtractionResult(results, false)
|
return ExtractionResult(results, false)
|
||||||
}
|
}
|
||||||
|
val ocrText = ocrResult?.texts?.takeIf { it.isNotEmpty() }?.joinToString("\n")
|
||||||
val detections = listOf(
|
val detections = listOf(
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["isVehiclePhoto"] = llamaClient.ask(
|
results["isVehiclePhoto"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
|
"IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
if (!isYes(results["isVehiclePhoto"])) return@Detection false
|
if (!isYes(results["isVehiclePhoto"])) return@Detection false
|
||||||
val candidate = llamaClient.ask(
|
val candidate = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"VEHICLE NUMBER PLATE? Reply only number or NONE."
|
"VEHICLE NUMBER PLATE? Reply only number or NONE."
|
||||||
)
|
)
|
||||||
@@ -55,11 +58,13 @@ class DocumentExtractionService(
|
|||||||
),
|
),
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["hasAadhar"] = llamaClient.ask(
|
results["hasAadhar"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS AADHAAR? Answer YES or NO only."
|
"CONTAINS AADHAAR? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
results["hasUidai"] = llamaClient.ask(
|
results["hasUidai"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS UIDAI? Answer YES or NO only."
|
"CONTAINS UIDAI? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
@@ -72,7 +77,7 @@ class DocumentExtractionService(
|
|||||||
"hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
|
"hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
|
||||||
)
|
)
|
||||||
for ((key, question) in aadharQuestions) {
|
for ((key, question) in aadharQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
val hasAddress = isYes(results["hasAddress"])
|
val hasAddress = isYes(results["hasAddress"])
|
||||||
if (hasAddress) {
|
if (hasAddress) {
|
||||||
@@ -81,7 +86,7 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.ADDRESS
|
DocumentPrompts.ADDRESS
|
||||||
)
|
)
|
||||||
for ((key, question) in addressQuestions) {
|
for ((key, question) in addressQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val hasDob = isYes(results["hasDob"])
|
val hasDob = isYes(results["hasDob"])
|
||||||
@@ -94,19 +99,21 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.GENDER
|
DocumentPrompts.GENDER
|
||||||
)
|
)
|
||||||
for ((key, question) in aadharFrontQuestions) {
|
for ((key, question) in aadharFrontQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult)
|
ensureAadhaarId(localImageUrl, publicImageUrl, document, results, ocrResult, ocrText)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["hasDrivingLicence"] = llamaClient.ask(
|
results["hasDrivingLicence"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS DRIVING LICENCE? Answer YES or NO only."
|
"CONTAINS DRIVING LICENCE? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
results["hasTransportDept"] = llamaClient.ask(
|
results["hasTransportDept"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
|
"CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
@@ -124,13 +131,14 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.NATIONALITY
|
DocumentPrompts.NATIONALITY
|
||||||
)
|
)
|
||||||
for ((key, question) in drivingQuestions) {
|
for ((key, question) in drivingQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["hasElectionCommission"] = llamaClient.ask(
|
results["hasElectionCommission"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
|
"CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
@@ -148,13 +156,14 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.NATIONALITY
|
DocumentPrompts.NATIONALITY
|
||||||
)
|
)
|
||||||
for ((key, question) in voterQuestions) {
|
for ((key, question) in voterQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["hasIncomeTaxDept"] = llamaClient.ask(
|
results["hasIncomeTaxDept"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
|
"CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
@@ -172,13 +181,14 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.NATIONALITY
|
DocumentPrompts.NATIONALITY
|
||||||
)
|
)
|
||||||
for ((key, question) in panQuestions) {
|
for ((key, question) in panQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
Detection(
|
Detection(
|
||||||
detect = {
|
detect = {
|
||||||
results["hasPassport"] = llamaClient.ask(
|
results["hasPassport"] = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"CONTAINS PASSPORT? Answer YES or NO only."
|
"CONTAINS PASSPORT? Answer YES or NO only."
|
||||||
)
|
)
|
||||||
@@ -196,7 +206,7 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.NATIONALITY
|
DocumentPrompts.NATIONALITY
|
||||||
)
|
)
|
||||||
for ((key, question) in passportQuestions) {
|
for ((key, question) in passportQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -224,11 +234,12 @@ class DocumentExtractionService(
|
|||||||
DocumentPrompts.NATIONALITY
|
DocumentPrompts.NATIONALITY
|
||||||
)
|
)
|
||||||
for ((key, question) in generalQuestions) {
|
for ((key, question) in generalQuestions) {
|
||||||
results[key] = llamaClient.ask(localImageUrl, question)
|
results[key] = askWithContext(ocrText, localImageUrl, question)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
normalizePinCode(results)
|
normalizePinCode(results)
|
||||||
|
normalizeIdNumber(results)
|
||||||
results["docType"] = computeDocType(results, handled)
|
results["docType"] = computeDocType(results, handled)
|
||||||
applyGuestUpdates(document, propertyId, results)
|
applyGuestUpdates(document, propertyId, results)
|
||||||
return ExtractionResult(results, handled)
|
return ExtractionResult(results, handled)
|
||||||
@@ -257,6 +268,15 @@ class DocumentExtractionService(
|
|||||||
results[pinKey] = chosen ?: "NONE"
|
results[pinKey] = chosen ?: "NONE"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rewrites the ID-number entry of [results] to its digit-only form when it is a valid Aadhaar number.
 *
 * The entry is looked up under `DocumentPrompts.ID_NUMBER.first`, passed through `cleanedValue`
 * and `normalizeDigits`, and overwritten only if the result is non-null and accepted by
 * `isValidAadhaar`. In every other case the map is left untouched.
 *
 * @param results extraction answers keyed by prompt key; mutated in place.
 */
private fun normalizeIdNumber(results: MutableMap<String, String>) {
|
||||||
|
val idKey = DocumentPrompts.ID_NUMBER.first
|
||||||
|
val raw = cleanedValue(results[idKey])
|
||||||
|
val digits = normalizeDigits(raw)
|
||||||
|
// Only replace the stored value when the digits pass the Aadhaar validity check;
// anything else keeps whatever was stored before.
if (digits != null && isValidAadhaar(digits)) {
|
||||||
|
results[idKey] = digits
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
|
private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
|
||||||
if (!handled) return "GENERAL"
|
if (!handled) return "GENERAL"
|
||||||
return when {
|
return when {
|
||||||
@@ -286,39 +306,45 @@ class DocumentExtractionService(
|
|||||||
publicImageUrl: String,
|
publicImageUrl: String,
|
||||||
document: GuestDocument,
|
document: GuestDocument,
|
||||||
results: MutableMap<String, String>,
|
results: MutableMap<String, String>,
|
||||||
ocrResult: PaddleOcrResult?
|
ocrResult: PaddleOcrResult?,
|
||||||
|
ocrText: String?
|
||||||
) {
|
) {
|
||||||
val key = DocumentPrompts.ID_NUMBER.first
|
val key = DocumentPrompts.ID_NUMBER.first
|
||||||
val current = cleanedValue(results[key])
|
val current = cleanedValue(results[key])
|
||||||
val normalized = normalizeDigits(current)
|
val normalized = normalizeDigits(current)
|
||||||
if (normalized != null && isValidAadhaar(normalized)) {
|
if (normalized != null && isValidAadhaar(normalized)) {
|
||||||
results[key] = formatAadhaar(normalized)
|
results[key] = normalized
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
val retry = llamaClient.ask(
|
val retry = askWithContext(
|
||||||
|
ocrText,
|
||||||
localImageUrl,
|
localImageUrl,
|
||||||
"AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
|
"AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
|
||||||
)
|
)
|
||||||
val retryNormalized = normalizeDigits(cleanedValue(retry))
|
val retryNormalized = normalizeDigits(cleanedValue(retry))
|
||||||
if (retryNormalized != null && isValidAadhaar(retryNormalized)) {
|
if (retryNormalized != null && isValidAadhaar(retryNormalized)) {
|
||||||
results[key] = formatAadhaar(retryNormalized)
|
results[key] = retryNormalized
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if (ocrResult != null) {
|
if (ocrResult != null) {
|
||||||
val ocrCandidate = ocrResult.aadhaar
|
val ocrCandidate = ocrResult.aadhaar
|
||||||
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {
|
if (ocrCandidate != null) {
|
||||||
results[key] = ocrCandidate
|
val ocrDigits = ocrCandidate.replace(" ", "")
|
||||||
return
|
if (isValidAadhaar(ocrDigits)) {
|
||||||
|
results[key] = ocrDigits
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (ocrResult.texts.isNotEmpty()) {
|
if (ocrResult.texts.isNotEmpty()) {
|
||||||
val ocrText = ocrResult.texts.joinToString("\n")
|
val ocrText = ocrResult.texts.joinToString("\n")
|
||||||
val ocrAsk = llamaClient.askText(
|
val ocrAsk = askWithContext(
|
||||||
ocrText,
|
ocrText,
|
||||||
|
localImageUrl,
|
||||||
"AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
|
"AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
|
||||||
)
|
)
|
||||||
val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk))
|
val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk))
|
||||||
if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) {
|
if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) {
|
||||||
results[key] = formatAadhaar(ocrAskNormalized)
|
results[key] = ocrAskNormalized
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -328,6 +354,14 @@ class DocumentExtractionService(
|
|||||||
results[key] = "NONE"
|
results[key] = "NONE"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asks the model [question] about the image at [imageUrl], attaching OCR text when it is available.
 *
 * @param ocrText OCR text to send along with the image, or `null` when there is none.
 * @param imageUrl URL of the image, forwarded unchanged to the client.
 * @param question prompt text, forwarded unchanged to the client.
 * @return the string returned by `llamaClient.askWithOcr` when [ocrText] is non-null,
 *   otherwise the string returned by `llamaClient.ask`.
 */
private fun askWithContext(ocrText: String?, imageUrl: String, question: String): String {
|
||||||
|
return if (ocrText != null) {
|
||||||
|
// Note the argument order: the client takes (imageUrl, ocrText, question),
// while this helper takes ocrText first.
llamaClient.askWithOcr(imageUrl, ocrText, question)
|
||||||
|
} else {
|
||||||
|
// No OCR text: fall back to the image-only question.
llamaClient.ask(imageUrl, question)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private fun applyGuestUpdates(
|
private fun applyGuestUpdates(
|
||||||
document: GuestDocument,
|
document: GuestDocument,
|
||||||
propertyId: UUID,
|
propertyId: UUID,
|
||||||
|
|||||||
@@ -56,6 +56,34 @@ class LlamaClient(
|
|||||||
return post(payload)
|
return post(payload)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sends [question] to the model together with the image at [imageUrl] and the supplied [ocrText].
 *
 * The request payload carries the model name `"qwen"`, the client's sampling settings
 * (`temperature`, `top_p`, `min_p`, `repeat_penalty`, `top_k`), a system message holding
 * `systemPrompt`, and one user message with two parts: a text part made of the question
 * followed by an `OCR:` section containing [ocrText], and an `image_url` part.
 *
 * @param imageUrl URL placed in the `image_url` part of the user message.
 * @param ocrText text appended after the question under the `OCR:` heading.
 * @param question prompt text placed at the start of the text part.
 * @return whatever `post(payload)` returns.
 */
fun askWithOcr(imageUrl: String, ocrText: String, question: String): String {
|
||||||
|
val payload = mapOf(
|
||||||
|
"model" to "qwen",
|
||||||
|
"temperature" to temperature,
|
||||||
|
"top_p" to topP,
|
||||||
|
"min_p" to minP,
|
||||||
|
"repeat_penalty" to repeatPenalty,
|
||||||
|
"top_k" to topK,
|
||||||
|
"messages" to listOf(
|
||||||
|
mapOf(
|
||||||
|
"role" to "system",
|
||||||
|
"content" to systemPrompt
|
||||||
|
),
|
||||||
|
mapOf(
|
||||||
|
"role" to "user",
|
||||||
|
"content" to listOf(
|
||||||
|
mapOf(
|
||||||
|
"type" to "text",
|
||||||
|
// The question comes first; the OCR text follows under an "OCR:" heading.
"text" to "${question}\n\nOCR:\n${ocrText}"
|
||||||
|
),
|
||||||
|
mapOf("type" to "image_url", "image_url" to mapOf("url" to imageUrl))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return post(payload)
|
||||||
|
}
|
||||||
|
|
||||||
fun askText(content: String, question: String): String {
|
fun askText(content: String, question: String): String {
|
||||||
val payload = mapOf(
|
val payload = mapOf(
|
||||||
"model" to "qwen",
|
"model" to "qwen",
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ class PaddleOcrClient(
|
|||||||
@Value("\${ocr.paddle.minScore:0.9}")
|
@Value("\${ocr.paddle.minScore:0.9}")
|
||||||
private val minScore: Double,
|
private val minScore: Double,
|
||||||
@Value("\${ocr.paddle.minAverageScore:0.8}")
|
@Value("\${ocr.paddle.minAverageScore:0.8}")
|
||||||
private val minAverageScore: Double
|
private val minAverageScore: Double,
|
||||||
|
@Value("\${ocr.paddle.minTextLength:4}")
|
||||||
|
private val minTextLength: Int
|
||||||
) {
|
) {
|
||||||
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
|
||||||
|
|
||||||
@@ -36,7 +38,7 @@ class PaddleOcrClient(
|
|||||||
return try {
|
return try {
|
||||||
val output = callOcr(path)
|
val output = callOcr(path)
|
||||||
val average = averageScore(output.scores)
|
val average = averageScore(output.scores)
|
||||||
val filtered = filterByScore(output.texts, output.scores, minScore)
|
val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
|
||||||
val aadhaar = extractAadhaar(filtered)
|
val aadhaar = extractAadhaar(filtered)
|
||||||
val rejected = average != null && average < minAverageScore
|
val rejected = average != null && average < minAverageScore
|
||||||
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
PaddleOcrResult(filtered, aadhaar, average, rejected)
|
||||||
@@ -68,10 +70,10 @@ class PaddleOcrClient(
|
|||||||
return OcrPayload(parsedTexts, parsedScores)
|
return OcrPayload(parsedTexts, parsedScores)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
|
private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
|
||||||
if (scores.size != texts.size || scores.isEmpty()) return texts
|
if (scores.size != texts.size || scores.isEmpty()) return texts
|
||||||
return texts.mapIndexedNotNull { index, text ->
|
return texts.mapIndexedNotNull { index, text ->
|
||||||
if (scores[index] >= min) text else null
|
if (scores[index] >= min && text.trim().length >= minLen) text else null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -32,3 +32,4 @@ ocr.paddle.enabled=true
|
|||||||
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
|
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
|
||||||
ocr.paddle.minScore=0.9
|
ocr.paddle.minScore=0.9
|
||||||
ocr.paddle.minAverageScore=0.8
|
ocr.paddle.minAverageScore=0.8
|
||||||
|
ocr.paddle.minTextLength=4
|
||||||
|
|||||||
Reference in New Issue
Block a user