From bd2bca9f33d4b1beaa53a2002fcc8eb92c13821f Mon Sep 17 00:00:00 2001 From: androidlover5842 Date: Sat, 31 Jan 2026 05:03:28 +0530 Subject: [PATCH] Add OpenAI fallback for Aadhaar extraction --- .../component/DocumentExtractionService.kt | 50 ++++++++------ .../component/OpenAIVisionClient.kt | 65 +++++++++++++++++++ .../controller/GuestDocuments.kt | 4 +- src/main/resources/application.properties | 3 + 4 files changed, 100 insertions(+), 22 deletions(-) create mode 100644 src/main/kotlin/com/android/trisolarisserver/component/OpenAIVisionClient.kt diff --git a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt index 80f5db3..0cd5764 100644 --- a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt +++ b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt @@ -12,22 +12,23 @@ import java.util.UUID @org.springframework.stereotype.Component class DocumentExtractionService( private val llamaClient: LlamaClient, + private val openAIVisionClient: OpenAIVisionClient, private val guestRepo: GuestRepo, private val guestVehicleRepo: GuestVehicleRepo, private val propertyRepo: PropertyRepo ) { - fun extractAndApply(imageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult { + fun extractAndApply(localImageUrl: String, publicImageUrl: String, document: GuestDocument, propertyId: UUID): ExtractionResult { val results = linkedMapOf() val detections = listOf( Detection( detect = { results["isVehiclePhoto"] = llamaClient.ask( - imageUrl, + localImageUrl, "IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only." ) if (!isYes(results["isVehiclePhoto"])) return@Detection false val candidate = llamaClient.ask( - imageUrl, + localImageUrl, "VEHICLE NUMBER PLATE? Reply only number or NONE." 
) val cleaned = cleanedValue(candidate) @@ -45,11 +46,11 @@ class DocumentExtractionService( Detection( detect = { results["hasAadhar"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS AADHAAR? Answer YES or NO only." ) results["hasUidai"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS UIDAI? Answer YES or NO only." ) isYes(results["hasAadhar"]) || isYes(results["hasUidai"]) @@ -61,7 +62,7 @@ class DocumentExtractionService( "hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO." ) for ((key, question) in aadharQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } val hasAddress = isYes(results["hasAddress"]) if (hasAddress) { @@ -70,7 +71,7 @@ class DocumentExtractionService( DocumentPrompts.ADDRESS ) for ((key, question) in addressQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } val hasDob = isYes(results["hasDob"]) @@ -83,20 +84,20 @@ class DocumentExtractionService( DocumentPrompts.GENDER ) for ((key, question) in aadharFrontQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } - ensureAadhaarId(imageUrl, results) + ensureAadhaarId(localImageUrl, publicImageUrl, results) } } ), Detection( detect = { results["hasDrivingLicence"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS DRIVING LICENCE? Answer YES or NO only." ) results["hasTransportDept"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only." 
) isYes(results["hasDrivingLicence"]) || isYes(results["hasTransportDept"]) @@ -113,14 +114,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in drivingQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } ), Detection( detect = { results["hasElectionCommission"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only." ) isYes(results["hasElectionCommission"]) @@ -137,14 +138,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in voterQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } ), Detection( detect = { results["hasIncomeTaxDept"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only." ) isYes(results["hasIncomeTaxDept"]) @@ -161,14 +162,14 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in panQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } ), Detection( detect = { results["hasPassport"] = llamaClient.ask( - imageUrl, + localImageUrl, "CONTAINS PASSPORT? Answer YES or NO only." 
) isYes(results["hasPassport"]) @@ -185,7 +186,7 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in passportQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } ) @@ -213,7 +214,7 @@ class DocumentExtractionService( DocumentPrompts.NATIONALITY ) for ((key, question) in generalQuestions) { - results[key] = llamaClient.ask(imageUrl, question) + results[key] = llamaClient.ask(localImageUrl, question) } } @@ -270,7 +271,7 @@ class DocumentExtractionService( } } - private fun ensureAadhaarId(imageUrl: String, results: MutableMap) { + private fun ensureAadhaarId(localImageUrl: String, publicImageUrl: String, results: MutableMap) { val key = DocumentPrompts.ID_NUMBER.first val current = cleanedValue(results[key]) val normalized = normalizeDigits(current) @@ -279,12 +280,19 @@ class DocumentExtractionService( return } val retry = llamaClient.ask( - imageUrl, + localImageUrl, "AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE." 
) val retryNormalized = normalizeDigits(cleanedValue(retry)) if (retryNormalized != null && isValidAadhaar(retryNormalized)) { results[key] = formatAadhaar(retryNormalized) + return + } + + val fallback = openAIVisionClient.extractAadhaarNumber(publicImageUrl) ?: "" + val fallbackNormalized = normalizeDigits(cleanedValue(fallback)) + if (fallbackNormalized != null && isValidAadhaar(fallbackNormalized)) { + results[key] = formatAadhaar(fallbackNormalized) } else { results[key] = "NONE" } diff --git a/src/main/kotlin/com/android/trisolarisserver/component/OpenAIVisionClient.kt b/src/main/kotlin/com/android/trisolarisserver/component/OpenAIVisionClient.kt new file mode 100644 index 0000000..e9636b5 --- /dev/null +++ b/src/main/kotlin/com/android/trisolarisserver/component/OpenAIVisionClient.kt @@ -0,0 +1,101 @@
+package com.android.trisolarisserver.component
+
+import com.fasterxml.jackson.core.JsonProcessingException
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.ObjectMapper
+import org.apache.commons.logging.LogFactory
+import org.springframework.beans.factory.annotation.Value
+import org.springframework.http.HttpEntity
+import org.springframework.http.HttpHeaders
+import org.springframework.http.MediaType
+import org.springframework.stereotype.Component
+import org.springframework.web.client.RestClientException
+import org.springframework.web.client.RestTemplate
+
+/**
+ * Thin client for an OpenAI Responses-style endpoint, used to read an Aadhaar number from an image.
+ *
+ * Endpoint, model and API key come from the `openai.*` properties. When `openai.apiKey` is blank
+ * the client is inert and [extractAadhaarNumber] returns `null` without making a request.
+ */
+@Component
+class OpenAIVisionClient(
+    private val restTemplate: RestTemplate,
+    private val objectMapper: ObjectMapper,
+    @Value("\${openai.apiKey:}")
+    private val apiKey: String,
+    @Value("\${openai.baseUrl:https://api.openai.com/v1/responses}")
+    private val baseUrl: String,
+    @Value("\${openai.model:gpt-5-mini}")
+    private val model: String
+) {
+    private val log = LogFactory.getLog(OpenAIVisionClient::class.java)
+
+    /**
+     * Asks the configured model for the 12-digit Aadhaar number shown in the image at [imageUrl].
+     *
+     * @param imageUrl URL of the document image; it is forwarded to the remote API as-is.
+     * @return the model's raw text reply (not validated here), or `null` when no API key is
+     *   configured, the request or JSON parsing fails, or the response contains no text.
+     */
+    fun extractAadhaarNumber(imageUrl: String): String? {
+        if (apiKey.isBlank()) return null
+
+        val payload = mapOf(
+            "model" to model,
+            "input" to listOf(
+                mapOf(
+                    "role" to "user",
+                    "content" to listOf(
+                        mapOf(
+                            "type" to "input_text",
+                            "text" to "Read extremely carefully. Aadhaar number = 12 digits. Reply ONLY the 12 digits or NONE."
+                        ),
+                        mapOf(
+                            "type" to "input_image",
+                            "image_url" to imageUrl
+                        )
+                    )
+                )
+            )
+        )
+
+        val headers = HttpHeaders().apply {
+            contentType = MediaType.APPLICATION_JSON
+            setBearerAuth(apiKey)
+        }
+
+        val root: JsonNode = try {
+            val body = restTemplate.postForEntity(baseUrl, HttpEntity(payload, headers), String::class.java).body
+                ?: return null
+            objectMapper.readTree(body) ?: return null
+        } catch (ex: RestClientException) {
+            // This is a best-effort fallback: a remote failure must not abort the caller's extraction.
+            log.warn("OpenAI Aadhaar fallback request failed", ex)
+            return null
+        } catch (ex: JsonProcessingException) {
+            log.warn("OpenAI Aadhaar fallback returned unparseable JSON", ex)
+            return null
+        }
+
+        // Kept from the original: honour a top-level `output_text` if the endpoint supplies one.
+        val direct = root.path("output_text").asText()
+        if (direct.isNotBlank()) return direct
+
+        return firstOutputText(root.path("output"))
+    }
+
+    /**
+     * Returns the first non-blank `output_text` part found in any item of [output], or `null`.
+     *
+     * Every item is scanned because the reply text is not guaranteed to sit in the first one.
+     */
+    private fun firstOutputText(output: JsonNode): String? {
+        if (!output.isArray) return null
+        return output.asSequence()
+            .flatMap { item -> item.path("content").asSequence() }
+            .filter { part -> part.path("type").asText() == "output_text" }
+            .map { part -> part.path("text").asText() }
+            .firstOrNull { it.isNotBlank() }
+    }
+}
diff --git a/src/main/kotlin/com/android/trisolarisserver/controller/GuestDocuments.kt b/src/main/kotlin/com/android/trisolarisserver/controller/GuestDocuments.kt index 40d7d23..8da66d1 100644 --- a/src/main/kotlin/com/android/trisolarisserver/controller/GuestDocuments.kt +++ b/src/main/kotlin/com/android/trisolarisserver/controller/GuestDocuments.kt @@ -211,7 +211,9 @@ class GuestDocuments( val imageUrl = "${aiBaseUrl}/properties/$propertyId/guests/$guestId/documents/${document.id}/file?token=$token" - val extraction = extractionService.extractAndApply(imageUrl, document, propertyId) + val publicImageUrl = + "${publicBaseUrl.trimEnd('/')}/properties/$propertyId/guests/$guestId/documents/${document.id}/file?token=$token" + val extraction = extractionService.extractAndApply(imageUrl, publicImageUrl, document, propertyId) val results = extraction.results document.extractedData = objectMapper.writeValueAsString(results) diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index b396e61..734eb04 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -28,3 +28,6 @@ 
ai.llama.topP=0.8 ai.llama.minP=0.2 ai.llama.repeatPenalty=1.0 ai.llama.topK=40 +openai.apiKey=${OPENAI_API_KEY:} +openai.baseUrl=https://api.openai.com/v1/responses +openai.model=gpt-5-mini