416 lines
17 KiB
Kotlin
416 lines
17 KiB
Kotlin
package com.android.trisolarisserver.component
|
|
|
|
import com.android.trisolarisserver.controller.DocumentPrompts
|
|
import com.android.trisolarisserver.db.repo.GuestRepo
|
|
import com.android.trisolarisserver.models.booking.GuestDocument
|
|
import com.android.trisolarisserver.models.booking.GuestVehicle
|
|
import com.android.trisolarisserver.repo.GuestVehicleRepo
|
|
import com.android.trisolarisserver.repo.PropertyRepo
|
|
import java.time.OffsetDateTime
|
|
import java.util.UUID
|
|
import org.slf4j.LoggerFactory
|
|
|
|
/**
 * Classifies an uploaded guest document image by asking [LlamaClient] a series of
 * questions, extracts the relevant fields, and applies them to the guest record.
 *
 * Detections are tried in a fixed order (vehicle plate, Aadhaar, driving licence,
 * voter id, PAN, passport). The first detection that matches is handled and the
 * remaining ones are skipped; when none matches, a generic question set is asked.
 */
@org.springframework.stereotype.Component
class DocumentExtractionService(
    private val llamaClient: LlamaClient,
    private val guestRepo: GuestRepo,
    private val guestVehicleRepo: GuestVehicleRepo,
    private val propertyRepo: PropertyRepo,
    private val paddleOcrClient: PaddleOcrClient
) {
    private val logger = LoggerFactory.getLogger(DocumentExtractionService::class.java)

    /**
     * Runs the detection pipeline on the image, normalises the PIN code, computes
     * `docType`, and persists guest / vehicle updates.
     *
     * @param localImageUrl image location passed to every [LlamaClient] question.
     * @param publicImageUrl not read by this service; kept so existing callers compile.
     * @param document the document being processed; its guest, booking and storage path are read.
     * @param propertyId property used for the vehicle duplicate check and vehicle registration.
     * @return every answer collected (in the order it was first recorded) plus whether a
     *   specific detection matched.
     */
    @Suppress("UNUSED_PARAMETER")
    fun extractAndApply(
        localImageUrl: String,
        publicImageUrl: String,
        document: GuestDocument,
        propertyId: UUID
    ): ExtractionResult {
        val results = linkedMapOf<String, String>()
        val detections = listOf(
            Detection(
                detect = { detectVehiclePlate(localImageUrl, results) },
                handle = {}
            ),
            Detection(
                detect = {
                    // Both questions are always asked so both answers are recorded.
                    val aadhaar = askYesNo(
                        localImageUrl, results, "hasAadhar", "CONTAINS AADHAAR? Answer YES or NO only."
                    )
                    val uidai = askYesNo(
                        localImageUrl, results, "hasUidai", "CONTAINS UIDAI? Answer YES or NO only."
                    )
                    aadhaar || uidai
                },
                handle = { handleAadhaar(localImageUrl, document, results) }
            ),
            Detection(
                detect = {
                    // Both questions are always asked so both answers are recorded.
                    val licence = askYesNo(
                        localImageUrl,
                        results,
                        "hasDrivingLicence",
                        "CONTAINS DRIVING LICENCE? Answer YES or NO only."
                    )
                    val transport = askYesNo(
                        localImageUrl,
                        results,
                        "hasTransportDept",
                        "CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
                    )
                    licence || transport
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "DL NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(
                        localImageUrl,
                        results,
                        "hasElectionCommission",
                        "CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
                    )
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "VOTER ID NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(
                        localImageUrl,
                        results,
                        "hasIncomeTaxDept",
                        "CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
                    )
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "PAN NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(localImageUrl, results, "hasPassport", "CONTAINS PASSPORT? Answer YES or NO only.")
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "PASSPORT NUMBER? Reply only number or NONE.")
                }
            )
        )

        // firstOrNull evaluates detections in list order and stops at the first match,
        // so later detections never issue their questions.
        val matched = detections.firstOrNull { it.detect() }
        matched?.let { it.handle() }
        val handled = matched != null

        if (!handled) {
            askAll(
                localImageUrl,
                results,
                DocumentPrompts.NAME,
                DocumentPrompts.DOB,
                DocumentPrompts.ID_NUMBER,
                DocumentPrompts.ADDRESS,
                DocumentPrompts.VEHICLE_NUMBER,
                DocumentPrompts.PIN_CODE,
                DocumentPrompts.CITY,
                DocumentPrompts.GENDER,
                DocumentPrompts.NATIONALITY
            )
        }

        normalizePinCode(results)
        results["docType"] = computeDocType(results, handled)
        applyGuestUpdates(document, propertyId, results)
        return ExtractionResult(results, handled)
    }

    /** Asks one question, records the raw answer under [key], and reports whether it was a YES. */
    private fun askYesNo(
        localImageUrl: String,
        results: MutableMap<String, String>,
        key: String,
        question: String
    ): Boolean {
        val answer = llamaClient.ask(localImageUrl, question)
        results[key] = answer
        return isYes(answer)
    }

    /** Asks every question in order and records each raw answer under its key. */
    private fun askAll(
        localImageUrl: String,
        results: MutableMap<String, String>,
        vararg questions: Pair<String, String>
    ) {
        // Going through linkedMapOf keeps insertion order and asks once per distinct key.
        for ((key, question) in linkedMapOf(*questions)) {
            results[key] = llamaClient.ask(localImageUrl, question)
        }
    }

    /** Field set shared by driving licence, voter id, PAN and passport; only the id prompt differs. */
    private fun askIdentityFields(
        localImageUrl: String,
        results: MutableMap<String, String>,
        idNumberQuestion: String
    ) {
        askAll(
            localImageUrl,
            results,
            DocumentPrompts.NAME,
            DocumentPrompts.DOB,
            "idNumber" to idNumberQuestion,
            DocumentPrompts.ADDRESS,
            DocumentPrompts.PIN_CODE,
            DocumentPrompts.CITY,
            DocumentPrompts.GENDER,
            DocumentPrompts.NATIONALITY
        )
    }

    /**
     * Vehicle detection: the image must be reported as a plate photo AND yield a value
     * that passes [isLikelyVehicleNumber]. Otherwise the flag is forced back to "NO".
     */
    private fun detectVehiclePlate(localImageUrl: String, results: MutableMap<String, String>): Boolean {
        val looksLikePlate = askYesNo(
            localImageUrl,
            results,
            "isVehiclePhoto",
            "IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
        )
        if (!looksLikePlate) return false

        val plate = cleanedValue(
            llamaClient.ask(localImageUrl, "VEHICLE NUMBER PLATE? Reply only number or NONE.")
        )
        if (plate != null && isLikelyVehicleNumber(plate)) {
            results["vehicleNumber"] = plate
            return true
        }
        // The claimed plate photo produced no plausible number: withdraw the claim so the
        // remaining detections get a chance.
        results["vehicleNumber"] = "NONE"
        results["isVehiclePhoto"] = "NO"
        return false
    }

    /** Aadhaar handling: address fields only when an address is present, identity fields only when DOB and gender are. */
    private fun handleAadhaar(
        localImageUrl: String,
        document: GuestDocument,
        results: MutableMap<String, String>
    ) {
        askAll(
            localImageUrl,
            results,
            "hasAddress" to "POSTAL ADDRESS PRESENT? Answer YES or NO only.",
            "hasDob" to "DOB? Reply YES or NO.",
            "hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
        )
        if (isYes(results["hasAddress"])) {
            askAll(localImageUrl, results, DocumentPrompts.PIN_CODE, DocumentPrompts.ADDRESS)
        }
        if (isYes(results["hasDob"]) && isYes(results["hasGenderMentioned"])) {
            askAll(
                localImageUrl,
                results,
                DocumentPrompts.NAME,
                DocumentPrompts.DOB,
                DocumentPrompts.ID_NUMBER,
                DocumentPrompts.GENDER
            )
            ensureAadhaarId(localImageUrl, document, results)
        }
    }

    /** True when the answer contains "YES" in any casing; null counts as not-yes. */
    private fun isYes(value: String?): Boolean = value.orEmpty().contains("YES", ignoreCase = true)

    /**
     * Checks the value against the two plate patterns after upper-casing it and removing
     * whitespace and hyphens. Twelve-digit all-numeric values and values shorter than six
     * characters are rejected up front.
     */
    private fun isLikelyVehicleNumber(value: String): Boolean {
        val compact = value.uppercase().replace(Regex("[\\s-]"), "")
        val twelveDigits = compact.length == 12 && compact.all { it.isDigit() }
        return when {
            twelveDigits -> false
            compact.length < 6 -> false
            else -> standardPlateRegex.matches(compact) || bhPlateRegex.matches(compact)
        }
    }

    /**
     * Replaces the PIN-code answer with a six-digit value taken from the PIN answer or,
     * failing that, from the address answer; writes "NONE" when neither yields one.
     */
    private fun normalizePinCode(results: MutableMap<String, String>) {
        val pinKey = DocumentPrompts.PIN_CODE.first
        val fromPin = extractPinFromValue(cleanedValue(results[pinKey]))
        val fromAddress = extractPinFromAddress(cleanedValue(results[DocumentPrompts.ADDRESS.first]))
        results[pinKey] = fromPin ?: fromAddress ?: "NONE"
    }

    /** Maps the recorded YES/NO flags to a document type label; the first matching branch wins. */
    private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
        if (!handled) return "GENERAL"

        fun flagged(vararg keys: String): Boolean = keys.any { isYes(results[it]) }

        val vehicleNumber = results["vehicleNumber"].orEmpty()
        return when {
            flagged("hasCourt", "hasHighCourt", "hasSupremeCourt", "hasJudiciary") -> "COURT_ID"
            flagged("hasPolice") -> "POLICE_ID"
            flagged("hasPassport") -> "PASSPORT"
            flagged("hasTransportDept", "hasDrivingLicence") -> "TRANSPORT"
            flagged("hasIncomeTaxDept") -> "PAN"
            flagged("hasElectionCommission") -> "VOTER_ID"
            flagged("hasAadhar", "hasUidai") ->
                if (isYes(results["hasAddress"])) "AADHAR_BACK" else "AADHAR_FRONT"
            vehicleNumber.isNotBlank() && !vehicleNumber.contains("NONE", ignoreCase = true) -> "VEHICLE"
            flagged("isVehiclePhoto") -> "VEHICLE_PHOTO"
            else -> "UNKNOWN"
        }
    }

    /**
     * Makes sure the id-number answer holds a valid Aadhaar number, trying in order:
     * the answer already recorded, a second more explicit question, and the OCR client.
     * Writes "NONE" (and logs a warning) when every attempt fails.
     */
    private fun ensureAadhaarId(
        localImageUrl: String,
        document: GuestDocument,
        results: MutableMap<String, String>
    ) {
        val key = DocumentPrompts.ID_NUMBER.first
        // The elvis chain is lazy: each fallback runs only when the previous one produced nothing.
        val resolved = validAadhaarOrNull(results[key])
            ?: validAadhaarOrNull(
                llamaClient.ask(
                    localImageUrl,
                    "AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
                )
            )
            ?: aadhaarFromOcr(document)

        if (resolved == null) {
            logger.warn("Aadhaar retry failed; setting idNumber=NONE")
        }
        results[key] = resolved ?: "NONE"
    }

    /** Returns the formatted Aadhaar number when [raw] reduces to digits that pass validation, else null. */
    private fun validAadhaarOrNull(raw: String?): String? {
        val digits = normalizeDigits(cleanedValue(raw)) ?: return null
        return if (isValidAadhaar(digits)) formatAadhaar(digits) else null
    }

    /**
     * OCR fallback: first the OCR client's own Aadhaar candidate, then a text question
     * over the recognised lines.
     *
     * The candidate goes through the same normalise / validate / format path as every
     * other source, so the stored id number has one format regardless of which
     * fallback produced it.
     */
    private fun aadhaarFromOcr(document: GuestDocument): String? {
        val ocrResult = paddleOcrClient.extract(document.storagePath) ?: return null
        validAadhaarOrNull(ocrResult.aadhaar)?.let { return it }
        if (ocrResult.texts.isEmpty()) return null

        val answer = llamaClient.askText(
            ocrResult.texts.joinToString("\n"),
            "AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
        )
        return validAadhaarOrNull(answer)
    }

    /**
     * Persists what was extracted: fills the guest's name / address only when they are
     * currently blank, and registers the vehicle number for the property when it is not
     * already known there. Does nothing when the document's guest has no id.
     */
    private fun applyGuestUpdates(
        document: GuestDocument,
        propertyId: UUID,
        results: Map<String, String>
    ) {
        val guestId = document.guest.id ?: return

        val extractedName = cleanedValue(results[DocumentPrompts.NAME.first])
        val extractedAddress = cleanedValue(results[DocumentPrompts.ADDRESS.first])
        if (extractedName != null || extractedAddress != null) {
            guestRepo.findById(guestId).orElse(null)?.let { guest ->
                var updated = false
                if (guest.name.isNullOrBlank() && extractedName != null) {
                    guest.name = extractedName
                    updated = true
                }
                if (guest.addressText.isNullOrBlank() && extractedAddress != null) {
                    guest.addressText = extractedAddress
                    updated = true
                }
                // Save (and bump updatedAt) only when something actually changed.
                if (updated) {
                    guest.updatedAt = OffsetDateTime.now()
                    guestRepo.save(guest)
                }
            }
        }

        val extractedVehicle = cleanedValue(results["vehicleNumber"])
        if (!isYes(results["isVehiclePhoto"]) || extractedVehicle == null) return
        if (guestVehicleRepo.existsByPropertyIdAndVehicleNumberIgnoreCase(propertyId, extractedVehicle)) return

        val property = propertyRepo.findById(propertyId).orElse(null) ?: return
        val guest = guestRepo.findById(guestId).orElse(null) ?: return
        guestVehicleRepo.save(
            GuestVehicle(
                property = property,
                guest = guest,
                booking = document.booking,
                vehicleNumber = extractedVehicle
            )
        )
    }
}
|
|
|
|
/**
 * Outcome of a document extraction run.
 *
 * @property results answers keyed by question key, in the order the keys were first recorded.
 * @property handled true when one of the specific detections matched the document.
 */
data class ExtractionResult(val results: LinkedHashMap<String, String>, val handled: Boolean)
|
|
|
|
/**
 * One step of the detection pipeline.
 *
 * @property detect decides whether the document matches this step.
 * @property handle work to run once [detect] has reported a match.
 */
private data class Detection(val detect: () -> Boolean, val handle: () -> Unit)
|
|
|
|
/**
 * Trims the value and returns it, or null when nothing usable remains: a null or blank
 * input, or one of the placeholder answers NONE, N/A, NA, NULL (compared case-insensitively).
 */
private fun cleanedValue(value: String?): String? {
    val text = value?.trim()
    if (text.isNullOrEmpty()) return null
    val placeholders = setOf("NONE", "N/A", "NA", "NULL")
    return text.takeUnless { it.uppercase() in placeholders }
}
|
|
|
|
/** Two letters, one or two digits, one to three letters, then three or four digits. */
private val standardPlateRegex: Regex = "^[A-Z]{2}\\d{1,2}[A-Z]{1,3}\\d{3,4}$".toRegex()

/** Two digits, the letters BH, four digits, then one or two letters. */
private val bhPlateRegex: Regex = "^\\d{2}BH\\d{4}[A-Z]{1,2}$".toRegex()

/** A run of exactly six digits delimited by word boundaries. */
private val pinCodeRegex: Regex = "\\b\\d{6}\\b".toRegex()
|
|
/**
 * Returns the first standalone six-digit run in the value, or null when the value is
 * blank, has no such run, or is exactly twelve digits once whitespace is removed.
 */
private fun extractPinFromValue(value: String?): String? {
    val text = value?.takeUnless { it.isBlank() } ?: return null
    val squeezed = text.replace(Regex("\\s+"), "")
    val twelveDigits = squeezed.length == 12 && squeezed.all { it.isDigit() }
    return if (twelveDigits) null else pinCodeRegex.find(text)?.value
}
|
|
|
|
/**
 * Returns the first standalone six-digit run in the address, but only when the address
 * also contains a PIN label (case-insensitive); otherwise null.
 */
private fun extractPinFromAddress(value: String?): String? {
    val text = value?.takeUnless { it.isBlank() } ?: return null
    // "PINCODE" is already covered by "PIN"; both labels are kept as in the original check.
    val labelled = listOf("PIN", "PINCODE").any { label -> text.contains(label, ignoreCase = true) }
    return if (labelled) pinCodeRegex.find(text)?.value else null
}
|
|
|
|
/** Keeps only the digit characters of the value; null when the input is null or has no digits. */
private fun normalizeDigits(value: String?): String? =
    value
        ?.filter { ch -> ch.isDigit() }
        ?.takeIf { it.isNotEmpty() }
|