package com.android.trisolarisserver.component

import com.android.trisolarisserver.controller.DocumentPrompts
import com.android.trisolarisserver.db.repo.GuestRepo
import com.android.trisolarisserver.models.booking.GuestDocument
import com.android.trisolarisserver.models.booking.GuestVehicle
import com.android.trisolarisserver.repo.GuestVehicleRepo
import com.android.trisolarisserver.repo.PropertyRepo
import java.time.OffsetDateTime
import java.util.UUID
import org.slf4j.LoggerFactory

/**
 * Classifies an uploaded guest document image and extracts its fields.
 *
 * A fixed, ordered list of detectors is run against the image through [llamaClient]. The first
 * detector that matches runs its field prompts; when none matches, a general prompt set is used.
 * Every answer is stored under a string key in an insertion-ordered map. The map is then
 * post-processed (pin code normalisation, `docType`) and used to update the guest record and to
 * register a guest vehicle.
 */
@org.springframework.stereotype.Component
class DocumentExtractionService(
    private val llamaClient: LlamaClient,
    private val guestRepo: GuestRepo,
    private val guestVehicleRepo: GuestVehicleRepo,
    private val propertyRepo: PropertyRepo,
    private val paddleOcrClient: PaddleOcrClient
) {
    private val logger = LoggerFactory.getLogger(DocumentExtractionService::class.java)

    /**
     * Runs detection and extraction for one document image and applies the result to the guest.
     *
     * @param localImageUrl image location passed to every [llamaClient] prompt.
     * @param publicImageUrl forwarded to the Aadhaar ID check, which does not currently read it.
     * @param document the document being processed; supplies the guest, booking and storage path.
     * @param propertyId property used when checking for and saving a guest vehicle.
     * @return the collected answers (including `docType`) and whether a specific detector matched.
     */
    fun extractAndApply(
        localImageUrl: String,
        publicImageUrl: String,
        document: GuestDocument,
        propertyId: UUID
    ): ExtractionResult {
        val results = linkedMapOf<String, String>()

        // Order matters: detectors run top to bottom and the first match wins.
        val detections = listOf(
            Detection(
                detect = { detectVehiclePlate(localImageUrl, results) },
                handle = {}
            ),
            Detection(
                detect = { detectAadhaar(localImageUrl, results) },
                handle = { extractAadhaar(localImageUrl, publicImageUrl, document, results) }
            ),
            Detection(
                detect = { detectDrivingLicence(localImageUrl, results) },
                handle = {
                    askIdentityFields(localImageUrl, results, "DL NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(
                        localImageUrl,
                        results,
                        "hasElectionCommission",
                        "CONTAINS ELECTION COMMISSION OF INDIA? Answer YES or NO only."
                    )
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "VOTER ID NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(
                        localImageUrl,
                        results,
                        "hasIncomeTaxDept",
                        "CONTAINS INCOME TAX DEPARTMENT? Answer YES or NO only."
                    )
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "PAN NUMBER? Reply only number or NONE.")
                }
            ),
            Detection(
                detect = {
                    askYesNo(
                        localImageUrl,
                        results,
                        "hasPassport",
                        "CONTAINS PASSPORT? Answer YES or NO only."
                    )
                },
                handle = {
                    askIdentityFields(localImageUrl, results, "PASSPORT NUMBER? Reply only number or NONE.")
                }
            )
        )

        // firstOrNull stops at the first detector that returns true, so later detectors never run.
        val matched = detections.firstOrNull { it.detect() }
        if (matched != null) {
            matched.handle()
        } else {
            askAll(
                localImageUrl,
                results,
                DocumentPrompts.NAME,
                DocumentPrompts.DOB,
                DocumentPrompts.ID_NUMBER,
                DocumentPrompts.ADDRESS,
                DocumentPrompts.VEHICLE_NUMBER,
                DocumentPrompts.PIN_CODE,
                DocumentPrompts.CITY,
                DocumentPrompts.GENDER,
                DocumentPrompts.NATIONALITY
            )
        }
        val handled = matched != null

        normalizePinCode(results)
        results["docType"] = computeDocType(results, handled)
        applyGuestUpdates(document, propertyId, results)
        return ExtractionResult(results, handled)
    }

    /**
     * Sends each prompt to [llamaClient] and stores the answer under the prompt's key.
     *
     * Prompts are de-duplicated by key through a linked map, so a repeated key is asked once,
     * at the position of its first occurrence, using the last question supplied for it.
     */
    private fun askAll(
        imageUrl: String,
        results: MutableMap<String, String>,
        vararg prompts: Pair<String, String>
    ) {
        for ((key, question) in linkedMapOf(*prompts)) {
            results[key] = llamaClient.ask(imageUrl, question)
        }
    }

    /** Asks one question, stores the raw answer under [key] and returns whether it reads as YES. */
    private fun askYesNo(
        imageUrl: String,
        results: MutableMap<String, String>,
        key: String,
        question: String
    ): Boolean {
        val answer = llamaClient.ask(imageUrl, question)
        results[key] = answer
        return isYes(answer)
    }

    /** Asks the field set shared by licence, voter ID, PAN and passport, with a specific ID prompt. */
    private fun askIdentityFields(
        imageUrl: String,
        results: MutableMap<String, String>,
        idNumberQuestion: String
    ) {
        askAll(
            imageUrl,
            results,
            DocumentPrompts.NAME,
            DocumentPrompts.DOB,
            "idNumber" to idNumberQuestion,
            DocumentPrompts.ADDRESS,
            DocumentPrompts.PIN_CODE,
            DocumentPrompts.CITY,
            DocumentPrompts.GENDER,
            DocumentPrompts.NATIONALITY
        )
    }

    /**
     * Vehicle detector: matches only when the image is reported as a plate photo AND the plate
     * that is read back looks like a vehicle number. On a rejected plate both keys are reset
     * (`vehicleNumber` = "NONE", `isVehiclePhoto` = "NO") so later steps do not treat it as a vehicle.
     */
    private fun detectVehiclePlate(imageUrl: String, results: MutableMap<String, String>): Boolean {
        val looksLikePlatePhoto = askYesNo(
            imageUrl,
            results,
            "isVehiclePhoto",
            "IS THIS A VEHICLE NUMBER PLATE PHOTO? Answer YES or NO only."
        )
        if (!looksLikePlatePhoto) return false

        val candidate = llamaClient.ask(imageUrl, "VEHICLE NUMBER PLATE? Reply only number or NONE.")
        val plate = cleanedValue(candidate)?.takeIf { isLikelyVehicleNumber(it) }
        if (plate == null) {
            results["vehicleNumber"] = "NONE"
            results["isVehiclePhoto"] = "NO"
            return false
        }
        results["vehicleNumber"] = plate
        return true
    }

    /** Aadhaar detector: both questions are always asked; either YES answer counts as a match. */
    private fun detectAadhaar(imageUrl: String, results: MutableMap<String, String>): Boolean {
        val hasAadhar = askYesNo(imageUrl, results, "hasAadhar", "CONTAINS AADHAAR? Answer YES or NO only.")
        val hasUidai = askYesNo(imageUrl, results, "hasUidai", "CONTAINS UIDAI? Answer YES or NO only.")
        return hasAadhar || hasUidai
    }

    /** Driving licence detector: both questions are always asked; either YES answer counts as a match. */
    private fun detectDrivingLicence(imageUrl: String, results: MutableMap<String, String>): Boolean {
        val hasLicence = askYesNo(
            imageUrl,
            results,
            "hasDrivingLicence",
            "CONTAINS DRIVING LICENCE? Answer YES or NO only."
        )
        val hasTransportDept = askYesNo(
            imageUrl,
            results,
            "hasTransportDept",
            "CONTAINS TRANSPORT DEPARTMENT? Answer YES or NO only."
        )
        return hasLicence || hasTransportDept
    }

    /**
     * Aadhaar field extraction. Address fields are asked only when an address is reported present;
     * name, DOB, ID number and gender are asked only when both DOB and gender are reported present,
     * after which the ID number is validated and, if needed, re-read.
     */
    private fun extractAadhaar(
        localImageUrl: String,
        publicImageUrl: String,
        document: GuestDocument,
        results: MutableMap<String, String>
    ) {
        askAll(
            localImageUrl,
            results,
            "hasAddress" to "POSTAL ADDRESS PRESENT? Answer YES or NO only.",
            "hasDob" to "DOB? Reply YES or NO.",
            "hasGenderMentioned" to "GENDER MENTIONED? Reply YES or NO."
        )

        if (isYes(results["hasAddress"])) {
            askAll(localImageUrl, results, DocumentPrompts.PIN_CODE, DocumentPrompts.ADDRESS)
        }

        if (isYes(results["hasDob"]) && isYes(results["hasGenderMentioned"])) {
            askAll(
                localImageUrl,
                results,
                DocumentPrompts.NAME,
                DocumentPrompts.DOB,
                DocumentPrompts.ID_NUMBER,
                DocumentPrompts.GENDER
            )
            ensureAadhaarId(localImageUrl, publicImageUrl, document, results)
        }
    }

    /** True when [value] contains "YES" anywhere, ignoring case; null counts as not-yes. */
    private fun isYes(value: String?): Boolean {
        return value.orEmpty().contains("YES", ignoreCase = true)
    }

    /**
     * True when [value], upper-cased with whitespace and hyphens removed, matches the standard
     * or BH-series plate pattern.
     */
    private fun isLikelyVehicleNumber(value: String): Boolean {
        val normalized = value.uppercase().replace(plateSeparatorRegex, "")
        // A bare 12-digit number is rejected outright rather than being treated as a plate.
        if (normalized.length == 12 && normalized.all { it.isDigit() }) return false
        if (normalized.length < 6) return false
        return standardPlateRegex.matches(normalized) || bhPlateRegex.matches(normalized)
    }

    /**
     * Rewrites the pin code entry to a six-digit value taken from the pin answer, falling back to
     * the address answer; stores "NONE" when neither yields one.
     */
    private fun normalizePinCode(results: MutableMap<String, String>) {
        val pinKey = DocumentPrompts.PIN_CODE.first
        val fromPin = extractPinFromValue(cleanedValue(results[pinKey]))
        val fromAddress = extractPinFromAddress(cleanedValue(results[DocumentPrompts.ADDRESS.first]))
        results[pinKey] = fromPin ?: fromAddress ?: "NONE"
    }

    /**
     * Derives the document type label from the YES/NO flags in [results].
     *
     * Returns "GENERAL" when no detector matched. Otherwise the first satisfied rule wins, in the
     * order written below.
     */
    private fun computeDocType(results: Map<String, String>, handled: Boolean): String {
        if (!handled) return "GENERAL"
        val vehicleNumber = results["vehicleNumber"].orEmpty()
        return when {
            // No detector in extractAndApply writes the court/police keys; the rules are kept as-is.
            isYes(results["hasCourt"]) ||
                isYes(results["hasHighCourt"]) ||
                isYes(results["hasSupremeCourt"]) ||
                isYes(results["hasJudiciary"]) -> "COURT_ID"
            isYes(results["hasPolice"]) -> "POLICE_ID"
            isYes(results["hasPassport"]) -> "PASSPORT"
            isYes(results["hasTransportDept"]) || isYes(results["hasDrivingLicence"]) -> "TRANSPORT"
            isYes(results["hasIncomeTaxDept"]) -> "PAN"
            isYes(results["hasElectionCommission"]) -> "VOTER_ID"
            isYes(results["hasAadhar"]) || isYes(results["hasUidai"]) ->
                if (isYes(results["hasAddress"])) "AADHAR_BACK" else "AADHAR_FRONT"
            vehicleNumber.isNotBlank() && !vehicleNumber.contains("NONE", ignoreCase = true) -> "VEHICLE"
            isYes(results["isVehiclePhoto"]) -> "VEHICLE_PHOTO"
            else -> "UNKNOWN"
        }
    }

    /**
     * Makes sure the ID number entry holds a valid, formatted Aadhaar number, or "NONE".
     *
     * Sources are tried in order and the first valid one wins: the answer already in [results],
     * a second image prompt, the OCR client's own Aadhaar field, and finally a text prompt over
     * the joined OCR lines. Every source goes through the same digits-only normalisation,
     * validation and formatting, so the stored value has one shape regardless of where it came from.
     *
     * @param publicImageUrl not read by this function.
     */
    private fun ensureAadhaarId(
        localImageUrl: String,
        publicImageUrl: String,
        document: GuestDocument,
        results: MutableMap<String, String>
    ) {
        val key = DocumentPrompts.ID_NUMBER.first

        // Stores the candidate and returns true only when it normalises to a valid Aadhaar number.
        fun accept(raw: String?): Boolean {
            val digits = normalizeDigits(cleanedValue(raw)) ?: return false
            if (!isValidAadhaar(digits)) return false
            results[key] = formatAadhaar(digits)
            return true
        }

        if (accept(results[key])) return

        val retry = llamaClient.ask(
            localImageUrl,
            "AADHAAR NUMBER (12 digits). Read extremely carefully. Reply ONLY the 12 digits or NONE."
        )
        if (accept(retry)) return

        val ocrResult = paddleOcrClient.extract(document.storagePath)
        if (ocrResult != null) {
            if (accept(ocrResult.aadhaar)) return
            if (ocrResult.texts.isNotEmpty()) {
                val ocrText = ocrResult.texts.joinToString("\n")
                val ocrAsk = llamaClient.askText(
                    ocrText,
                    "AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
                )
                if (accept(ocrAsk)) return
            }
        }

        logger.warn("Aadhaar retry failed; setting idNumber=NONE")
        results[key] = "NONE"
    }

    /** Applies extracted values to persistent state: guest name/address, then guest vehicle. */
    private fun applyGuestUpdates(
        document: GuestDocument,
        propertyId: UUID,
        results: Map<String, String>
    ) {
        updateGuestProfile(document, results)
        registerGuestVehicle(document, propertyId, results)
    }

    /**
     * Fills the guest's name and address from the extracted values, but only into fields that
     * are currently null or blank. The guest is saved only when at least one field changed.
     */
    private fun updateGuestProfile(document: GuestDocument, results: Map<String, String>) {
        val extractedName = cleanedValue(results[DocumentPrompts.NAME.first])
        val extractedAddress = cleanedValue(results[DocumentPrompts.ADDRESS.first])
        if (extractedName == null && extractedAddress == null) return

        val guestId = document.guest.id ?: return
        val guest = guestRepo.findById(guestId).orElse(null) ?: return

        var updated = false
        if (guest.name.isNullOrBlank() && extractedName != null) {
            guest.name = extractedName
            updated = true
        }
        if (guest.addressText.isNullOrBlank() && extractedAddress != null) {
            guest.addressText = extractedAddress
            updated = true
        }
        if (updated) {
            guest.updatedAt = OffsetDateTime.now()
            guestRepo.save(guest)
        }
    }

    /**
     * Saves a [GuestVehicle] for a confirmed plate photo unless the same number (ignoring case)
     * already exists for the property, or the property or guest cannot be loaded.
     */
    private fun registerGuestVehicle(
        document: GuestDocument,
        propertyId: UUID,
        results: Map<String, String>
    ) {
        if (!isYes(results["isVehiclePhoto"])) return
        val vehicleNumber = cleanedValue(results["vehicleNumber"]) ?: return
        val guestId = document.guest.id ?: return
        if (guestVehicleRepo.existsByPropertyIdAndVehicleNumberIgnoreCase(propertyId, vehicleNumber)) return

        val property = propertyRepo.findById(propertyId).orElse(null)
        val guest = guestRepo.findById(guestId).orElse(null)
        if (property == null || guest == null) return

        guestVehicleRepo.save(
            GuestVehicle(
                property = property,
                guest = guest,
                booking = document.booking,
                vehicleNumber = vehicleNumber
            )
        )
    }
}

/**
 * Outcome of [DocumentExtractionService.extractAndApply].
 *
 * @property results answers keyed by prompt key, in the order they were first written.
 * @property handled true when one of the specific detectors matched; false for the general fallback.
 */
data class ExtractionResult(
    val results: LinkedHashMap<String, String>,
    val handled: Boolean
)

/** A detector paired with the extraction step to run when it matches. */
private data class Detection(
    val detect: () -> Boolean,
    val handle: () -> Unit
)

/** Answers that mean "no value", compared after trimming and upper-casing. */
private val placeholderValues = setOf("NONE", "N/A", "NA", "NULL")

/** Returns [value] trimmed, or null when it is null, blank or one of the placeholder answers. */
private fun cleanedValue(value: String?): String? {
    val trimmed = value?.trim().orEmpty()
    if (trimmed.isBlank()) return null
    return trimmed.takeUnless { it.uppercase() in placeholderValues }
}

private val standardPlateRegex = Regex("^[A-Z]{2}\\d{1,2}[A-Z]{1,3}\\d{3,4}$")
private val bhPlateRegex = Regex("^\\d{2}BH\\d{4}[A-Z]{1,2}$")
private val pinCodeRegex = Regex("\\b\\d{6}\\b")
private val plateSeparatorRegex = Regex("[\\s-]")
private val whitespaceRegex = Regex("\\s+")

/**
 * Returns the first standalone six-digit run in [value], or null.
 *
 * A value that is exactly twelve digits once whitespace is removed is rejected, so such a
 * number is never split into a false pin code.
 */
private fun extractPinFromValue(value: String?): String? {
    if (value.isNullOrBlank()) return null
    val compact = value.replace(whitespaceRegex, "")
    if (compact.length == 12 && compact.all { it.isDigit() }) return null
    return pinCodeRegex.find(value)?.value
}

/**
 * Returns the first standalone six-digit run in [value], but only when the text contains
 * "PIN" (any case); otherwise null.
 */
private fun extractPinFromAddress(value: String?): String? {
    if (value.isNullOrBlank()) return null
    // "PIN" also covers "PINCODE", so a single check is enough.
    if (!value.contains("PIN", ignoreCase = true)) return null
    return pinCodeRegex.find(value)?.value
}

/** Returns only the digit characters of [value], or null when there are none. */
private fun normalizeDigits(value: String?): String? {
    if (value.isNullOrBlank()) return null
    return value.filter { it.isDigit() }.ifEmpty { null }
}