Try OCR on failed attempt
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s
This commit is contained in:
@@ -0,0 +1,40 @@
|
|||||||
|
package com.android.trisolarisserver.component
|
||||||
|
|
||||||
|
/**
 * Renders a 12-character Aadhaar string as three space-separated groups of four.
 *
 * Inputs whose length is not exactly 12 are returned unchanged; the content of
 * the string is not inspected.
 *
 * @param value raw Aadhaar string, expected without separators.
 * @return `"XXXX XXXX XXXX"` for 12-character input, otherwise [value] itself.
 */
internal fun formatAadhaar(value: String): String =
    if (value.length == 12) {
        "${value.substring(0, 4)} ${value.substring(4, 8)} ${value.substring(8)}"
    } else {
        value
    }
|
||||||
|
|
||||||
|
/**
 * Checks whether [value] is a well-formed Aadhaar number.
 *
 * The string must be exactly 12 digit characters. The digits are then folded
 * right-to-left through the `aadhaarPermutation` / `aadhaarMultiplication`
 * lookup tables (a Verhoeff-style checksum); the number is valid when the
 * running checksum ends at 0.
 *
 * @param value candidate number without spaces or other separators.
 * @return `true` only when the length, digit and checksum tests all pass.
 */
internal fun isValidAadhaar(value: String): Boolean {
    val wellFormed = value.length == 12 && value.all { it.isDigit() }
    if (!wellFormed) return false

    // Position 0 is the rightmost (check) digit; the permutation row cycles every 8 positions.
    val checksum = value.reversed().foldIndexed(0) { position, running, ch ->
        aadhaarMultiplication[running][aadhaarPermutation[position % 8][ch.digitToInt()]]
    }
    return checksum == 0
}
|
||||||
|
|
||||||
|
/**
 * 10x10 multiplication table used by the Aadhaar checksum.
 *
 * Indexed as `[runningChecksum][permutedDigit]`; the values match the standard
 * Verhoeff "d" table (dihedral group D5).
 */
private val aadhaarMultiplication: Array<IntArray> = arrayOf(
    intArrayOf(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
    intArrayOf(1, 2, 3, 4, 0, 6, 7, 8, 9, 5),
    intArrayOf(2, 3, 4, 0, 1, 7, 8, 9, 5, 6),
    intArrayOf(3, 4, 0, 1, 2, 8, 9, 5, 6, 7),
    intArrayOf(4, 0, 1, 2, 3, 9, 5, 6, 7, 8),
    intArrayOf(5, 9, 8, 7, 6, 0, 4, 3, 2, 1),
    intArrayOf(6, 5, 9, 8, 7, 1, 0, 4, 3, 2),
    intArrayOf(7, 6, 5, 9, 8, 2, 1, 0, 4, 3),
    intArrayOf(8, 7, 6, 5, 9, 3, 2, 1, 0, 4),
    intArrayOf(9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
)
|
||||||
|
/**
 * 8x10 permutation table used by the Aadhaar checksum.
 *
 * Indexed as `[digitPositionFromRight % 8][digit]`; the values match the
 * standard Verhoeff "p" table.
 */
private val aadhaarPermutation: Array<IntArray> = arrayOf(
    intArrayOf(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
    intArrayOf(1, 5, 7, 6, 2, 8, 3, 0, 9, 4),
    intArrayOf(5, 8, 0, 3, 7, 9, 6, 1, 4, 2),
    intArrayOf(8, 9, 1, 6, 0, 4, 3, 5, 2, 7),
    intArrayOf(9, 4, 5, 3, 1, 2, 6, 8, 7, 0),
    intArrayOf(4, 2, 8, 6, 5, 7, 3, 9, 0, 1),
    intArrayOf(2, 7, 9, 3, 8, 0, 6, 4, 1, 5),
    intArrayOf(7, 0, 4, 6, 9, 1, 3, 2, 5, 8),
)
|
||||||
@@ -15,7 +15,8 @@ class DocumentExtractionService(
|
|||||||
private val llamaClient: LlamaClient,
|
private val llamaClient: LlamaClient,
|
||||||
private val guestRepo: GuestRepo,
|
private val guestRepo: GuestRepo,
|
||||||
private val guestVehicleRepo: GuestVehicleRepo,
|
private val guestVehicleRepo: GuestVehicleRepo,
|
||||||
private val propertyRepo: PropertyRepo
|
private val propertyRepo: PropertyRepo,
|
||||||
|
private val paddleOcrClient: PaddleOcrClient
|
||||||
) {
|
) {
|
||||||
private val logger = LoggerFactory.getLogger(DocumentExtractionService::class.java)
|
private val logger = LoggerFactory.getLogger(DocumentExtractionService::class.java)
|
||||||
|
|
||||||
@@ -295,6 +296,26 @@ class DocumentExtractionService(
|
|||||||
results[key] = formatAadhaar(retryNormalized)
|
results[key] = formatAadhaar(retryNormalized)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
val ocrResult = paddleOcrClient.extract(document.storagePath)
|
||||||
|
if (ocrResult != null) {
|
||||||
|
val ocrCandidate = ocrResult.aadhaar
|
||||||
|
if (ocrCandidate != null && isValidAadhaar(ocrCandidate.replace(" ", ""))) {
|
||||||
|
results[key] = ocrCandidate
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (ocrResult.texts.isNotEmpty()) {
|
||||||
|
val ocrText = ocrResult.texts.joinToString("\n")
|
||||||
|
val ocrAsk = llamaClient.askText(
|
||||||
|
ocrText,
|
||||||
|
"AADHAAR NUMBER (12 digits). Reply ONLY the 12 digits or NONE."
|
||||||
|
)
|
||||||
|
val ocrAskNormalized = normalizeDigits(cleanedValue(ocrAsk))
|
||||||
|
if (ocrAskNormalized != null && isValidAadhaar(ocrAskNormalized)) {
|
||||||
|
results[key] = formatAadhaar(ocrAskNormalized)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
logger.warn("Aadhaar retry failed; setting idNumber=NONE")
|
logger.warn("Aadhaar retry failed; setting idNumber=NONE")
|
||||||
results[key] = "NONE"
|
results[key] = "NONE"
|
||||||
@@ -371,29 +392,6 @@ private fun cleanedValue(value: String?): String? {
|
|||||||
private val standardPlateRegex = Regex("^[A-Z]{2}\\d{1,2}[A-Z]{1,3}\\d{3,4}$")
|
private val standardPlateRegex = Regex("^[A-Z]{2}\\d{1,2}[A-Z]{1,3}\\d{3,4}$")
|
||||||
private val bhPlateRegex = Regex("^\\d{2}BH\\d{4}[A-Z]{1,2}$")
|
private val bhPlateRegex = Regex("^\\d{2}BH\\d{4}[A-Z]{1,2}$")
|
||||||
private val pinCodeRegex = Regex("\\b\\d{6}\\b")
|
private val pinCodeRegex = Regex("\\b\\d{6}\\b")
|
||||||
private val aadhaarMultiplication = arrayOf(
|
|
||||||
intArrayOf(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
|
|
||||||
intArrayOf(1, 2, 3, 4, 0, 6, 7, 8, 9, 5),
|
|
||||||
intArrayOf(2, 3, 4, 0, 1, 7, 8, 9, 5, 6),
|
|
||||||
intArrayOf(3, 4, 0, 1, 2, 8, 9, 5, 6, 7),
|
|
||||||
intArrayOf(4, 0, 1, 2, 3, 9, 5, 6, 7, 8),
|
|
||||||
intArrayOf(5, 9, 8, 7, 6, 0, 4, 3, 2, 1),
|
|
||||||
intArrayOf(6, 5, 9, 8, 7, 1, 0, 4, 3, 2),
|
|
||||||
intArrayOf(7, 6, 5, 9, 8, 2, 1, 0, 4, 3),
|
|
||||||
intArrayOf(8, 7, 6, 5, 9, 3, 2, 1, 0, 4),
|
|
||||||
intArrayOf(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
|
|
||||||
)
|
|
||||||
private val aadhaarPermutation = arrayOf(
|
|
||||||
intArrayOf(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
|
|
||||||
intArrayOf(1, 5, 7, 6, 2, 8, 3, 0, 9, 4),
|
|
||||||
intArrayOf(5, 8, 0, 3, 7, 9, 6, 1, 4, 2),
|
|
||||||
intArrayOf(8, 9, 1, 6, 0, 4, 3, 5, 2, 7),
|
|
||||||
intArrayOf(9, 4, 5, 3, 1, 2, 6, 8, 7, 0),
|
|
||||||
intArrayOf(4, 2, 8, 6, 5, 7, 3, 9, 0, 1),
|
|
||||||
intArrayOf(2, 7, 9, 3, 8, 0, 6, 4, 1, 5),
|
|
||||||
intArrayOf(7, 0, 4, 6, 9, 1, 3, 2, 5, 8)
|
|
||||||
)
|
|
||||||
|
|
||||||
private fun extractPinFromValue(value: String?): String? {
|
private fun extractPinFromValue(value: String?): String? {
|
||||||
if (value.isNullOrBlank()) return null
|
if (value.isNullOrBlank()) return null
|
||||||
val compact = value.replace(Regex("\\s+"), "")
|
val compact = value.replace(Regex("\\s+"), "")
|
||||||
@@ -413,21 +411,5 @@ private fun extractPinFromAddress(value: String?): String? {
|
|||||||
private fun normalizeDigits(value: String?): String? {
|
private fun normalizeDigits(value: String?): String? {
|
||||||
if (value.isNullOrBlank()) return null
|
if (value.isNullOrBlank()) return null
|
||||||
val digits = value.filter { it.isDigit() }
|
val digits = value.filter { it.isDigit() }
|
||||||
return if (digits.isBlank()) null else digits
|
return digits.ifBlank { null }
|
||||||
}
|
|
||||||
|
|
||||||
private fun formatAadhaar(value: String): String {
|
|
||||||
if (value.length != 12) return value
|
|
||||||
return value.chunked(4).joinToString(" ")
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun isValidAadhaar(value: String): Boolean {
|
|
||||||
if (value.length != 12 || !value.all { it.isDigit() }) return false
|
|
||||||
var c = 0
|
|
||||||
val reversed = value.reversed()
|
|
||||||
for (i in reversed.indices) {
|
|
||||||
val digit = reversed[i].digitToInt()
|
|
||||||
c = aadhaarMultiplication[c][aadhaarPermutation[i % 8][digit]]
|
|
||||||
}
|
|
||||||
return c == 0
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package com.android.trisolarisserver.component
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import org.slf4j.LoggerFactory
|
||||||
|
import org.springframework.beans.factory.annotation.Value
|
||||||
|
import org.springframework.core.io.FileSystemResource
|
||||||
|
import org.springframework.http.HttpEntity
|
||||||
|
import org.springframework.http.HttpHeaders
|
||||||
|
import org.springframework.http.MediaType
|
||||||
|
import org.springframework.stereotype.Component
|
||||||
|
import org.springframework.util.LinkedMultiValueMap
|
||||||
|
import org.springframework.web.client.RestTemplate
|
||||||
|
import java.nio.file.Files
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
/**
 * HTTP client for the PaddleOCR service.
 *
 * Uploads a document as a multipart form field named `file` to [baseUrl], reads
 * the `texts` and `scores` arrays from the JSON response, drops low-confidence
 * texts and tries to pick a checksum-valid Aadhaar number out of what remains.
 *
 * All failures are treated as "no OCR result": [extract] returns `null` rather
 * than throwing.
 */
@Component
class PaddleOcrClient(
    private val restTemplate: RestTemplate,
    private val objectMapper: ObjectMapper,
    @Value("\${ocr.paddle.enabled:false}")
    private val enabled: Boolean,
    @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
    private val baseUrl: String,
    @Value("\${ocr.paddle.minScore:0.9}")
    private val minScore: Double
) {
    private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)

    /**
     * Runs OCR on the file at [filePath].
     *
     * @param filePath location of the document on the local file system.
     * @return the recognised texts (after score filtering) plus the first valid
     *   Aadhaar number found in them, or `null` when OCR is disabled, the file
     *   does not exist, the path is malformed, or the OCR call/parsing fails.
     */
    fun extract(filePath: String): PaddleOcrResult? {
        if (!enabled) return null

        return try {
            // Path parsing is inside the try so a malformed path yields null like every other failure.
            val path = Path.of(filePath)
            if (!Files.exists(path)) return null

            val output = callOcr(path)
            val filtered = filterByScore(output.texts, output.scores, minScore)
            PaddleOcrResult(filtered, extractAadhaar(filtered))
        } catch (ex: Exception) {
            logger.warn("PaddleOCR failed: {}", ex.message)
            null
        }
    }

    /**
     * Posts the file to the OCR endpoint and parses the response.
     *
     * Texts and scores are paired by index *before* null texts are dropped, so
     * the returned lists are either the same length and index-aligned, or the
     * score list is empty (scores missing, of a different length, or containing
     * a non-numeric entry), in which case no score filtering is applied downstream.
     */
    private fun callOcr(path: Path): OcrPayload {
        val headers = HttpHeaders().apply { contentType = MediaType.MULTIPART_FORM_DATA }
        val body = LinkedMultiValueMap<String, Any>().apply {
            add("file", FileSystemResource(path.toFile()))
        }
        val response = restTemplate.postForEntity(baseUrl, HttpEntity(body, headers), String::class.java)
        val raw = response.body ?: return OcrPayload(emptyList(), emptyList())

        val node = objectMapper.readTree(raw)
        val texts = node.path("texts")
        val scores = node.path("scores")
        if (!texts.isArray) return OcrPayload(emptyList(), emptyList())

        val textNodes = texts.toList()
        val scoreNodes = if (scores.isArray) scores.toList() else emptyList()
        val scoresUsable = scoreNodes.isNotEmpty() &&
            scoreNodes.size == textNodes.size &&
            scoreNodes.all { it.isNumber }
        if (!scoresUsable) {
            // Without a trustworthy one-to-one score list, return texts only (unfiltered downstream).
            return OcrPayload(textNodes.mapNotNull { it.asText(null) }, emptyList())
        }

        // Drop a null text together with its own score so the remaining entries stay aligned.
        val pairs = textNodes.zip(scoreNodes).mapNotNull { (textNode, scoreNode) ->
            textNode.asText(null)?.let { text -> text to scoreNode.asDouble() }
        }
        return OcrPayload(pairs.map { it.first }, pairs.map { it.second })
    }

    /**
     * Keeps the texts whose score is at least [min].
     *
     * When [scores] is empty or does not match [texts] in size, the texts are
     * returned unfiltered.
     */
    private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
        if (scores.size != texts.size || scores.isEmpty()) return texts
        return texts.filterIndexed { index, _ -> scores[index] >= min }
    }

    /**
     * Finds the first 12-digit run (digits optionally separated by whitespace or
     * hyphens) in the space-joined [texts] that passes [isValidAadhaar].
     *
     * @return the number formatted by [formatAadhaar], or `null` if none is valid.
     */
    private fun extractAadhaar(texts: List<String>): String? =
        AADHAAR_CANDIDATE_REGEX.findAll(texts.joinToString(" "))
            .map { match -> match.value.filter { ch -> ch.isDigit() } }
            .filter { digits -> digits.length == 12 }
            .firstOrNull { digits -> isValidAadhaar(digits) }
            ?.let { digits -> formatAadhaar(digits) }

    private companion object {
        /** Compiled once: twelve digits, each optionally followed by one whitespace or hyphen. */
        val AADHAAR_CANDIDATE_REGEX = Regex("\\b(?:\\d[\\s-]?){12}\\b")
    }
}
|
||||||
|
|
||||||
|
/**
 * Outcome of one PaddleOCR extraction.
 *
 * @property texts recognised text fragments that were kept after score filtering.
 * @property aadhaar Aadhaar number found in [texts], or `null` when none was found.
 */
data class PaddleOcrResult(
    val texts: List<String>,
    val aadhaar: String?,
)
|
||||||
|
|
||||||
|
/**
 * Parsed body of an OCR response.
 *
 * @property texts recognised strings taken from the response.
 * @property scores numeric confidence values taken from the response; may be
 *   empty, so consumers should check the size against [texts] before pairing them.
 */
private data class OcrPayload(
    val texts: List<String>,
    val scores: List<Double>,
)
|
||||||
@@ -1,2 +1,3 @@
|
|||||||
spring.datasource.url=jdbc:postgresql://192.168.1.53:5432/trisolaris
|
spring.datasource.url=jdbc:postgresql://192.168.1.53:5432/trisolaris
|
||||||
ai.llama.baseUrl=https://ai.hoteltrisolaris.in/v1/chat/completions
|
ai.llama.baseUrl=https://ai.hoteltrisolaris.in/v1/chat/completions
|
||||||
|
ocr.paddle.baseUrl=https://ocr.hoteltrisolaris.in/
|
||||||
@@ -2,3 +2,4 @@ spring.datasource.url=jdbc:postgresql://localhost:5432/trisolaris
|
|||||||
ai.llama.baseUrl=http://localhost:8089/v1/chat/completions
|
ai.llama.baseUrl=http://localhost:8089/v1/chat/completions
|
||||||
logging.level.com.android.trisolarisserver.controller.Auth=INFO
|
logging.level.com.android.trisolarisserver.controller.Auth=INFO
|
||||||
storage.documents.aiBaseUrl=http://127.0.0.1:18921
|
storage.documents.aiBaseUrl=http://127.0.0.1:18921
|
||||||
|
ocr.paddle.baseUrl=http://127.0.0.1/
|
||||||
@@ -28,3 +28,5 @@ ai.llama.topP=0.8
|
|||||||
ai.llama.minP=0.2
|
ai.llama.minP=0.2
|
||||||
ai.llama.repeatPenalty=1.0
|
ai.llama.repeatPenalty=1.0
|
||||||
ai.llama.topK=40
|
ai.llama.topK=40
|
||||||
|
ocr.paddle.enabled=true
|
||||||
|
ocr.paddle.minScore=0.9
|
||||||
|
|||||||
Reference in New Issue
Block a user