111 lines
4.1 KiB
Kotlin
111 lines
4.1 KiB
Kotlin
package com.android.trisolarisserver.component
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
|
import org.slf4j.LoggerFactory
|
|
import org.springframework.beans.factory.annotation.Value
|
|
import org.springframework.core.io.FileSystemResource
|
|
import org.springframework.http.HttpEntity
|
|
import org.springframework.http.HttpHeaders
|
|
import org.springframework.http.MediaType
|
|
import org.springframework.stereotype.Component
|
|
import org.springframework.util.LinkedMultiValueMap
|
|
import org.springframework.web.client.RestTemplate
|
|
import java.nio.file.Files
|
|
import java.nio.file.Path
|
|
|
|
/**
 * Client for a PaddleOCR HTTP endpoint.
 *
 * Uploads an image file as multipart form data, reads the `texts` and `scores`
 * arrays from the JSON response, drops low-confidence / too-short lines and
 * searches the remaining text for a 12-digit Aadhaar candidate.
 *
 * @property restTemplate HTTP client used to POST the file.
 * @property objectMapper JSON parser used on the response body.
 * @property enabled `ocr.paddle.enabled` (default `false`); when `false`, [extract] always returns `null`.
 * @property baseUrl `ocr.paddle.baseUrl`; the URL the file is POSTed to.
 * @property minScore `ocr.paddle.minScore` (default `0.9`); minimum per-line score for a text to be kept.
 * @property minAverageScore `ocr.paddle.minAverageScore` (default `0.8`); results whose mean score is
 * below this are flagged as rejected.
 * @property minTextLength `ocr.paddle.minTextLength` (default `4`); minimum trimmed length for a text to be kept.
 */
@Component
class PaddleOcrClient(
    private val restTemplate: RestTemplate,
    private val objectMapper: ObjectMapper,
    @Value("\${ocr.paddle.enabled:false}")
    private val enabled: Boolean,
    @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
    private val baseUrl: String,
    @Value("\${ocr.paddle.minScore:0.9}")
    private val minScore: Double,
    @Value("\${ocr.paddle.minAverageScore:0.8}")
    private val minAverageScore: Double,
    @Value("\${ocr.paddle.minTextLength:4}")
    private val minTextLength: Int
) {
    private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)

    /**
     * Runs OCR on the file at [filePath].
     *
     * @return the filtered texts, the extracted Aadhaar number (if any), the mean
     * score and the rejection flag; or `null` when the client is disabled, the file
     * does not exist, or the OCR call / response parsing throws.
     *
     * Note: the path is resolved before the `try`, so an exception raised by
     * [Path.of] for an unparsable path string is propagated to the caller.
     */
    fun extract(filePath: String): PaddleOcrResult? {
        if (!enabled) return null
        val path = Path.of(filePath)
        if (!Files.exists(path)) return null

        return try {
            val payload = callOcr(path)
            // The average is taken over every parsed score, not only those of the kept texts.
            val average = averageScore(payload.scores)
            val kept = filterByScore(payload.texts, payload.scores, minScore, minTextLength)
            PaddleOcrResult(
                texts = kept,
                aadhaar = extractAadhaar(kept),
                averageScore = average,
                rejected = average != null && average < minAverageScore
            )
        } catch (ex: Exception) {
            // Pass the throwable as the trailing argument so the stack trace is logged;
            // ex.message alone may be null and gives no indication of where the failure happened.
            logger.warn("PaddleOCR failed: {}", ex.message, ex)
            null
        }
    }

    /**
     * POSTs the file at [path] to [baseUrl] as the multipart form field `file`
     * and parses the response body. A missing body yields an empty payload.
     */
    private fun callOcr(path: Path): OcrPayload {
        val headers = HttpHeaders().apply { contentType = MediaType.MULTIPART_FORM_DATA }
        val form = LinkedMultiValueMap<String, Any>().apply {
            add("file", FileSystemResource(path.toFile()))
        }
        val response = restTemplate.postForEntity(baseUrl, HttpEntity(form, headers), String::class.java)
        val raw = response.body ?: return OcrPayload(emptyList(), emptyList())
        return parsePayload(raw)
    }

    /**
     * Extracts the `texts` and `scores` arrays from the JSON document [raw].
     *
     * When both arrays are present with the same length they are read pairwise and a
     * pair is kept only if its text is non-null and its score is numeric, so index `i`
     * of the returned texts always corresponds to index `i` of the returned scores.
     * Filtering each array independently could silently shift one list relative to
     * the other and attach scores to the wrong texts.
     *
     * When `scores` is absent or has a different length, each array is parsed on its
     * own (null texts and non-numeric scores are skipped).
     */
    private fun parsePayload(raw: String): OcrPayload {
        val root = objectMapper.readTree(raw)
        val textNodes = root.path("texts")
        if (!textNodes.isArray) return OcrPayload(emptyList(), emptyList())
        val scoreNodes = root.path("scores")

        if (scoreNodes.isArray && scoreNodes.size() == textNodes.size()) {
            val (texts, scores) = textNodes.zip(scoreNodes)
                .mapNotNull { (textNode, scoreNode) ->
                    val text: String? = textNode.asText(null)
                    if (text != null && scoreNode.isNumber) text to scoreNode.asDouble() else null
                }
                .unzip()
            return OcrPayload(texts, scores)
        }

        val texts = textNodes.mapNotNull { it.asText(null) }
        val scores = if (scoreNodes.isArray) {
            scoreNodes.mapNotNull { if (it.isNumber) it.asDouble() else null }
        } else {
            emptyList()
        }
        return OcrPayload(texts, scores)
    }

    /**
     * Keeps the texts whose score is at least [min] and whose trimmed length is at
     * least [minLen]. If [scores] is empty or its size differs from [texts], no
     * filtering is possible and [texts] is returned unchanged.
     */
    private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
        if (scores.isEmpty() || scores.size != texts.size) return texts
        return texts.filterIndexed { index, text ->
            scores[index] >= min && text.trim().length >= minLen
        }
    }

    /** Arithmetic mean of [scores], or `null` when there are none. */
    private fun averageScore(scores: List<Double>): Double? =
        if (scores.isEmpty()) null else scores.sum() / scores.size

    /**
     * Joins [texts] with spaces, collects every 12-digit candidate matched by
     * [AADHAAR_CANDIDATE] and returns the first one accepted by `isValidAadhaar`,
     * passed through `formatAadhaar`; `null` when no candidate is accepted.
     */
    private fun extractAadhaar(texts: List<String>): String? {
        val joined = texts.joinToString(" ")
        val firstValid = AADHAAR_CANDIDATE.findAll(joined)
            .map { match -> match.value.filter { it.isDigit() } }
            .filter { it.length == 12 }
            .firstOrNull { isValidAadhaar(it) }
            ?: return null
        return formatAadhaar(firstValid)
    }

    private companion object {
        /**
         * Twelve digits, each optionally followed by one whitespace character or
         * hyphen, delimited by word boundaries. Compiled once instead of per call.
         */
        val AADHAAR_CANDIDATE = Regex("\\b(?:\\d[\\s-]?){12}\\b")
    }
}
|
|
|
|
/**
 * Outcome of one OCR extraction.
 *
 * @property texts recognised text lines.
 * @property aadhaar Aadhaar number found in the texts, or `null` if none was found.
 * @property averageScore mean recognition score, or `null` when no scores were available.
 * @property rejected whether the result was flagged as rejected.
 */
data class PaddleOcrResult(
    val texts: List<String>,
    val aadhaar: String?,
    val averageScore: Double?,
    val rejected: Boolean
)
|
|
|
|
/**
 * Raw OCR output as two parallel lists.
 *
 * @property texts recognised text lines.
 * @property scores recognition scores reported alongside the texts.
 */
private data class OcrPayload(
    val texts: List<String>,
    val scores: List<Double>
)
|