try OCR on failed attempt
All checks were successful
build-and-deploy / build-deploy (push) Successful in 33s

This commit is contained in:
androidlover5842
2026-01-31 09:10:47 +05:30
parent f7c0cf5c18
commit ced92c34f8
6 changed files with 164 additions and 41 deletions

View File

@@ -0,0 +1,97 @@
package com.android.trisolarisserver.component
import com.fasterxml.jackson.databind.ObjectMapper
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Value
import org.springframework.core.io.FileSystemResource
import org.springframework.http.HttpEntity
import org.springframework.http.HttpHeaders
import org.springframework.http.MediaType
import org.springframework.stereotype.Component
import org.springframework.util.LinkedMultiValueMap
import org.springframework.web.client.RestTemplate
import java.nio.file.Files
import java.nio.file.Path
/**
 * Client for a PaddleOCR HTTP endpoint.
 *
 * Uploads a local file as a multipart form (field name `file`) to [baseUrl], reads the
 * `texts` and `scores` arrays from the JSON response, keeps the texts whose score is at
 * least [minScore], and looks for a 12-digit Aadhaar candidate among the kept texts.
 *
 * The client is best-effort: [extract] returns `null` instead of throwing.
 *
 * @property enabled feature flag (`ocr.paddle.enabled`, default `false`); when off, [extract] is a no-op.
 * @property baseUrl URL the file is POSTed to (`ocr.paddle.baseUrl`).
 * @property minScore minimum score a text must have to be kept (`ocr.paddle.minScore`, default `0.9`).
 */
@Component
class PaddleOcrClient(
    private val restTemplate: RestTemplate,
    private val objectMapper: ObjectMapper,
    @Value("\${ocr.paddle.enabled:false}")
    private val enabled: Boolean,
    @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
    private val baseUrl: String,
    @Value("\${ocr.paddle.minScore:0.9}")
    private val minScore: Double
) {
    private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)

    /**
     * Runs OCR on the file at [filePath].
     *
     * @return the score-filtered texts plus the extracted Aadhaar number (if any), or `null`
     * when the client is disabled, [filePath] is not a regular file, or any step fails
     * (failures are logged at WARN level).
     */
    fun extract(filePath: String): PaddleOcrResult? {
        if (!enabled) return null
        return try {
            // Path parsing lives inside the try so a malformed path yields null like every other failure.
            val path = Path.of(filePath)
            // A directory "exists" but cannot be uploaded; require a regular file up front.
            if (!Files.isRegularFile(path)) return null
            val output = callOcr(path)
            val filtered = filterByScore(output.texts, output.scores, minScore)
            PaddleOcrResult(filtered, extractAadhaar(filtered))
        } catch (ex: Exception) {
            // The trailing throwable makes SLF4J log the stack trace; ex.message alone is often null.
            logger.warn("PaddleOCR failed: {}", ex.message, ex)
            null
        }
    }

    /**
     * POSTs the file to [baseUrl] and parses the `texts` / `scores` arrays of the JSON response.
     *
     * The returned lists are either index-aligned (same size, score `i` belongs to text `i`)
     * or `scores` is empty, which tells [filterByScore] to skip filtering.
     */
    private fun callOcr(path: Path): OcrPayload {
        val headers = HttpHeaders()
        headers.contentType = MediaType.MULTIPART_FORM_DATA
        val body = LinkedMultiValueMap<String, Any>().apply {
            add("file", FileSystemResource(path.toFile()))
        }
        val response = restTemplate.postForEntity(baseUrl, HttpEntity(body, headers), String::class.java)
        val raw = response.body ?: return OcrPayload(emptyList(), emptyList())

        val root = objectMapper.readTree(raw)
        val textNodes = root.path("texts")
        if (!textNodes.isArray) return OcrPayload(emptyList(), emptyList())
        val scoreNodes = root.path("scores")

        // Scores can only be trusted when there is exactly one numeric score per text entry.
        val scoresUsable = scoreNodes.isArray &&
            scoreNodes.size() == textNodes.size() &&
            scoreNodes.all { it.isNumber }
        if (!scoresUsable) {
            return OcrPayload(textNodes.mapNotNull { it.asText(null) }, emptyList())
        }

        // Drop null texts together with their score so the remaining indices stay aligned.
        val (parsedTexts, parsedScores) = textNodes.zip(scoreNodes)
            .mapNotNull { (textNode, scoreNode) ->
                textNode.asText(null)?.let { text -> text to scoreNode.asDouble() }
            }
            .unzip()
        return OcrPayload(parsedTexts, parsedScores)
    }

    /**
     * Keeps the texts whose score at the same index is at least [min].
     *
     * When [scores] is empty or its size differs from [texts], no filtering is possible and
     * [texts] is returned unchanged.
     */
    private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double): List<String> {
        if (scores.isEmpty() || scores.size != texts.size) return texts
        return texts.filterIndexed { index, _ -> scores[index] >= min }
    }

    /**
     * Searches the space-joined [texts] for runs of 12 digits (optionally separated by
     * whitespace or hyphens) and returns the first one accepted by `isValidAadhaar`, passed
     * through `formatAadhaar`; `null` when no candidate is accepted.
     */
    private fun extractAadhaar(texts: List<String>): String? =
        AADHAAR_CANDIDATE.findAll(texts.joinToString(" "))
            .map { match -> match.value.filter { it.isDigit() } }
            .filter { digits -> digits.length == 12 }
            .firstOrNull { digits -> isValidAadhaar(digits) }
            ?.let { digits -> formatAadhaar(digits) }

    private companion object {
        /** Twelve digits, each optionally followed by one whitespace or hyphen; compiled once. */
        private val AADHAAR_CANDIDATE = Regex("\\b(?:\\d[\\s-]?){12}\\b")
    }
}
/**
 * Outcome of a PaddleOCR extraction.
 *
 * @property texts recognised text fragments that were kept after score filtering.
 * @property aadhaar Aadhaar number found in [texts], or `null` when none was found.
 */
data class PaddleOcrResult(
    val texts: List<String>,
    val aadhaar: String?,
)
/**
 * Raw values parsed from the OCR service response.
 *
 * @property texts entries read from the response's `texts` array.
 * @property scores numeric entries read from the response's `scores` array; may be empty.
 */
private data class OcrPayload(
    val texts: List<String>,
    val scores: List<Double>,
)