Reorganize packages by domain
All checks were successful
build-and-deploy / build-deploy (push) Successful in 34s
All checks were successful
build-and-deploy / build-deploy (push) Successful in 34s
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
package com.android.trisolarisserver.component.ai
|
||||
import com.android.trisolarisserver.component.ai.formatAadhaar
|
||||
import com.android.trisolarisserver.component.ai.isValidAadhaar
|
||||
|
||||
/**
 * Formats a 12-character Aadhaar value as three space-separated groups of four.
 *
 * Only the length is checked here; the content is not validated.
 *
 * @param value the raw value to format.
 * @return `"XXXX XXXX XXXX"` when [value] has exactly 12 characters, otherwise [value] unchanged.
 */
internal fun formatAadhaar(value: String): String =
    when (value.length) {
        12 -> value.chunked(4).joinToString(separator = " ")
        else -> value
    }
|
||||
|
||||
/**
 * Checks whether [value] is a well-formed Aadhaar number.
 *
 * The value must consist of exactly 12 ASCII digits and must pass the Verhoeff
 * checksum computed with [aadhaarMultiplication] and [aadhaarPermutation].
 *
 * @param value the candidate number, without separators.
 * @return `true` when the length, character set and checksum are all valid.
 */
internal fun isValidAadhaar(value: String): Boolean {
    // Char.isDigit() also accepts non-ASCII Unicode decimal digits (e.g. Devanagari),
    // which must not be treated as a valid Aadhaar number, so compare against '0'..'9'.
    if (value.length != 12 || !value.all { it in '0'..'9' }) return false

    // The checksum is accumulated from the rightmost digit; the permutation row
    // cycles with period 8 over the digit position.
    val checksum = value.reversed().foldIndexed(0) { position, acc, ch ->
        aadhaarMultiplication[acc][aadhaarPermutation[position % 8][ch - '0']]
    }
    return checksum == 0
}
|
||||
|
||||
/**
 * Multiplication table used by the Aadhaar checksum: `aadhaarMultiplication[a][b]`
 * is the product of `a` and `b` (the values match the Verhoeff "d" table).
 *
 * Each string is one row; the character at column `b` is the table entry.
 */
private val aadhaarMultiplication: Array<IntArray> = listOf(
    "0123456789",
    "1234067895",
    "2340178956",
    "3401289567",
    "4012395678",
    "5987604321",
    "6598710432",
    "7659821043",
    "8765932104",
    "9876543210",
).map { row -> IntArray(row.length) { col -> row[col] - '0' } }.toTypedArray()
|
||||
/**
 * Position-dependent permutation table used by the Aadhaar checksum:
 * `aadhaarPermutation[position % 8][digit]` (the values match the Verhoeff "p" table).
 *
 * Each string is one row; the character at column `digit` is the table entry.
 */
private val aadhaarPermutation: Array<IntArray> = listOf(
    "0123456789",
    "1576283094",
    "5803796142",
    "8916043527",
    "9453126870",
    "4286573901",
    "2793806415",
    "7046913258",
).map { row -> IntArray(row.length) { col -> row[col] - '0' } }.toTypedArray()
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.android.trisolarisserver.component.ai
|
||||
|
||||
import jakarta.annotation.PreDestroy
|
||||
import org.springframework.stereotype.Component
|
||||
import java.util.concurrent.Executors
|
||||
|
||||
/**
 * Runs submitted tasks one at a time on a single daemon thread named
 * `doc-extraction-queue`.
 *
 * Processing is best-effort: an exception thrown by a task is logged and
 * otherwise ignored, and later tasks still run.
 */
@Component
class ExtractionQueue {
    // java.util.logging is referenced by its fully qualified name because this
    // file has no logging framework import in scope.
    private val logger = java.util.logging.Logger.getLogger(ExtractionQueue::class.java.name)

    private val executor = Executors.newSingleThreadExecutor { runnable ->
        Thread(runnable, "doc-extraction-queue").apply { isDaemon = true }
    }

    /**
     * Schedules [task] to run after all previously enqueued tasks.
     *
     * @param task the work to execute on the queue thread.
     */
    fun enqueue(task: () -> Unit) {
        executor.submit {
            try {
                task()
            } catch (ex: Exception) {
                // Best-effort processing; failures should not crash the worker,
                // but they are recorded so they can be diagnosed.
                logger.log(java.util.logging.Level.WARNING, "Document extraction task failed", ex)
            }
        }
    }

    /** Stops accepting new tasks when the bean is destroyed. */
    @PreDestroy
    fun shutdown() {
        executor.shutdown()
    }
}
|
||||
@@ -0,0 +1,120 @@
|
||||
package com.android.trisolarisserver.component.ai
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import org.springframework.beans.factory.annotation.Value
|
||||
import org.springframework.http.HttpEntity
|
||||
import org.springframework.http.HttpHeaders
|
||||
import org.springframework.http.MediaType
|
||||
import org.springframework.stereotype.Component
|
||||
import org.springframework.web.client.RestTemplate
|
||||
|
||||
/**
 * HTTP client for the configured Llama endpoint.
 *
 * Every request is a JSON chat payload containing the configured model and
 * sampling parameters, a fixed system prompt, and one user message. The reply
 * text is read from `choices[0].message.content` of the response body.
 */
@Component
class LlamaClient(
    private val restTemplate: RestTemplate,
    private val objectMapper: ObjectMapper,
    @Value("\${ai.llama.baseUrl}")
    private val baseUrl: String,
    @Value("\${ai.llama.temperature:0.7}")
    private val temperature: Double,
    @Value("\${ai.llama.topP:0.8}")
    private val topP: Double,
    @Value("\${ai.llama.minP:0.2}")
    private val minP: Double,
    @Value("\${ai.llama.repeatPenalty:1.0}")
    private val repeatPenalty: Double,
    @Value("\${ai.llama.topK:40}")
    private val topK: Int,
    @Value("\${ai.llama.model}")
    private val model: String
) {
    private val systemPrompt =
        "Read extremely carefully. Look only at visible text. " +
            "Return the exact text you can read verbatim. " +
            "If the text is unclear, partial, or inferred, return NOT CLEARLY VISIBLE. " +
            "Do not guess. Do not explain."

    /**
     * Asks [question] about the image at [imageUrl].
     *
     * @return the model's reply text, or an empty string when the response has no body.
     */
    fun ask(imageUrl: String, question: String): String =
        post(chatPayload(listOf(textPart(question), imagePart(imageUrl))))

    /**
     * Asks [question] about the image at [imageUrl], appending [ocrText] to the
     * question under an `OCR:` heading.
     *
     * @return the model's reply text, or an empty string when the response has no body.
     */
    fun askWithOcr(imageUrl: String, ocrText: String, question: String): String =
        post(chatPayload(listOf(textPart("${question}\n\nOCR:\n${ocrText}"), imagePart(imageUrl))))

    /**
     * Asks [question] about plain-text [content], which is appended to the
     * question under an `EMAIL:` heading.
     *
     * @return the model's reply text, or an empty string when the response has no body.
     */
    fun askText(content: String, question: String): String =
        post(chatPayload("${question}\n\nEMAIL:\n${content}"))

    /** Builds a text content part for a multi-part user message. */
    private fun textPart(text: String): Map<String, Any> =
        mapOf("type" to "text", "text" to text)

    /** Builds an image content part that references the image by URL. */
    private fun imagePart(imageUrl: String): Map<String, Any> =
        mapOf("type" to "image_url", "image_url" to mapOf("url" to imageUrl))

    /**
     * Builds the request body shared by all `ask*` methods.
     *
     * @param userContent the user message content: either a plain string or a list of content parts.
     */
    private fun chatPayload(userContent: Any): Map<String, Any> = mapOf(
        "model" to model,
        "temperature" to temperature,
        "top_p" to topP,
        "min_p" to minP,
        "repeat_penalty" to repeatPenalty,
        "top_k" to topK,
        "messages" to listOf(
            mapOf("role" to "system", "content" to systemPrompt),
            mapOf("role" to "user", "content" to userContent)
        )
    )

    /**
     * Posts [payload] as JSON to the configured base URL and extracts the reply text.
     *
     * @return `choices[0].message.content` as text; an empty string when the
     * response body is absent or that path is missing.
     */
    private fun post(payload: Map<String, Any>): String {
        val headers = HttpHeaders()
        headers.contentType = MediaType.APPLICATION_JSON
        val entity = HttpEntity(payload, headers)
        val response = restTemplate.postForEntity(baseUrl, entity, String::class.java)
        val body = response.body ?: return ""
        val node = objectMapper.readTree(body)
        return node.path("choices").path(0).path("message").path("content").asText()
    }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
package com.android.trisolarisserver.component.ai
|
||||
import com.android.trisolarisserver.component.ai.formatAadhaar
|
||||
import com.android.trisolarisserver.component.ai.isValidAadhaar
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import org.slf4j.LoggerFactory
|
||||
import org.springframework.beans.factory.annotation.Value
|
||||
import org.springframework.core.io.FileSystemResource
|
||||
import org.springframework.http.HttpEntity
|
||||
import org.springframework.http.HttpHeaders
|
||||
import org.springframework.http.MediaType
|
||||
import org.springframework.stereotype.Component
|
||||
import org.springframework.util.LinkedMultiValueMap
|
||||
import org.springframework.web.client.RestTemplate
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * Client for the PaddleOCR HTTP service.
 *
 * Uploads a file, filters the recognised texts by confidence score and length,
 * and looks for a checksum-valid 12-digit Aadhaar number in the filtered texts.
 */
@Component
class PaddleOcrClient(
    private val restTemplate: RestTemplate,
    private val objectMapper: ObjectMapper,
    @Value("\${ocr.paddle.enabled:false}")
    private val enabled: Boolean,
    @Value("\${ocr.paddle.baseUrl:https://ocr.hoteltrisolaris.in}")
    private val baseUrl: String,
    @Value("\${ocr.paddle.minScore:0.9}")
    private val minScore: Double,
    @Value("\${ocr.paddle.minAverageScore:0.8}")
    private val minAverageScore: Double,
    @Value("\${ocr.paddle.minTextLength:4}")
    private val minTextLength: Int
) {
    private val logger = LoggerFactory.getLogger(PaddleOcrClient::class.java)
    private val aadhaarRegex = Regex("\\b(?:\\d[\\s-]?){12}\\b")

    /**
     * Runs OCR on the file at [filePath].
     *
     * @return the OCR result, or `null` when OCR is disabled, the file does not
     * exist, or the call fails for any reason (the failure is logged).
     */
    fun extract(filePath: String): PaddleOcrResult? {
        if (!enabled) return null
        val path = Path.of(filePath)
        if (!Files.exists(path)) return null

        return try {
            val sizeBytes = Files.size(path)
            logger.debug("PaddleOCR extract path={} sizeBytes={}", path, sizeBytes)
            val output = callOcr(path)
            val average = averageScore(output.scores)
            val filtered = filterByScore(output.texts, output.scores, minScore, minTextLength)
            // Extract once and reuse for both selection and logging.
            val filteredCandidates = extractCandidates(filtered)
            val aadhaar = selectAadhaar(filteredCandidates)
            // Raw candidates are only needed for the debug line, so skip the work otherwise.
            if (logger.isDebugEnabled) {
                val rawCandidates = extractCandidates(output.texts)
                if (rawCandidates.isNotEmpty() || filteredCandidates.isNotEmpty() || aadhaar != null) {
                    logger.debug(
                        "PaddleOCR candidates path={} raw={} filtered={} selected={}",
                        path,
                        rawCandidates.map { maskAadhaar(it) },
                        filteredCandidates.map { maskAadhaar(it) },
                        aadhaar?.let { maskAadhaar(it) }
                    )
                }
            }
            val rejected = average != null && average < minAverageScore
            PaddleOcrResult(filtered, aadhaar, average, rejected)
        } catch (ex: Exception) {
            // Pass the exception as the trailing argument so the stack trace is logged too.
            logger.warn("PaddleOCR failed: {}", ex.message, ex)
            null
        }
    }

    /**
     * Uploads the file as multipart form data and parses the `texts` and
     * `scores` arrays from the JSON response.
     *
     * @return the parsed payload; both lists are empty when the body is missing
     * or `texts` is not an array.
     */
    private fun callOcr(path: Path): OcrPayload {
        val headers = HttpHeaders()
        headers.contentType = MediaType.MULTIPART_FORM_DATA
        val body = LinkedMultiValueMap<String, Any>().apply {
            add("file", FileSystemResource(path.toFile()))
        }
        val entity = HttpEntity(body, headers)
        val response = restTemplate.postForEntity(baseUrl, entity, String::class.java)
        val raw = response.body ?: return OcrPayload(emptyList(), emptyList())
        val node = objectMapper.readTree(raw)
        val texts = node.path("texts")
        val scores = node.path("scores")
        if (!texts.isArray) return OcrPayload(emptyList(), emptyList())
        val parsedTexts = texts.mapNotNull { it.asText(null) }
        val parsedScores = if (scores.isArray) {
            scores.mapNotNull { if (it.isNumber) it.asDouble() else null }
        } else {
            emptyList()
        }
        return OcrPayload(parsedTexts, parsedScores)
    }

    /**
     * Keeps the texts whose score is at least [min] and whose trimmed length is
     * at least [minLen].
     *
     * When [scores] is empty or does not line up one-to-one with [texts], the
     * texts are returned unfiltered.
     */
    private fun filterByScore(texts: List<String>, scores: List<Double>, min: Double, minLen: Int): List<String> {
        if (scores.size != texts.size || scores.isEmpty()) return texts
        return texts.filterIndexed { index, text ->
            scores[index] >= min && text.trim().length >= minLen
        }
    }

    /** Returns the arithmetic mean of [scores], or `null` when there are none. */
    private fun averageScore(scores: List<Double>): Double? =
        if (scores.isEmpty()) null else scores.sum() / scores.size

    /**
     * Picks the first candidate that passes [isValidAadhaar] and returns it
     * formatted by [formatAadhaar], or `null` when no candidate is valid.
     */
    private fun selectAadhaar(candidates: List<String>): String? =
        candidates.firstOrNull { isValidAadhaar(it) }?.let { formatAadhaar(it) }

    /**
     * Joins [texts] with single spaces and returns every 12-digit run matched by
     * the Aadhaar pattern, with separators removed.
     */
    private fun extractCandidates(texts: List<String>): List<String> =
        aadhaarRegex.findAll(texts.joinToString(" "))
            .map { match -> match.value.filter { it.isDigit() } }
            .filter { digits -> digits.length == 12 }
            .toList()

    /**
     * Masks a 12-digit value for logging, keeping only the last four digits.
     * Values that do not contain exactly 12 digits are returned unchanged.
     */
    private fun maskAadhaar(value: String): String {
        val digits = value.filter { it.isDigit() }
        if (digits.length != 12) return value
        return "XXXXXXXX" + digits.takeLast(4)
    }
}
|
||||
|
||||
/**
 * Outcome of a PaddleOCR extraction.
 *
 * @property texts the recognised text fragments.
 * @property aadhaar a detected Aadhaar number, or `null` when none was found.
 * @property averageScore the mean confidence score, or `null` when no scores were available.
 * @property rejected whether the result was flagged as rejected.
 */
data class PaddleOcrResult(val texts: List<String>, val aadhaar: String?, val averageScore: Double?, val rejected: Boolean)
|
||||
|
||||
/**
 * Raw OCR output as two lists.
 *
 * @property texts the text entries.
 * @property scores the numeric score entries.
 */
private data class OcrPayload(val texts: List<String>, val scores: List<Double>)
|
||||
Reference in New Issue
Block a user