From db6ea5d529ea437f193afb5b5c1892f63e409600 Mon Sep 17 00:00:00 2001
From: androidlover5842
Date: Sat, 31 Jan 2026 12:46:48 +0530
Subject: [PATCH] Normalize extracted address fields

---
 .../component/DocumentExtractionService.kt    | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt
index 9799294..29c7983 100644
--- a/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt
+++ b/src/main/kotlin/com/android/trisolarisserver/component/DocumentExtractionService.kt
@@ -244,6 +244,7 @@ class DocumentExtractionService(
         normalizePinCode(results)
         normalizeIdNumber(results)
         markAadhaarIfValid(results)
+        normalizeAddress(results)
         applyBookingCityUpdates(document, results)
         results["docType"] = computeDocType(results, handled)
         applyGuestUpdates(document, propertyId, results)
@@ -282,6 +283,17 @@ class DocumentExtractionService(
         }
     }
 
+    /**
+     * Replaces the extracted address in [results] with its cleaned form. The entry is left
+     * untouched when [cleanedValue] yields null or when [cleanAddress] rejects every part.
+     */
+    private fun normalizeAddress(results: MutableMap<String, String>) {
+        val key = DocumentPrompts.ADDRESS.first
+        val raw = cleanedValue(results[key]) ?: return
+        val normalized = cleanAddress(raw) ?: return
+        results[key] = normalized
+    }
+
     private fun markAadhaarIfValid(results: MutableMap<String, String>) {
         val idKey = DocumentPrompts.ID_NUMBER.first
         val digits = normalizeDigits(cleanedValue(results[idKey]))
@@ -519,3 +531,74 @@ private fun isValidPin(value: String?): Boolean {
     if (value.isNullOrBlank()) return false
     return pinCodeRegex.matches(value)
 }
+
+/** Relation markers that open a part naming a relative (e.g. "S/O ...") rather than a place. */
+private val addressRelationRegex = Regex("^\\s*(S/O|D/O|W/O|C/O|H/O|F/O)\\b", RegexOption.IGNORE_CASE)
+
+/**
+ * Field labels stripped from the start of each address part, applied in list order.
+ *
+ * A label that extends another one ("HOUSE NO" vs "HOUSE") must come first, otherwise the shorter
+ * label consumes its prefix and the longer one can never match. The trailing class also swallows
+ * '.', so abbreviations such as "P.O. " or "Dist. " do not leave a stray dot behind.
+ */
+private val addressPrefixRegexes = listOf(
+    "ADDRESS",
+    "HOUSE\\s*/\\s*BLDG\\.?\\s*/\\s*APT\\.?",
+    "HOUSE-?BLDG",
+    "HOUSE\\s*NO",
+    "HOUSE",
+    "H\\s*NO",
+    "STREET/ROAD/LANE",
+    "STREET",
+    "ROAD",
+    "LANE",
+    "AREA",
+    "SECTOR",
+    "COLONY",
+    "VILLAGE/TOWN/CITY",
+    "VILLAGE",
+    "TOWN",
+    "CITY",
+    "P\\.?\\s*O\\.?",
+    "POST\\s*OFFICE",
+    "P\\.?\\s*DIST\\.?",
+    "DISTRICT",
+    "DIST",
+    "STATE",
+    "PIN\\s*CODE",
+    "PINCODE",
+    "PIN"
+).map { label -> Regex("^\\s*$label\\b[:.\\-\\s]*", RegexOption.IGNORE_CASE) }
+
+/** A part that is nothing but one of these phrases (compared in lowercase) is discarded. */
+private val addressDropPhrases = setOf(
+    "address", "addr", "area", "area was", "street/road/lane", "village/town/city", "colony"
+)
+
+/** Matches any run of whitespace; used to collapse it to a single space. */
+private val addressWhitespaceRegex = Regex("\\s+")
+
+/**
+ * Cleans a raw extracted address.
+ *
+ * The input is split on commas and newlines. Parts starting with a relation marker are dropped,
+ * leading field labels are stripped, whitespace runs are collapsed, and parts that end up shorter
+ * than three characters or equal to a bare label phrase are discarded.
+ *
+ * @param raw address text to clean
+ * @return the surviving parts joined with ", ", or null when no part survives
+ */
+private fun cleanAddress(raw: String): String? {
+    val parts = raw.replace("\n", ",")
+        .split(",")
+        .map { it.trim() }
+        .filter { it.isNotEmpty() && !addressRelationRegex.containsMatchIn(it) }
+        .map { part ->
+            addressPrefixRegexes
+                .fold(part) { value, regex -> regex.replace(value, "").trim() }
+                .replace(addressWhitespaceRegex, " ")
+        }
+        .filter { it.length >= 3 && it.lowercase() !in addressDropPhrases }
+    return if (parts.isEmpty()) null else parts.joinToString(", ")
+}