From 65c3f5f9f408b8f2d1e96305e99ce59c1b64a039 Mon Sep 17 00:00:00 2001
From: Aaron J Todd <todaaron@amazon.com>
Date: Tue, 10 Oct 2023 17:02:05 -0400
Subject: [PATCH] update case utils with failing tests

---
 .../smithy/kotlin/codegen/utils/CaseUtils.kt  | 101 +++++++++++++++---
 .../smithy/kotlin/codegen/core/NamingTest.kt  |  55 +++++++++-
 2 files changed, 136 insertions(+), 20 deletions(-)
diff --git a/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt b/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt
index 0dd7bef0a6..be77ef55ee 100644
--- a/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt
+++ b/codegen/smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/utils/CaseUtils.kt
@@ -8,32 +8,99 @@ package software.amazon.smithy.kotlin.codegen.utils
  * Split a string on word boundaries
  */
 fun String.splitOnWordBoundaries(): List<String> {
-    // adapted from Java v2 SDK CodegenNamingUtils.splitOnWordBoundaries
-    var result = this
+    // Ported from smithy-rs: https://github.com/awslabs/smithy-rs/pull/3037/files#diff-4175c66ee81a450fcf1cd3e256f36ae2c8e0b30b910be8ca505135fbe215144d
+    // Previously this used the Java v2 SDK implementation https://github.com/aws/aws-sdk-java-v2/blob/2.20.162/utils/src/main/java/software/amazon/awssdk/utils/internal/CodegenNamingUtils.java#L36
+    // but that implementation does not handle some edge cases well
+    val out = mutableListOf<String>()
+    // These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`; they are matched in lowercase
+    val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl")
+    var currentWord = ""
 
-    // all non-alphanumeric characters: "acm-success"-> "acm success"
-    result = result.replace(Regex("[^A-Za-z0-9+]"), " ")
+    // emit the current word (lowercased) and restart it from the next character when that is a letter or digit
+    val emit = { next: Char ->
+        if (currentWord.isNotEmpty()) {
+            out += currentWord.lowercase()
+        }
+        currentWord = if (next.isLetterOrDigit()) {
+            next.toString()
+        } else {
+            ""
+        }
+    }
+    val allLowerCase = this.lowercase() == this
+    this.forEachIndexed { index, nextCharacter ->
+        val peek = this.getOrNull(index + 1)
+        val doublePeek = this.getOrNull(index + 2)
+        val completeWordInProgress = completeWords.any { // currentWord + input from nextCharacter on starts with a complete word...
+            (currentWord + this.substring(index)).lowercase().startsWith(
+                it,
+            )
+        } && !completeWords.contains(currentWord.lowercase()) // ...and currentWord is not already a complete word by itself
+        when {
+            // [C] in these docs indicates the value of nextCharacter
+            // A[_]B
+            !nextCharacter.isLetterOrDigit() -> emit(nextCharacter)
 
-    // if a number has a standalone v in front of it, separate it out
-    result = result.replace(Regex("([^a-z]{2,})v([0-9]+)"), "$1 v$2 ") // TESTv4 -> "TEST v4 "
-        .replace(Regex("([^A-Z]{2,})V([0-9]+)"), "$1 V$2 ") // TestV4 -> "Test V4 "
+            // If we have no letters so far, push the next letter (we already know it's a letter or digit)
+            currentWord.isEmpty() -> currentWord += nextCharacter.toString()
 
-    // add a space between camelCased words
-    result = result.split(Regex("(?<=[a-z])(?=[A-Z]([a-zA-Z]|[0-9]))")).joinToString(separator = " ") // AcmSuccess -> // "Acm Success"
+            // Abc[D]ef or Ab2[D]ef
+            !completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)
 
-    // add a space after acronyms
-    result = result.replace(Regex("([A-Z]+)([A-Z][a-z])"), "$1 $2") // "ACMSuccess" -> "ACM Success"
+            // s3[k]ey (only applied when the whole input is lowercase)
+            !completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
+                nextCharacter,
+            )
 
-    // add space after a number in the middle of a word
-    result = result.replace(Regex("([0-9])([a-zA-Z])"), "$1 $2") // "s3ec2" -> "s3 ec2"
+            // DB[P]roxy, or `IAM[U]ser` but not AC[L]s
+            endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter)
 
-    // remove extra spaces - multiple consecutive ones or those and the beginning/end of words
-    result = result.replace(Regex("\\s+"), " ") // "Foo  Bar" -> "Foo Bar"
-        .trim() // " Foo " -> "Foo"
+            // If we haven't found a word boundary, push it and keep going
+            else -> currentWord += nextCharacter.toString()
+        }
+    }
+    if (currentWord.isNotEmpty()) {
+        out += currentWord.lowercase() // lowercase the trailing word as well, so every returned word matches what emit pushes
+    }
+    return out
+}
+
+/**
+ * Returns true when [nextChar] begins a new word right after an acronym, e.g. `DB[P]roxy`, `ARN[S]upport`, but not for a plural such as `AC[L]s`; [current] must be non-empty
+ */
+private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
+    if (!current.last().isUpperCase()) {
+        // Not an acronym in progress
+        return false
+    }
+    if (!nextChar.isUpperCase()) {
+        // We aren't at the next word yet
+        return false
+    }
+
+    if (peek?.isLowerCase() != true) { // no lowercase letter follows nextChar (or input ends), so it is not treated as a word start
+        return false
+    }
 
-    return result.split(" ")
+    // Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
+    if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) {
+        return false
+    }
+
+    // Skip cases like `DynamoD[B]v2`
+    return !(peek == 'v' && doublePeek?.isDigit() == true)
+}
+
+private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean { // boundary such as `Abc[D]ef` or `Ab2[D]ef`
+    if (!nextChar.isUpperCase()) {
+        return false
+    }
+    return current.last().isLowerCase() || current.last().isDigit() // `current` must be non-empty: last() throws on an empty string
 }
 
+private fun digitFollowedByLower(current: String, nextChar: Char): Boolean =
+    (current.last().isDigit() && nextChar.isLowerCase()) // boundary such as `s3[k]ey`; `current` must be non-empty since last() is called first
+
 /**
  * Convert a string to `PascalCase` (uppercase start with upper case word boundaries)
  */
diff --git a/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt b/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt
index 9028c0b689..251d2c5914 100644
--- a/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt
+++ b/codegen/smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/core/NamingTest.kt
@@ -119,12 +119,18 @@ class NamingTest {
             "arm64" to "Arm64",
         )
 
+        val errors = mutableListOf<String>()
         tests.forEach { (input, expected) ->
             // NOTE: a lot of these are not valid names according to the Smithy spec but since
             // we still allow deriving a name from the enum value we want to verify what _would_ happen
             // should we encounter these inputs
             val actual = input.enumVariantName()
-            assertEquals(expected, actual, "input: $input")
+            if (expected != actual) {
+                errors += "expected '$expected' != actual '$actual' for input: $input"
+            }
+        }
+        if (errors.isNotEmpty()) {
+            fail(errors.joinToString("\n"))
         }
     }
 
@@ -174,6 +180,49 @@ class NamingTest {
         assertNotEquals(all, firstMember)
         assertNotEquals(firstMember, secondMember)
     }
+
+    @Test
+    fun testCamelCase() {
+        val tests = listOf(
+            "ACLs" to "acls",
+            "ACLsUpdateStatus" to "aclsUpdateStatus",
+            "AllowedAllVPCs" to "allowedAllVpcs",
+            "BluePrimaryX" to "bluePrimaryX",
+            "CIDRs" to "cidrs",
+            "AuthTtL" to "authTtl",
+            "CNAMEPrefix" to "cnamePrefix",
+            "S3Location" to "s3Location",
+            "signatureS" to "signatureS",
+            "signatureR" to "signatureR",
+            "M3u8Settings" to "m3u8Settings",
+            "IAMUser" to "iamUser",
+            "OtaaV1_0_x" to "otaaV10X",
+            "DynamoDBv2Action" to "dynamoDbv2Action",
+            "SessionKeyEmv2000" to "sessionKeyEmv2000",
+            "SupportsClassB" to "supportsClassB",
+            "UnassignIpv6AddressesRequest" to "unassignIpv6AddressesRequest",
+            "TotalGpuMemoryInMiB" to "totalGpuMemoryInMib",
+            "WriteIOs" to "writeIos",
+            "dynamoDBv2" to "dynamoDbv2",
+            "ipv4Address" to "ipv4Address",
+            "sigv4" to "sigv4",
+            "s3key" to "s3Key",
+            "sha256sum" to "sha256Sum",
+            "Av1QvbrSettings" to "av1QvbrSettings",
+            "Av1Settings" to "av1Settings",
+            "AwsElbv2LoadBalancer" to "awsElbv2LoadBalancer",
+            "SigV4Authorization" to "sigv4Authorization",
+            "IpV6Address" to "ipv6Address",
+            "IpV6Cidr" to "ipv6Cidr",
+            "IpV4Addresses" to "ipv4Addresses",
+        )
+        // report every mismatch at once instead of stopping at the first failing input
+        val errors = tests.mapNotNull { (input, expected) ->
+            input.toCamelCase().takeIf { it != expected }?.let { "expected '$expected' != actual '$it' for input: $input" }
+        }
+        if (errors.isNotEmpty()) fail(errors.joinToString("\n"))
+    }
+
     @Test
     fun testAllNames() {
         // Set this to true to write a new test expectation file
@@ -186,7 +235,7 @@ class NamingTest {
             val input = split[0]
             val expectation = split[1]
             val actual = input.toCamelCase()
-            if (input.toCamelCase() != expectation) {
+            if (actual != expectation) {
                 errors += "$it => $actual (expected $expectation)"
             }
             output.appendLine("$input,$actual")
@@ -212,7 +261,7 @@ class NamingTest {
             val input = split[0]
             val expectation = split[1]
             val actual = clientName(input)
-            if (input.toCamelCase() != expectation) {
+            if (actual != expectation) {
                 errors += "$it => $actual (expected $expectation)"
             }
             output.appendLine("$input,$actual")