-
Notifications
You must be signed in to change notification settings - Fork 28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix!: improve case utils for more optimal member names #975
Changes from 4 commits
85e8ca9
b13a341
4aa1928
8b42d62
5e9f6a9
d9cd9d7
8666f18
207cdcf
ad0a1d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"id": "b707af45-8aa9-4f88-8fce-b8ce51770c25", | ||
"type": "misc", | ||
"description": "**BREAKING**: refactor CaseUtils to better deal with plurals and other edge cases." | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,32 +8,109 @@ package software.amazon.smithy.kotlin.codegen.utils | |
* Split a string on word boundaries | ||
*/ | ||
fun String.splitOnWordBoundaries(): List<String> { | ||
// adapted from Java v2 SDK CodegenNamingUtils.splitOnWordBoundaries | ||
var result = this | ||
// This is taken from Rust: https://github.com/awslabs/smithy-rs/pull/3037/files#diff-4175c66ee81a450fcf1cd3e256f36ae2c8e0b30b910be8ca505135fbe215144d | ||
// with minor changes (s3 and iot as whole words). Previously we used the Java v2 implementation | ||
// https://github.com/aws/aws-sdk-java-v2/blob/2.20.162/utils/src/main/java/software/amazon/awssdk/utils/internal/CodegenNamingUtils.java#L36 | ||
// but this has some edge cases it doesn't handle well | ||
val out = mutableListOf<String>() | ||
// These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL` | ||
val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl", "iot", "s3") | ||
var currentWord = "" | ||
|
||
// all non-alphanumeric characters: "acm-success"-> "acm success" | ||
result = result.replace(Regex("[^A-Za-z0-9+]"), " ") | ||
// emit the current word and update from the next character | ||
val emit = { next: Char -> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this lambda function definitely seems Rusty |
||
if (currentWord.isNotEmpty()) { | ||
out += currentWord.lowercase() | ||
} | ||
currentWord = if (next.isLetterOrDigit()) { | ||
next.toString() | ||
} else { | ||
"" | ||
} | ||
} | ||
|
||
// if a number has a standalone v in front of it, separate it out | ||
result = result.replace(Regex("([^a-z]{2,})v([0-9]+)"), "$1 v$2 ") // TESTv4 -> "TEST v4 " | ||
.replace(Regex("([^A-Z]{2,})V([0-9]+)"), "$1 V$2 ") // TestV4 -> "Test V4 " | ||
val allLowerCase = lowercase() == this | ||
forEachIndexed { index, nextChar -> | ||
val peek = getOrNull(index + 1) | ||
val doublePeek = getOrNull(index + 2) | ||
val completeWordInProgress = completeWords.any { | ||
(currentWord + substring(index)).lowercase().startsWith(it) | ||
} && !completeWords.contains(currentWord.lowercase()) | ||
|
||
// add a space between camelCased words | ||
result = result.split(Regex("(?<=[a-z])(?=[A-Z]([a-zA-Z]|[0-9]))")).joinToString(separator = " ") // AcmSuccess -> // "Acm Success" | ||
when { | ||
// [C] in these docs indicates the value of nextCharacter | ||
// A[_]B | ||
!nextChar.isLetterOrDigit() -> emit(nextChar) | ||
|
||
// add a space after acronyms | ||
result = result.replace(Regex("([A-Z]+)([A-Z][a-z])"), "$1 $2") // "ACMSuccess" -> "ACM Success" | ||
// If we have no letters so far, push the next letter (we already know it's a letter or digit) | ||
currentWord.isEmpty() -> currentWord += nextChar.toString() | ||
|
||
// add space after a number in the middle of a word | ||
result = result.replace(Regex("([0-9])([a-zA-Z])"), "$1 $2") // "s3ec2" -> "s3 ec2" | ||
// Abc[D]ef or Ab2[D]ef | ||
!completeWordInProgress && loweredFollowedByUpper(currentWord, nextChar) -> emit(nextChar) | ||
|
||
// remove extra spaces - multiple consecutive ones or those and the beginning/end of words | ||
result = result.replace(Regex("\\s+"), " ") // "Foo Bar" -> "Foo Bar" | ||
.trim() // " Foo " -> "Foo" | ||
// s3[k]ey | ||
!completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextChar) -> emit(nextChar) | ||
|
||
return result.split(" ") | ||
// DB[P]roxy, or `IAM[U]ser` but not AC[L]s | ||
endOfAcronym(currentWord, nextChar, peek, doublePeek) -> emit(nextChar) | ||
|
||
// emit complete words | ||
!completeWordInProgress && completeWords.contains(currentWord.lowercase()) -> emit(nextChar) | ||
|
||
// If we haven't found a word boundary, push it and keep going | ||
else -> currentWord += nextChar.toString() | ||
} | ||
} | ||
if (currentWord.isNotEmpty()) { | ||
out += currentWord | ||
} | ||
|
||
return out | ||
} | ||
|
||
/** | ||
* Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s` | ||
*/ | ||
private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean { | ||
if (!current.last().isUpperCase()) { | ||
// Not an acronym in progress | ||
return false | ||
} | ||
if (!nextChar.isUpperCase()) { | ||
// We aren't at the next word yet | ||
return false | ||
} | ||
|
||
if (peek?.isLowerCase() != true) { | ||
return false | ||
} | ||
|
||
// Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser` | ||
if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
return false | ||
} | ||
|
||
// Skip cases like `DynamoD[B]v2` | ||
return !(peek == 'v' && doublePeek?.isDigit() == true) | ||
Comment on lines
+99
to
+100
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correctness/Question: Why do we want to skip cases like this? It seems to me that "DynamoDBv2" in this case represents the words There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changing this results in: Member Names
Enum Names
Client Names
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, turns out I didn't fully understand the if (nextChar == 'v' && peek?.isDigit() == true) {
// Handle cases like `DynamoDB[v]2`
return true
} This yields Full function codeprivate fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
if (!current.last().isUpperCase()) {
// Not an acronym in progress
return false
}
if (nextChar == 'v' && peek?.isDigit() == true) {
// Handle cases like `DynamoDB[v]2`
return true
}
if (!nextChar.isUpperCase()) {
// We aren't at the next word yet
return false
}
if (peek?.isLowerCase() != true) {
return false
}
// Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
if (peek == 's' && doublePeek?.isLowerCase() != true) {
return false
}
// Skip cases like `DynamoD[B]v2`
return !(peek == 'v' && doublePeek?.isDigit() == true)
} |
||
} | ||
|
||
private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean { | ||
if (!nextChar.isUpperCase()) { | ||
return false | ||
} | ||
return current.last().isLowerCase() || current.last().isDigit() | ||
} | ||
|
||
private fun loweredFollowedByDigit(current: String, nextChar: Char): Boolean { | ||
if (!nextChar.isDigit()) { | ||
return false | ||
} | ||
return current.last().isLowerCase() | ||
} | ||
|
||
private fun digitFollowedByLower(current: String, nextChar: Char): Boolean = | ||
(current.last().isDigit() && nextChar.isLowerCase()) | ||
|
||
/** | ||
* Convert a string to `PascalCase` (uppercase start with upper case word boundaries) | ||
*/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,11 +8,14 @@ package software.amazon.smithy.kotlin.codegen.core | |
import software.amazon.smithy.kotlin.codegen.model.expectShape | ||
import software.amazon.smithy.kotlin.codegen.test.prependNamespaceAndService | ||
import software.amazon.smithy.kotlin.codegen.test.toSmithyModel | ||
import software.amazon.smithy.kotlin.codegen.utils.toCamelCase | ||
import software.amazon.smithy.model.shapes.MemberShape | ||
import software.amazon.smithy.model.shapes.ShapeId | ||
import java.io.File | ||
import kotlin.test.Test | ||
import kotlin.test.assertEquals | ||
import kotlin.test.assertNotEquals | ||
import kotlin.test.fail | ||
|
||
class NamingTest { | ||
|
||
|
@@ -24,7 +27,7 @@ class NamingTest { | |
assertEquals("ElastiCache", clientName("ElastiCache")) | ||
assertEquals("ApiGatewayManagement", clientName("ApiGatewayManagementApi")) | ||
assertEquals("MigrationHubConfig", clientName("MigrationHub Config")) | ||
assertEquals("IoTFleetHub", clientName("IoTFleetHub")) | ||
assertEquals("IotFleetHub", clientName("IoTFleetHub")) | ||
assertEquals("Iot1ClickProjects", clientName("IoT 1Click Projects")) | ||
assertEquals("DynamoDb", clientName("DynamoDB")) | ||
|
||
|
@@ -45,8 +48,8 @@ class NamingTest { | |
"ACL" to "acl", | ||
"ACLList" to "aclList", | ||
"fooey" to "fooey", | ||
"stringA" to "stringa", | ||
"StringA" to "stringa", | ||
"stringA" to "stringA", | ||
"StringA" to "stringA", | ||
) | ||
|
||
tests.forEach { (input, expected) -> | ||
|
@@ -75,30 +78,30 @@ class NamingTest { | |
"application/vnd.amazonaws.card.generic" to "ApplicationVndAmazonawsCardGeneric", | ||
"IPV4" to "Ipv4", | ||
"ipv4" to "Ipv4", | ||
"IPv4" to "IpV4", | ||
"ipV4" to "IpV4", | ||
"IPv4" to "Ipv4", | ||
"ipV4" to "Ipv4", | ||
"IPMatch" to "IpMatch", | ||
"S3" to "S3", | ||
"EC2Instance" to "Ec2Instance", | ||
"aws.config" to "AwsConfig", | ||
"AWS::EC2::CustomerGateway" to "AwsEc2CustomerGateway", | ||
"application/pdf" to "ApplicationPdf", | ||
"ADConnector" to "AdConnector", | ||
"MS-CHAPv1" to "MsChapV1", | ||
"MS-CHAPv1" to "MsChapv1", | ||
"One-Way: Outgoing" to "OneWayOutgoing", | ||
"scram_sha_1" to "ScramSha1", | ||
"EC_prime256v1" to "EcPrime256V1", | ||
"EC_prime256v1" to "EcPrime256v1", | ||
"EC_PRIME256V1" to "EcPrime256V1", | ||
"EC2v11.4" to "Ec2V11_4", | ||
"EC2v11.4" to "Ec2v11_4", | ||
"nodejs4.3-edge" to "Nodejs4_3_Edge", | ||
"BUILD_GENERAL1_SMALL" to "BuildGeneral1Small", | ||
"SSE_S3" to "SseS3", | ||
"http1.1" to "Http1_1", | ||
"T100" to "T100", | ||
"s3:ObjectCreated:*" to "S3ObjectCreated", | ||
"s3:ObjectCreated:Put" to "S3ObjectCreatedPut", | ||
"TLSv1" to "TlsV1", | ||
"TLSv1.2" to "TlsV1_2", | ||
"TLSv1" to "Tlsv1", | ||
"TLSv1.2" to "Tlsv1_2", | ||
"us-east-1" to "UsEast1", | ||
"io1" to "Io1", | ||
"testNESTEDAcronym" to "TestNestedAcronym", | ||
|
@@ -116,12 +119,18 @@ class NamingTest { | |
"arm64" to "Arm64", | ||
) | ||
|
||
val errors = mutableListOf<String>() | ||
tests.forEach { (input, expected) -> | ||
// NOTE: a lot of these are not valid names according to the Smithy spec but since | ||
// we still allow deriving a name from the enum value we want to verify what _would_ happen | ||
// should we encounter these inputs | ||
val actual = input.enumVariantName() | ||
assertEquals(expected, actual, "input: $input") | ||
if (expected != actual) { | ||
errors += "expected '$expected' != actual '$actual' for input: $input" | ||
} | ||
} | ||
if (errors.isNotEmpty()) { | ||
fail(errors.joinToString("\n")) | ||
} | ||
} | ||
|
||
|
@@ -171,4 +180,97 @@ class NamingTest { | |
assertNotEquals(all, firstMember) | ||
assertNotEquals(firstMember, secondMember) | ||
} | ||
|
||
@Test | ||
fun testCamelCase() { | ||
val tests = listOf( | ||
"ACLs" to "acls", | ||
"ACLsUpdateStatus" to "aclsUpdateStatus", | ||
"AllowedAllVPCs" to "allowedAllVpcs", | ||
"BluePrimaryX" to "bluePrimaryX", | ||
"CIDRs" to "cidrs", | ||
"AuthTtL" to "authTtl", | ||
"CNAMEPrefix" to "cnamePrefix", | ||
"S3Location" to "s3Location", | ||
"signatureS" to "signatureS", | ||
"signatureR" to "signatureR", | ||
"M3u8Settings" to "m3u8Settings", | ||
"IAMUser" to "iamUser", | ||
"OtaaV1_0_x" to "otaaV10X", | ||
"DynamoDBv2Action" to "dynamoDbv2Action", | ||
"SessionKeyEmv2000" to "sessionKeyEmv2000", | ||
"SupportsClassB" to "supportsClassB", | ||
"UnassignIpv6AddressesRequest" to "unassignIpv6AddressesRequest", | ||
"TotalGpuMemoryInMiB" to "totalGpuMemoryInMib", | ||
"WriteIOs" to "writeIos", | ||
"dynamoDBv2" to "dynamoDbv2", | ||
"ipv4Address" to "ipv4Address", | ||
"sigv4" to "sigv4", | ||
"s3key" to "s3Key", | ||
"sha256sum" to "sha256Sum", | ||
"Av1QvbrSettings" to "av1QvbrSettings", | ||
"Av1Settings" to "av1Settings", | ||
"AwsElbv2LoadBalancer" to "awsElbv2LoadBalancer", | ||
"SigV4Authorization" to "sigv4Authorization", | ||
"IpV6Address" to "ipv6Address", | ||
"IpV6Cidr" to "ipv6Cidr", | ||
"IpV4Addresses" to "ipv4Addresses", | ||
) | ||
|
||
tests.forEach { (input, expected) -> | ||
val actual = input.toCamelCase() | ||
assertEquals(expected, actual, "input: $input") | ||
} | ||
} | ||
|
||
@Test | ||
fun testAllNames() { | ||
// Set this to true to write a new test expectation file | ||
val publishUpdate = true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we just always have this enabled? |
||
val allNames = this::class.java.getResource("/all-names-test-output.txt")?.readText()!! | ||
val errors = mutableListOf<String>() | ||
val output = StringBuilder() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add a header to the file |
||
allNames.lines().filter { it.isNotBlank() }.forEach { | ||
val split = it.split(',') | ||
val input = split[0] | ||
val expectation = split[1] | ||
val actual = input.toCamelCase() | ||
if (actual != expectation) { | ||
errors += "$it => $actual (expected $expectation)" | ||
} | ||
output.appendLine("$input,$actual") | ||
} | ||
if (publishUpdate) { | ||
File("all-names-test-output.txt").writeText(output.toString()) | ||
} | ||
if (errors.isNotEmpty()) { | ||
fail(errors.joinToString("\n")) | ||
} | ||
} | ||
|
||
@Test | ||
fun testClientNames() { | ||
// jq '.. | select(.sdkId?).sdkId' codegen/sdk/aws-models/*.json > /tmp/sdk-ids.txt | ||
// Set this to true to write a new test expectation file | ||
val publishUpdate = true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment about whether this should be configurable or not |
||
val allNames = this::class.java.getResource("/sdk-ids-test-output.txt")?.readText()!! | ||
val errors = mutableListOf<String>() | ||
val output = StringBuilder() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also same comment about file header, it makes it easier to know what's what |
||
allNames.lines().filter { it.isNotBlank() }.forEach { | ||
val split = it.split(',') | ||
val input = split[0] | ||
val expectation = split[1] | ||
val actual = clientName(input) | ||
if (actual != expectation) { | ||
errors += "$it => $actual (expected $expectation)" | ||
} | ||
output.appendLine("$input,$actual") | ||
} | ||
if (publishUpdate) { | ||
File("sdk-ids-test-output.txt").writeText(output.toString()) | ||
} | ||
if (errors.isNotEmpty()) { | ||
fail(errors.joinToString("\n")) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correctness/Question: Should we really publish the update if there are errors? Or should the error check (and potential There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the update doesn't overwrite the file, it ends up generated at the root of the project so you can diff them |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Style: Could be replaced with
return buildList { ... }
.