Skip to content

Commit

Permalink
add test for table.NewMetadata and AssignFresh* functions
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroshade committed Jan 7, 2025
1 parent 791303f commit abdd1ae
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 4 deletions.
4 changes: 2 additions & 2 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -1216,14 +1216,14 @@ func AssignFreshSchemaIDs(sc *Schema, nextID func() int) (*Schema, error) {

fields := outType.(*StructType).FieldList
var newIdentifierIDs []int
if len(sc.IdentifierFieldIDs) == 0 {
if len(sc.IdentifierFieldIDs) != 0 {
newIdentifierIDs = make([]int, len(sc.IdentifierFieldIDs))
for i, id := range sc.IdentifierFieldIDs {
newIdentifierIDs[i] = visitor.oldIdToNew[id]
}
}

return NewSchemaWithIdentifiers(sc.ID, newIdentifierIDs, fields...), nil
return NewSchemaWithIdentifiers(0, newIdentifierIDs, fields...), nil
}

type SchemaWithPartnerVisitor[T, P any] interface {
Expand Down
26 changes: 26 additions & 0 deletions schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,32 @@ func TestPruneNilSchema(t *testing.T) {
assert.ErrorIs(t, err, iceberg.ErrInvalidArgument)
}

// TestAssignFreshSchemaIDs runs AssignFreshSchemaIDs over tableSchemaNested
// with a generator that hands out 101, 102, ... and then walks the resulting
// schema, expecting each visited field to carry the next value of that same
// sequence.
func TestAssignFreshSchemaIDs(t *testing.T) {
	const base = 100

	counter := base
	nextID := func() int {
		counter++
		return counter
	}

	fresh, err := iceberg.AssignFreshSchemaIDs(tableSchemaNested, nextID)
	require.NoError(t, err)
	require.NotNil(t, fresh)

	// Rewind the counter so the verification walk replays the same sequence
	// the generator produced.
	counter = base

	var verify func(field iceberg.NestedField)
	verify = func(field iceberg.NestedField) {
		counter++
		assert.Equal(t, counter, field.ID)

		nested, isNested := field.Type.(iceberg.NestedType)
		if !isNested {
			return
		}

		// A field is checked before any of its children are descended into.
		for _, child := range nested.Fields() {
			verify(child)
		}
	}

	for _, top := range fresh.Fields() {
		verify(top)
	}
}

func TestSchemaRoundTrip(t *testing.T) {
data, err := json.Marshal(tableSchemaNested)
require.NoError(t, err)
Expand Down
6 changes: 5 additions & 1 deletion table/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,11 @@ func (m *metadataV2) UnmarshalJSON(b []byte) error {

const DefaultFormatVersion = 2

func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties, tableUuid uuid.UUID) (Metadata, error) {
// NewMetadata creates table metadata from the given schema, partition spec,
// sort order, location and properties when the caller has no table UUID to
// supply. It forwards every argument unchanged to NewMetadataWithUUID and
// passes uuid.Nil as the table UUID.
func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties) (Metadata, error) {
	// NOTE(review): presumably NewMetadataWithUUID substitutes a generated
	// UUID when it receives uuid.Nil; confirm against its implementation.
	unsetUUID := uuid.Nil

	return NewMetadataWithUUID(sc, partitions, sortOrder, location, props, unsetUUID)
}

func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties, tableUuid uuid.UUID) (Metadata, error) {
freshSchema, err := iceberg.AssignFreshSchemaIDs(sc, nil)
if err != nil {
return nil, err
Expand Down
115 changes: 115 additions & 0 deletions table/metadata_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,3 +491,118 @@ func TestV1WriteMetadataToV2(t *testing.T) {
assert.NotContains(t, rawData, "schema")
assert.NotContains(t, rawData, "partition-spec")
}

// TestNewMetadataWithExplicitV1Format calls NewMetadata with the
// "format-version" property set to "1" and compares the result against a
// hand-built metadataV1 whose schema, partition spec and sort order use
// different IDs from the inputs.
func TestNewMetadataWithExplicitV1Format(t *testing.T) {
	const location = "s3://some_v1_location/"

	// Inputs: schema ID 10 with field IDs 10/22/33, spec ID 10, order ID 10.
	inputSchema := iceberg.NewSchemaWithIdentifiers(
		10,
		[]int{22},
		iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false},
		iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true},
		iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false},
	)

	inputSpec := iceberg.NewPartitionSpecID(
		10,
		iceberg.PartitionField{
			SourceID:  22,
			FieldID:   1022,
			Transform: iceberg.IdentityTransform{},
			Name:      "bar",
		},
	)

	inputOrder := SortOrder{
		OrderID: 10,
		Fields: []SortField{
			{
				SourceID:  10,
				Transform: iceberg.IdentityTransform{},
				Direction: SortASC,
				NullOrder: NullsLast,
			},
		},
	}

	props := iceberg.Properties{"format-version": "1"}

	actual, err := NewMetadata(inputSchema, &inputSpec, inputOrder, location, props)
	require.NoError(t, err)

	// Expected: schema ID 0 with field IDs 1/2/3, partition field ID 1000
	// sourced from field 2, and sort order ID 1 sourced from field 1.
	wantSchema := iceberg.NewSchemaWithIdentifiers(
		0,
		[]int{2},
		iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String},
		iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true},
		iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool},
	)

	wantSpec := iceberg.NewPartitionSpec(
		iceberg.PartitionField{
			SourceID:  2,
			FieldID:   1000,
			Transform: iceberg.IdentityTransform{},
			Name:      "bar",
		},
	)

	wantOrder := SortOrder{
		OrderID: 1,
		Fields: []SortField{
			{
				SourceID:  1,
				Transform: iceberg.IdentityTransform{},
				Direction: SortASC,
				NullOrder: NullsLast,
			},
		},
	}

	wantLastPartitionID := 1000

	// The UUID and last-updated timestamp are copied from the actual result,
	// so they always match and the comparison turns on the remaining fields.
	want := &metadataV1{
		commonMetadata: commonMetadata{
			Loc:                location,
			UUID:               actual.TableUUID(),
			LastUpdatedMS:      actual.LastUpdatedMillis(),
			LastColumnId:       3,
			SchemaList:         []*iceberg.Schema{wantSchema},
			CurrentSchemaID:    0,
			Specs:              []iceberg.PartitionSpec{wantSpec},
			DefaultSpecID:      0,
			LastPartitionID:    &wantLastPartitionID,
			SortOrderList:      []SortOrder{wantOrder},
			DefaultSortOrderID: 1,
			FormatVersion:      1,
		},
		Schema:    wantSchema,
		Partition: slices.Collect(wantSpec.Fields()),
	}

	assert.Truef(t, want.Equals(actual), "expected: %s\ngot: %s", want, actual)
}

// TestNewMetadataV2Format calls NewMetadataWithUUID with nil properties and an
// explicit table UUID, then compares the result against a hand-built
// metadataV2 carrying that UUID, format version 2, and a schema, partition
// spec and sort order whose IDs differ from the inputs.
func TestNewMetadataV2Format(t *testing.T) {
	const location = "s3://some_v1_location/"

	// Inputs: schema ID 10 with field IDs 10/22/33, spec ID 10, order ID 10.
	inputSchema := iceberg.NewSchemaWithIdentifiers(
		10,
		[]int{22},
		iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false},
		iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true},
		iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false},
	)

	inputSpec := iceberg.NewPartitionSpecID(
		10,
		iceberg.PartitionField{
			SourceID:  22,
			FieldID:   1022,
			Transform: iceberg.IdentityTransform{},
			Name:      "bar",
		},
	)

	inputOrder := SortOrder{
		OrderID: 10,
		Fields: []SortField{
			{
				SourceID:  10,
				Transform: iceberg.IdentityTransform{},
				Direction: SortASC,
				NullOrder: NullsLast,
			},
		},
	}

	givenUUID := uuid.New()

	// No properties are supplied, so no format version is requested here.
	actual, err := NewMetadataWithUUID(inputSchema, &inputSpec, inputOrder, location, nil, givenUUID)
	require.NoError(t, err)

	// Expected: schema ID 0 with field IDs 1/2/3, partition field ID 1000
	// sourced from field 2, and sort order ID 1 sourced from field 1.
	wantSchema := iceberg.NewSchemaWithIdentifiers(
		0,
		[]int{2},
		iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String},
		iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true},
		iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool},
	)

	wantSpec := iceberg.NewPartitionSpec(
		iceberg.PartitionField{
			SourceID:  2,
			FieldID:   1000,
			Transform: iceberg.IdentityTransform{},
			Name:      "bar",
		},
	)

	wantOrder := SortOrder{
		OrderID: 1,
		Fields: []SortField{
			{
				SourceID:  1,
				Transform: iceberg.IdentityTransform{},
				Direction: SortASC,
				NullOrder: NullsLast,
			},
		},
	}

	wantLastPartitionID := 1000

	// Only the last-updated timestamp is copied from the actual result; the
	// UUID must equal the one passed in above.
	want := &metadataV2{
		commonMetadata: commonMetadata{
			Loc:                location,
			UUID:               givenUUID,
			LastUpdatedMS:      actual.LastUpdatedMillis(),
			LastColumnId:       3,
			SchemaList:         []*iceberg.Schema{wantSchema},
			CurrentSchemaID:    0,
			Specs:              []iceberg.PartitionSpec{wantSpec},
			DefaultSpecID:      0,
			LastPartitionID:    &wantLastPartitionID,
			SortOrderList:      []SortOrder{wantOrder},
			DefaultSortOrderID: 1,
			FormatVersion:      2,
		},
	}

	assert.Truef(t, want.Equals(actual), "expected: %s\ngot: %s", want, actual)
}
2 changes: 1 addition & 1 deletion table/sorting.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,5 +203,5 @@ func AssignFreshSortOrderIDsWithID(sortOrder SortOrder, old, fresh *iceberg.Sche
NullOrder: field.NullOrder,
})
}
return SortOrder{OrderID: sortOrderID, Fields: fields}, nil
return SortOrder{OrderID: 1, Fields: fields}, nil
}

0 comments on commit abdd1ae

Please sign in to comment.