diff --git a/modules/underdog-dataframe/src/main/groovy/underdog/DataFrame.groovy b/modules/underdog-dataframe/src/main/groovy/underdog/DataFrame.groovy index fc2a9af..9a9b06a 100644 --- a/modules/underdog-dataframe/src/main/groovy/underdog/DataFrame.groovy +++ b/modules/underdog-dataframe/src/main/groovy/underdog/DataFrame.groovy @@ -12,6 +12,24 @@ import java.util.function.Function * @since 0.1.0 */ interface DataFrame extends Columnar { + + /** + * Make a copy of this object’s indices and data. + * + * @return a new instance of {@link DataFrame} + * @since 0.1.0 + */ + DataFrame copy() + + /** + * Fill NA/NaN values using the specified value passed as parameter + * + * @param value the value to replace the NA/NaN values with + * @return the dataframe with replaced values + * @since 0.1.0 + */ + DataFrame fillna(Object value) + /** * The transpose of the DataFrame. * diff --git a/modules/underdog-dataframe/src/main/groovy/underdog/Series.groovy b/modules/underdog-dataframe/src/main/groovy/underdog/Series.groovy index 2b0d866..4cea914 100644 --- a/modules/underdog-dataframe/src/main/groovy/underdog/Series.groovy +++ b/modules/underdog-dataframe/src/main/groovy/underdog/Series.groovy @@ -84,6 +84,15 @@ interface Series extends Columnar, Iterable { */ Series dropna() + /** + * Fill NA/NaN values using the specified value + * + * @param value the value to replace the NA/NaN values with + * @return the series instance with replaced values + * @since 0.1.0 + */ + Series fillna(Object value) + /** * @param index * @return diff --git a/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSDataFrame.groovy b/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSDataFrame.groovy index 7cde1d9..f2e44da 100644 --- a/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSDataFrame.groovy +++ b/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSDataFrame.groovy @@ -76,6 +76,26 @@ class TSDataFrame implements DataFrame { return new
TSDataFrame(Table.create(dataFrameName, columns)) } + @Override + DataFrame copy() { + return new TSDataFrame(table.copy()) + } + + @Override + DataFrame fillna(Object o) { + Table copied = table.copy() + copied.columns().each { + if (it.type() == ColumnType.DOUBLE && o.toString().isNumber() && o.toString().isLong()) { + it.setMissingTo(o.toString().toDouble()) + } else if (it.type() == ColumnType.STRING) { + it.setMissingTo(o.toString()) + } else { + it.setMissingTo(o) + } + } + return new TSDataFrame(copied) + } + @Override DataFrame getT() { return new TSDataFrame(this.table.transpose()) @@ -487,7 +507,7 @@ class TSDataFrame implements DataFrame { @NamedVariant Series min(TypeAxis axisType) { return null -min } + } @Override DataFrame minus(Series series) { diff --git a/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSSeries.groovy b/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSSeries.groovy index de97635..d9a8263 100644 --- a/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSSeries.groovy +++ b/modules/underdog-dataframe/src/main/groovy/underdog/impl/TSSeries.groovy @@ -206,6 +206,12 @@ class TSSeries implements Series { return new TSSeries(column.removeMissing()) } + @Override + Series fillna(Object o) { + /* setMissingTo fills its receiver in place, so work on a copy and leave this series' column untouched (TSDataFrame.fillna copies its table for the same reason) */ + return new TSSeries(column.copy().setMissingTo(o)) + } + @Override Series lag(int index) { return new TSSeries(this.column.lag(index)) diff --git a/modules/underdog-dataframe/src/test/groovy/underdog/DataFrameSpec.groovy b/modules/underdog-dataframe/src/test/groovy/underdog/DataFrameSpec.groovy index 3756e06..a09f070 100644 --- a/modules/underdog-dataframe/src/test/groovy/underdog/DataFrameSpec.groovy +++ b/modules/underdog-dataframe/src/test/groovy/underdog/DataFrameSpec.groovy @@ -480,4 +480,38 @@ class DataFrameSpec extends BaseSpec { columns == ['id', 'weight'] weights == [11, 22, 303, 604, 1005] as int[] } + + def "[DataFrame/fillna]: fixed value all numeric series"() { + setup: + def df = [ + A: [1, 2, null, 3, 4], + B: [1.0, 
2.0, 3.0, null, 4.0], + C: ((1..4).collect { new BigDecimal(it) } + [null]) + ].toDataFrame("coercing") + + when: + df = df.fillna(-1) + + then: + df['A'].toList() == [1, 2, -1, 3, 4] + df['B'].toList() == [1.0, 2.0, 3.0, -1, 4.0] + df['C'].toList() == [1.0, 2.0, 3.0, 4.0, -1.0] + } + + def "[DataFrame/fillna]: fixed value all mixed series"() { + setup: + def df = [ + A: ('a'..'c') + [null], + B: [1.0, 2.0, null, 4.0], + C: (1..3) + [null] + ].toDataFrame("coercing") + + when: + df = df.fillna(-1) + + then: + df['A'].toList() == ['a', 'b', 'c', '-1'] + df['B'].toList() == [1.0, 2.0, -1, 4.0] + df['C'].toList() == [1, 2, 3, -1.0] + } }