Skip to content

Commit

Permalink
Add DataFrame.sort and Column.sort (#234)
Browse files Browse the repository at this point in the history
* add sort

* update sorted_indices docs
  • Loading branch information
MarcoGorelli authored Aug 24, 2023
1 parent cafa8fd commit 77bc66b
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 6 deletions.
33 changes: 30 additions & 3 deletions spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,35 @@ def get_value(self, row_number: int) -> Scalar:
"""
...

def sort(
    self,
    *,
    ascending: bool = True,
    nulls_position: Literal['first', 'last'] = 'last',
) -> Column[DType]:
    """
    Sort column.

    If you need the indices which would sort the column,
    use :meth:`sorted_indices`.

    Parameters
    ----------
    ascending : bool
        If `True`, sort in ascending order.
        If `False`, sort in descending order.
        Keyword-only; defaults to `True`.
    nulls_position : ``{'first', 'last'}``
        Whether null values should be placed at the beginning
        or at the end of the result.
        Keyword-only; defaults to ``'last'``.
        Note that the position of NaNs is unspecified and may
        vary based on the implementation.

    Returns
    -------
    Column
        The column with its values in sorted order.
    """
    ...

def sorted_indices(
self,
*,
Expand All @@ -150,9 +179,7 @@ def sorted_indices(
"""
Return row numbers which would sort column.
If you need to sort the Column, you can simply do::
col.get_rows(col.sorted_indices())
If you need to sort the Column, use :meth:`sort`.
Parameters
----------
Expand Down
45 changes: 42 additions & 3 deletions spec/API_specification/dataframe_api/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,47 @@ def get_column_names(self) -> Sequence[str]:
Sequence[str]
"""
...

def sort(
    self,
    keys: Sequence[str] | None = None,
    *,
    ascending: Sequence[bool] | bool = True,
    nulls_position: Literal['first', 'last'] = 'last',
) -> DataFrame:
    """
    Sort dataframe according to given columns.

    If you only need the indices which would sort the dataframe, use
    :meth:`sorted_indices`.

    Parameters
    ----------
    keys : Sequence[str] | None
        Names of columns to sort by.
        If `None` (the default), sort by all columns.
    ascending : Sequence[bool] or bool
        If `True`, sort by all keys in ascending order.
        If `False`, sort by all keys in descending order.
        If a sequence, it must be the same length as `keys`;
        each element gives the sort direction for the key at
        the same position.
        Keyword-only; defaults to `True`.
    nulls_position : ``{'first', 'last'}``
        Whether null values should be placed at the beginning
        or at the end of the result.
        Keyword-only; defaults to ``'last'``.
        Note that the position of NaNs is unspecified and may
        vary based on the implementation.

    Returns
    -------
    DataFrame
        The dataframe with its rows in sorted order.

    Raises
    ------
    ValueError
        If `keys` and `ascending` are sequences of different lengths.

    Notes
    -----
    NOTE(review): the behaviour when `keys` is `None` and `ascending`
    is a sequence is not stated here -- confirm whether its length is
    expected to match the number of columns.
    """
    ...

def sorted_indices(
self,
Expand All @@ -255,9 +296,7 @@ def sorted_indices(
"""
Return row numbers which would sort according to given columns.
If you need to sort the DataFrame, you can simply do::
df.get_rows(df.sorted_indices(keys))
If you need to sort the DataFrame, use :meth:`sort`.
Parameters
----------
Expand Down

0 comments on commit 77bc66b

Please sign in to comment.