Skip to content

Commit

Permalink
support dataframe protocol (tested with Vaex)
Browse files Browse the repository at this point in the history
This allows plotly express to take in any dataframe that supports
the dataframe protocol, see:
https://data-apis.org/blog/dataframe_protocol_rfc/
https://data-apis.org/dataframe-protocol/latest/index.html

The test includes an example with Vaex, which should work once
vaexio/vaex#1509 (not yet released) is available.
  • Loading branch information
maartenbreddels committed Sep 16, 2021
1 parent 0a83329 commit 12a5349
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
12 changes: 11 additions & 1 deletion packages/python/plotly/plotly/express/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,17 @@ def build_dataframe(args, constructor):
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
df_provided = args["data_frame"] is not None
if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
args["data_frame"] = pd.DataFrame(args["data_frame"])
if hasattr(args["data_frame"], "__dataframe__"):
# Pandas does not implement a `from_dataframe` yet
# $ wget https://raw.githubusercontent.com/data-apis/dataframe-api/main/protocol/pandas_implementation.py
# $ export PYTHONPATH=`pwd`
import pandas_implementation

args["data_frame"] = pandas_implementation.from_dataframe(
args["data_frame"]
)
else:
args["data_frame"] = pd.DataFrame(args["data_frame"])
df_input = args["data_frame"]

# now we handle special cases like wide-mode or x-xor-y specification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,19 @@ def test_build_df_with_index():
assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])


def test_build_df_protocol():
    """Check that build_dataframe handles a vaex dataframe like its pandas source.

    A vaex frame is built from two numeric iris columns and passed as
    ``data_frame``; the resulting frame must equal the matching columns of
    the original pandas frame.
    """
    import vaex

    # drop the 'species' column since the vaex implementation does not cover strings yet
    numeric_columns = ["petal_width", "sepal_length"]
    iris_pandas = px.data.iris()[numeric_columns]

    args = {
        "data_frame": vaex.from_pandas(iris_pandas),
        "x": "petal_width",
        "y": "sepal_length",
    }
    out = build_dataframe(args, go.Scatter)

    # compare only the columns that build_dataframe actually produced
    expected = iris_pandas.reset_index()[out["data_frame"].columns]
    assert_frame_equal(expected, out["data_frame"])


def test_timezones():
df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
df["date"] = pd.to_datetime(df["date"])
Expand Down

0 comments on commit 12a5349

Please sign in to comment.