Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(datatypes): return pd.Timestamp or pd.Series[datetime64] for date.to_pandas() #8784

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/contribute/02_workflow.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,11 @@ you are going only up).
```bash
$ colima delete
```

### `x86_64` or `amd64` based containers

When starting containers based on `x86_64` / `amd64`, the architecture needs to be set in two places:
1. Add `platform: linux/amd64` for the service in `compose.yaml`.
2. Set the `--arch` flag when starting the VM: `colima start --arch x86_64`

For instance, this step is necessary for the `oracle` service in `compose.yaml`. Otherwise, the container will fail shortly after starting.
8 changes: 4 additions & 4 deletions ibis/backends/oracle/converter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from __future__ import annotations

import datetime
import pandas as pd

from ibis.formats.pandas import PandasData


class OraclePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 11 in ibis/backends/oracle/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/oracle/converter.py#L11

Added line #L11 was not covered by tests

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 15 in ibis/backends/oracle/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/oracle/converter.py#L15

Added line #L15 was not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is sort of up for grabs given that pandas doesn't have a standard way to represent an array of dates (the _element suffix implies [perhaps not in an obvious way] that this function is being called once per element of an array).

I think it's fine to also change this to using pandas timestamps.


@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 19 in ibis/backends/oracle/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/oracle/converter.py#L19

Added line #L19 was not covered by tests
8 changes: 4 additions & 4 deletions ibis/backends/snowflake/converter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

import datetime
import json
from typing import TYPE_CHECKING

import pandas as pd
import pyarrow as pa

from ibis.formats.pandas import PandasData
Expand Down Expand Up @@ -52,15 +52,15 @@
class SnowflakePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 55 in ibis/backends/snowflake/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/snowflake/converter.py#L55

Added line #L55 was not covered by tests

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 59 in ibis/backends/snowflake/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/snowflake/converter.py#L59

Added line #L59 was not covered by tests

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat

Check warning on line 63 in ibis/backends/snowflake/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/snowflake/converter.py#L63

Added line #L63 was not covered by tests

@classmethod
def convert_JSON(cls, s, dtype, pandas_type):
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/sqlite/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def test_type_map(db):
sol = pd.DataFrame(
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
)
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)

assert res.equals(sol)


Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ def test_string_quantile(alltypes, func):
)
def test_date_quantile(alltypes):
expr = alltypes.timestamp_col.date().quantile(0.5)
result = expr.execute()
result = expr.execute().to_pydatetime().date()
assert result == date(2009, 12, 31)


Expand Down
11 changes: 5 additions & 6 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,10 @@
GoogleBadRequest,
MySQLOperationalError,
PolarsComputeError,
PsycoPg2ArraySubscriptError,
PsycoPg2IndeterminateDatatype,
PsycoPg2InternalError,
PsycoPg2ProgrammingError,
PsycoPg2SyntaxError,
PsycoPgInvalidTextRepresentation,
PsycoPgSyntaxError,
Py4JJavaError,
PyAthenaDatabaseError,
Expand Down Expand Up @@ -1118,7 +1117,7 @@ def test_unnest_struct(con):


@builtin_array
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError)
@pytest.mark.notimpl(
["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
Expand Down Expand Up @@ -1209,7 +1208,7 @@ def test_zip_null(con, fn):


@builtin_array
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
@pytest.mark.notimpl(["datafusion"], raises=Exception, reason="not yet supported")
@pytest.mark.notimpl(
Expand Down Expand Up @@ -1769,7 +1768,7 @@ def test_table_unnest_column_expr(backend):
@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["trino"], raises=TrinoUserError)
@pytest.mark.notimpl(["athena"], raises=PyAthenaOperationalError)
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
@pytest.mark.notyet(
["risingwave"], raises=PsycoPg2InternalError, reason="not supported in risingwave"
Expand Down Expand Up @@ -1890,7 +1889,7 @@ def test_array_agg_bool(con, data, agg, baseline_func):

@pytest.mark.notyet(
["postgres"],
raises=PsycoPg2ArraySubscriptError,
raises=PsycoPgInvalidTextRepresentation,
reason="all dimensions must match in size",
)
@pytest.mark.notimpl(["risingwave", "flink"], raises=com.OperationNotDefinedError)
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
ImpalaHiveServer2Error,
OracleDatabaseError,
PsycoPg2InternalError,
PsycoPg2UndefinedObject,
PsycoPgUndefinedObject,
Py4JJavaError,
PyAthenaDatabaseError,
PyODBCProgrammingError,
Expand Down Expand Up @@ -725,7 +725,7 @@ def test_list_database_contents(con):
@pytest.mark.notyet(["databricks"], raises=DatabricksServerOperationError)
@pytest.mark.notyet(["bigquery"], raises=com.UnsupportedBackendType)
@pytest.mark.notyet(
["postgres"], raises=PsycoPg2UndefinedObject, reason="no unsigned int types"
["postgres"], raises=PsycoPgUndefinedObject, reason="no unsigned int types"
)
@pytest.mark.notyet(
["oracle"], raises=OracleDatabaseError, reason="no unsigned int types"
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
OracleDatabaseError,
PolarsInvalidOperationError,
PsycoPg2InternalError,
PsycoPg2SyntaxError,
PsycoPgSyntaxError,
Py4JJavaError,
PyAthenaDatabaseError,
PyAthenaOperationalError,
Expand Down Expand Up @@ -1736,7 +1736,7 @@ def hash_256(col):
pytest.mark.notimpl(["flink"], raises=Py4JJavaError),
pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError),
pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError),
pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError),
pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError),
pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError),
pytest.mark.notimpl(["snowflake"], raises=AssertionError),
pytest.mark.never(
Expand Down
8 changes: 4 additions & 4 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DatabricksServerOperationError,
PolarsColumnNotFoundError,
PsycoPg2InternalError,
PsycoPg2SyntaxError,
PsycoPgSyntaxError,
Py4JJavaError,
PyAthenaDatabaseError,
PyAthenaOperationalError,
Expand Down Expand Up @@ -138,7 +138,7 @@ def test_collect_into_struct(alltypes):


@pytest.mark.notimpl(
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
["postgres"], reason="struct literals not implemented", raises=PsycoPgSyntaxError
)
@pytest.mark.notimpl(
["risingwave"],
Expand All @@ -155,7 +155,7 @@ def test_field_access_after_case(con):


@pytest.mark.notimpl(
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
["postgres"], reason="struct literals not implemented", raises=PsycoPgSyntaxError
)
@pytest.mark.notimpl(["flink"], raises=IbisError, reason="not implemented in ibis")
@pytest.mark.parametrize(
Expand Down Expand Up @@ -242,7 +242,7 @@ def test_keyword_fields(con, nullable):

@pytest.mark.notyet(
["postgres"],
raises=PsycoPg2SyntaxError,
raises=PsycoPgSyntaxError,
reason="sqlglot doesn't implement structs for postgres correctly",
)
@pytest.mark.notyet(
Expand Down
31 changes: 9 additions & 22 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,9 +650,7 @@ def convert_to_offset(x):
"ignore", category=(UserWarning, pd.errors.PerformanceWarning)
)
expected = (
pd.to_datetime(df.date_string_col)
.add(offset)
.map(lambda ts: ts.normalize().date(), na_action="ignore")
pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]")
)

expected = backend.default_series_rename(expected)
Expand Down Expand Up @@ -727,12 +725,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() + ibis.interval(days=4),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.add(pd.Timedelta(days=4))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)),
id="date-add-interval",
marks=[
pytest.mark.notimpl(
Expand All @@ -743,12 +736,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() - ibis.interval(days=14),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.sub(pd.Timedelta(days=14))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)),
id="date-subtract-interval",
marks=[
pytest.mark.notimpl(
Expand Down Expand Up @@ -1013,14 +1001,15 @@ def test_interval_add_cast_column(backend, alltypes, df):
delta = alltypes.bigint_col.cast("interval('D')")
expr = alltypes.select("id", (timestamp_date + delta).name("tmp"))
result = expr.execute().sort_values("id").reset_index().tmp

df = df.sort_values("id").reset_index(drop=True)
expected = (
df["timestamp_col"]
.dt.normalize()
.add(df.bigint_col.astype("timedelta64[D]"))
.rename("tmp")
.dt.date
)

backend.assert_series_equal(result, expected.astype(result.dtype))


Expand Down Expand Up @@ -2296,20 +2285,18 @@ def test_time_literal_sql(dialect, snapshot, micros):
)
def test_date_scalar(con, value, func):
expr = ibis.date(func(value)).name("tmp")

result = con.execute(expr)

assert not isinstance(result, datetime.datetime)
assert isinstance(result, datetime.date)

assert result == datetime.date.fromisoformat(value)
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(value)


@pytest.mark.notyet(
["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError
)
def test_simple_unix_date_offset(con):
d = ibis.date("2023-04-07")
s = "2023-04-07"
d = ibis.date(s)
expr = d.epoch_days()
result = con.execute(expr)
delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1)
Expand Down
13 changes: 8 additions & 5 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,17 +222,20 @@
def convert_Date(cls, s, dtype, pandas_type):
if isinstance(s.dtype, pd.DatetimeTZDtype):
s = s.dt.tz_convert("UTC").dt.tz_localize(None)

try:
return s.astype(pandas_type).dt.date
return s.astype(pandas_type)
except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime):

def try_date(v):
if isinstance(v, datetime.datetime):
return v.date()
if isinstance(v, datetime.date):
return pd.Timestamp(v)
elif isinstance(v, str):
if v.endswith("Z"):
return datetime.datetime.fromisoformat(v[:-1]).date()
return datetime.date.fromisoformat(v)
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
else:
datetime_obj = datetime.datetime.fromisoformat(v)

Check warning on line 237 in ibis/formats/pandas.py

View check run for this annotation

Codecov / codecov/patch

ibis/formats/pandas.py#L237

Added line #L237 was not covered by tests
return pd.Timestamp(datetime_obj)
else:
return v

Expand Down
Loading