Skip to content

Commit

Permalink
chore(sql): disallow creating null-typed columns with in-memory tables
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Aug 9, 2024
1 parent 6d3a5dd commit 3784885
Show file tree
Hide file tree
Showing 14 changed files with 52 additions and 20 deletions.
6 changes: 6 additions & 0 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,12 @@ def _normalize_external_tables(self, external_tables=None) -> ExternalData | Non
n += 1
if not (schema := obj.schema):
raise TypeError(f"Schema is empty for external table {name}")
if null_fields := schema.null_fields:
raise com.IbisTypeError(
"ClickHouse doesn't support NULL-typed fields. "
"Consider assigning a type through casting or on construction. "
f"Got null typed fields: {null_fields}"
)

structure = [
f"{name} {type_mapper.to_string(typ.copy(nullable=not typ.is_nested()))}"
Expand Down
8 changes: 4 additions & 4 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ def create_table(
if schema is None:
schema = table.schema()

if schema.nulls:
raise exc.UnsupportedBackendType(
if null_fields := schema.null_fields:
raise exc.IbisTypeError(
"DuckDB does not support creating tables with NULL typed columns. "
"Ensure that every column has non-NULL type. "
f"NULL columns: {schema.nulls}"
f"NULL columns: {null_fields}"
)

column_defs = [
Expand Down Expand Up @@ -1422,7 +1422,7 @@ def to_pyarrow(
) -> pa.Table:
table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow()
schema = expr.as_table().schema()
if not schema.nulls:
if not schema.null_fields:
return expr.__pyarrow_result__(table)

arrays = [
Expand Down
10 changes: 6 additions & 4 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,17 @@ def test_create_table_with_nulls(con):
schema = t.schema()

assert schema == ibis.schema({"a": "null"})
assert schema.nulls == ("a",)
assert schema.null_fields == ("a",)

name = gen_name("duckdb_all_nulls")

with pytest.raises(com.UnsupportedBackendType, match="NULL typed columns"):
match = "NULL typed columns"

with pytest.raises(com.IbisTypeError, match=match):
con.create_table(name, obj=t)

with pytest.raises(com.UnsupportedBackendType, match="NULL typed columns"):
with pytest.raises(com.IbisTypeError, match=match):
con.create_table(name, obj=t, schema=schema)

with pytest.raises(com.UnsupportedBackendType, match="NULL typed columns"):
with pytest.raises(com.IbisTypeError, match=match):
con.create_table(name, schema=schema)
2 changes: 1 addition & 1 deletion ibis/backends/exasol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
"Exasol cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,7 @@ def explain(

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
"Impala cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def create_table(

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
"MS SQL cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/mysql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def create_table(

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
"MySQL cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/oracle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,11 @@ def drop_table(

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := schema.null_fields:
raise exc.IbisTypeError(
f"{self.name} cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
)

# only register if we haven't already done so
if (name := op.name) not in self.list_tables():
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
from psycopg2.extras import execute_batch

schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise exc.IbisTypeError(
f"{self.name} cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/risingwave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def create_table(

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
f"{self.name} cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
19 changes: 19 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pytest import param

import ibis
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
from ibis import util
from ibis.backends.tests.errors import (
Expand Down Expand Up @@ -595,9 +596,27 @@ def test_scalar_to_memory(limit, awards_players, output_format, converter):

expr = awards_players.filter(awards_players.awardID == "DEADBEEF").yearID.min()
res = method(expr)

assert converter(res) is None


# flink
@pytest.mark.notyet(
[
"clickhouse",
"exasol",
"flink",
"impala",
"mssql",
"mysql",
"oracle",
"postgres",
"risingwave",
"trino",
],
raises=com.IbisTypeError,
reason="unable to handle null typed columns as input",
)
def test_all_null_column(con):
t = ibis.memtable({"a": [None]})
result = con.to_pyarrow(t)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/trino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
schema = op.schema
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
if null_columns := schema.null_fields:
raise com.IbisTypeError(
"Trino cannot yet reliably handle `null` typed columns; "
f"got null typed columns: {null_columns}"
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def geospatial(self) -> tuple[str, ...]:
return tuple(name for name, typ in self.fields.items() if typ.is_geospatial())

@attribute
def nulls(self) -> tuple[str, ...]:
def null_fields(self) -> tuple[str, ...]:
return tuple(name for name, typ in self.fields.items() if typ.is_null())

@attribute
Expand Down
8 changes: 4 additions & 4 deletions ibis/expr/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ def test_schema_from_to_pandas_dask_dtypes():
assert restored_dtypes == expected_dtypes


def test_nulls():
assert sch.schema({"a": "int64", "b": "string"}).nulls == ()
assert sch.schema({"a": "null", "b": "string"}).nulls == ("a",)
assert sch.schema({"a": "null", "b": "null"}).nulls == ("a", "b")
def test_null_fields():
assert sch.schema({"a": "int64", "b": "string"}).null_fields == ()
assert sch.schema({"a": "null", "b": "string"}).null_fields == ("a",)
assert sch.schema({"a": "null", "b": "null"}).null_fields == ("a", "b")

0 comments on commit 3784885

Please sign in to comment.