Merge pull request #26 from invokermain/25-is-it-possible-to-return-empty-dataframe-with-columns-when-query-result-is-zero-records

Return empty dataframe with columns when query result has zero records
invokermain · Jul 20, 2022 · 612733f
2 parents 188bad8 + ca24acb
commit 612733f
Show file tree

Hide file tree

Showing 8 changed files with 285 additions and 337 deletions.
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
@@ -16,7 +16,9 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Setup Poetry
-        uses: abatilo/actions-poetry@v2.1.0
+        uses: abatilo/actions-poetry@v2.1.5
+      - name: Generate Lock File
+        run: poetry lock
       - name: Install venv
         run: poetry install --no-root -E all
       - name: Pytest

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0] - 2022-07-20
+### Changed
+- `DatabaseResult.to_dataframe()` will now return an empty dataframe with the correct
+  column names if the result has column information but no rows. Warning: Pandas will
+  not be able to infer the correct dtypes from these columns.
+- Loosened version restrictions on `Pandas` and `ORJSON` dependencies to be any version number.
+
+
 ## [0.3.0] - 2022-01-21
 ### Added
 - Added support for multiple result sets, you can move between result sets using the new

diff --git a/poetry.lock b/poetry.lock
diff --git a/poetry.toml b/poetry.toml
@@ -0,0 +1,2 @@
+[virtualenvs]
+in-project = true
diff --git a/pymssqlutils/databaseresult.py b/pymssqlutils/databaseresult.py
@@ -316,6 +316,10 @@ def to_dataframe(self, *args, **kwargs) -> "DataFrame":
                 "Pandas must be installed to use this method"
             ) from ImportError
 
+        # if there is no data, but we know the columns, return an empty dataframe
+        if not self._data and self._columns:
+            return DataFrame(columns=self._columns)
+
         return DataFrame(data=self.data, *args, **kwargs)
 
     def to_json(
@@ -333,10 +337,7 @@ def to_json(
         try:
             from orjson import dumps
         except ImportError as err:
-            raise ImportError(
-                "ORJSON must be installed to use this method, you can install "
-                + "this by running `pip install --upgrade pymssql-utils[json]`"
-            ) from err
+            raise ImportError("ORJSON must be installed to use this method") from err
 
         data_ = self.data if with_columns else self.raw_data
         json_ = dumps(data_)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pymssql-utils"
-version = "0.3.0"
+version = "0.4.0"
 description = "pymssql-utils is a small library that wraps pymssql to make your life easier."
 authors = ["Tim OSullivan <tim@lanster.dev>"]
 license = "GNU LGPLv2.1"
@@ -22,8 +22,8 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.7"
 pymssql = "^2.1.4"
-orjson = { version = ">=3.4.0,<4.0.0", optional = true }
-pandas = { version = ">=1.0.0,<2.0.0", optional = true }
+orjson = { version = "*", optional = true }
+pandas = { version = "*", optional = true }
 
 [tool.poetry.dev-dependencies]
 pytest = "^7"

diff --git a/tests/test_database.py b/tests/test_database.py
@@ -163,3 +163,11 @@ def test_multiple_result_sets():
     assert ok is False
 
     assert result.set_count == 2
+
+
+@pytest.mark.skipif(SKIP_FILE, reason=SKIP_REASON)
+def test_to_dataframe_empty_result():
+    result = sql.query("SELECT TOP 0 sysdatetimeoffset() now, 1 someint")
+    out = result.to_dataframe()
+    assert out.columns[0] == "now"
+    assert out.shape == (0, 2)
diff --git a/tests/test_databaseresult.py b/tests/test_databaseresult.py
@@ -240,6 +240,17 @@ def test_cast_to_dataframe():
     assert df.columns.tolist() == list(result.columns)
 
 
+def test_cast_to_dataframe_no_rows():
+    result = DatabaseResult(
+        ok=True, fetch=True, commit=False, cursor=MockCursor(row_count=0)
+    )
+    df = result.to_dataframe()
+
+    assert isinstance(df, pandas.DataFrame)
+    assert df.columns.tolist() == list(result.columns)
+    assert df.shape == (0, len(result.columns))
+
+
 def test_cast_to_dataframe_no_pandas(monkeypatch):
     monkeypatch.setitem(sys.modules, "pandas", None)