diff --git a/NEWS.md b/NEWS.md index 2f2a6af8e6..c0f0a3ce74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,16 @@ ## Breaking changes +* CategoricalArrays.jl is no longer reexported: call `using CategoricalArrays` + to use it ([#2404](https://github.com/JuliaData/DataFrames.jl/pull/2404)). + In the same vein, the `categorical` and `categorical!` functions + have been deprecated in favor of + `transform(df, cols .=> categorical .=> cols)` and similar syntaxes + ([#2394](https://github.com/JuliaData/DataFrames.jl/pull/2394)). + `stack` now creates a `PooledVector{String}` variable column rather than + a `CategoricalVector{String}` column by default; + pass `variable_eltype=CategoricalValue{String}` to get the previous behavior + ([#2391](https://github.com/JuliaData/DataFrames.jl/pull/2391)) * `isless` for `DataFrameRow`s now checks column names ([#2292](https://github.com/JuliaData/DataFrames.jl/pull/2292)) * `DataFrameColumns` is now not a subtype of `AbstractVector` @@ -22,13 +32,6 @@ choose the fast path only when it is safe; this resolves inconsistencies with what the same functions not using fast path produce ([#2357](https://github.com/JuliaData/DataFrames.jl/pull/2357)) -* `stack` now creates a `PooledVector{String}` variable column rather than - a `CategoricalVector{String}` column by default; - pass `variable_eltype=CategoricalValue{String}` to get the previous behavior - ([#2391](https://github.com/JuliaData/DataFrames.jl/pull/2391)) -* the `categorical` and `categorical!` functions have been deprecated in favor of - `transform(df, cols .=> categorical .=> cols)` and similar syntaxes - [#2394]((https://github.com/JuliaData/DataFrames.jl/pull/2394)) ## New functionalities diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 9d45d575bb..35653e2129 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -1,8 +1,8 @@ module DataFrames using Statistics, Printf, REPL -using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays -@reexport using 
CategoricalArrays, Missings, InvertedIndices +using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays, CategoricalArrays +@reexport using Missings, InvertedIndices using Base.Sort, Base.Order, Base.Iterators using TableTraits, IteratorInterfaceExtensions import LinearAlgebra: norm diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl index 44e172eedc..7db56ff20a 100644 --- a/src/abstractdataframe/reshape.jl +++ b/src/abstractdataframe/reshape.jl @@ -35,7 +35,7 @@ that return views into the original data frame. By default a `PooledArray{String}` is created. If `variable_eltype=Symbol` a `PooledVector{Symbol}` is created, and if `variable_eltype=CategoricalValue{String}` - a `CategoricalArray{String}` is produced. + a `CategoricalArray{String}` is produced (call `using CategoricalArrays` first if needed). Passing any other type `T` will produce a `PooledVector{T}` column as long as it supports conversion from `String`. When `view=true`, a `RepeatedVector{T}` is produced. 
diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 2668ff98b8..5204b86779 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1,6 +1,6 @@ module TestBroadcasting -using Test, DataFrames, PooledArrays, Random +using Test, DataFrames, PooledArrays, Random, CategoricalArrays const ≅ = isequal diff --git a/test/cat.jl b/test/cat.jl index 1153442461..af1c7325bc 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -1,6 +1,6 @@ module TestCat -using Test, Random, DataFrames +using Test, Random, DataFrames, CategoricalArrays const ≅ = isequal # diff --git a/test/constructors.jl b/test/constructors.jl index 38d5e08dda..d856f1461c 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -1,6 +1,6 @@ module TestConstructors -using Test, DataFrames +using Test, DataFrames, CategoricalArrays using DataFrames: Index, _columns, index const ≅ = isequal diff --git a/test/data.jl b/test/data.jl index 0e95c63cff..897484d962 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,6 +1,6 @@ module TestData -using Test, DataFrames, Random, Statistics +using Test, DataFrames, Random, Statistics, CategoricalArrays const ≅ = isequal @testset "constructors" begin diff --git a/test/dataframe.jl b/test/dataframe.jl index 157329e72a..9e9adebf8e 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,6 +1,7 @@ module TestDataFrame -using Dates, DataFrames, Statistics, Random, Test, Logging, DataStructures +using Dates, DataFrames, Statistics, Random, Test, Logging, DataStructures, + CategoricalArrays using DataFrames: _columns, index const ≅ = isequal const ≇ = !isequal diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 13993a42fd..68058de3b6 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -1,6 +1,6 @@ module TestDataFrameRow -using Test, DataFrames, Random, Logging +using Test, DataFrames, Random, Logging, CategoricalArrays const ≅ = isequal const ≇ = !isequal diff --git a/test/deprecated.jl b/test/deprecated.jl index 5007021a1e..189df3d9f8 
100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -1,6 +1,6 @@ module TestDeprecated -using Test, DataFrames +using Test, DataFrames, CategoricalArrays const ≅ = isequal diff --git a/test/duplicates.jl b/test/duplicates.jl index 5d6a8a7695..f763399e18 100644 --- a/test/duplicates.jl +++ b/test/duplicates.jl @@ -1,6 +1,6 @@ module TestDuplicates -using Test, DataFrames +using Test, DataFrames, CategoricalArrays const ≅ = isequal @testset "nonunique" begin diff --git a/test/grouping.jl b/test/grouping.jl index 49092f2dc7..336c84eb82 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -1,6 +1,6 @@ module TestGrouping -using Test, DataFrames, Random, Statistics, PooledArrays +using Test, DataFrames, Random, Statistics, PooledArrays, CategoricalArrays const ≅ = isequal """Check if passed data frames are `isequal` and have the same element types of columns""" diff --git a/test/join.jl b/test/join.jl index a7a39798eb..56bb16dcc6 100644 --- a/test/join.jl +++ b/test/join.jl @@ -1,6 +1,6 @@ module TestJoin -using Test, DataFrames, Random +using Test, DataFrames, Random, CategoricalArrays using DataFrames: similar_missing const ≅ = isequal diff --git a/test/reshape.jl b/test/reshape.jl index 647045358c..6e215e7e92 100644 --- a/test/reshape.jl +++ b/test/reshape.jl @@ -1,6 +1,6 @@ module TestReshape -using Test, DataFrames, Random, Logging, PooledArrays +using Test, DataFrames, Random, Logging, PooledArrays, CategoricalArrays const ≅ = isequal @testset "the output of unstack" begin diff --git a/test/select.jl b/test/select.jl index c0f460dd86..99dc122322 100644 --- a/test/select.jl +++ b/test/select.jl @@ -1,6 +1,6 @@ module TestSelect -using DataFrames, Test, Random, Statistics +using DataFrames, Test, Random, Statistics, CategoricalArrays const ≅ = isequal diff --git a/test/sort.jl b/test/sort.jl index 835044a21e..6a02d99014 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -1,6 +1,6 @@ module TestSort -using DataFrames, Random, Test +using DataFrames, Random, 
Test, CategoricalArrays @testset "standard tests" begin dv1 = [9, 1, 8, missing, 3, 3, 7, missing] diff --git a/test/tables.jl b/test/tables.jl index 6ca9144234..7b3673e189 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,6 +1,6 @@ module TestTables -using Test, DataFrames +using Test, DataFrames, CategoricalArrays struct NamedTupleIterator{T <: NamedTuple} elements::Vector{T}