Skip to content

Commit

Permalink
Add DataFrame constructors allowing NTuple and collection of Pair-s (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored and nalimilan committed Feb 20, 2019
1 parent 70bbae2 commit 96eb818
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 13 deletions.
8 changes: 8 additions & 0 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ particularly a Vector or CategoricalVector.
```julia
DataFrame(columns::Vector, names::Vector{Symbol}; makeunique::Bool=false)
DataFrame(columns::NTuple{N,AbstractVector}, names::NTuple{N,Symbol}; makeunique::Bool=false)
DataFrame(columns::Matrix, names::Vector{Symbol}; makeunique::Bool=false)
DataFrame(kwargs...)
DataFrame(pairs::Pair{Symbol}...; makeunique::Bool=false)
Expand Down Expand Up @@ -173,6 +174,13 @@ function DataFrame(columns::AbstractVector{<:AbstractVector},
Index(convert(Vector{Symbol}, cnames), makeunique=makeunique))
end

# Build a DataFrame from a tuple of column vectors and a tuple of column names
# of the same length `N`. Both tuples are copied into vectors and forwarded to
# the vector-based constructor; `makeunique` is passed through unchanged.
function DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol};
                   makeunique::Bool=false) where {N}
    colvec = AbstractVector[columns...]
    namevec = Symbol[cnames...]
    return DataFrame(colvec, namevec, makeunique=makeunique)
end

# Build a DataFrame from a tuple of column vectors alone. Column names are
# obtained from `gennames` for the number of columns supplied (`N`).
function DataFrame(columns::NTuple{N, AbstractVector}) where {N}
    colvec = AbstractVector[columns...]
    return DataFrame(colvec, gennames(N))
end

# Build a DataFrame from a matrix, producing one DataFrame column per matrix
# column (each taken as the slice `columns[:, j]`). Names default to
# `gennames` for the number of matrix columns; `makeunique` is forwarded to the
# vector-based constructor.
# Column indices are taken from `axes(columns, 2)` instead of
# `1:size(columns, 2)` so that an `AbstractMatrix` whose second axis does not
# start at 1 is not indexed out of bounds.
DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol} = gennames(size(columns, 2));
          makeunique::Bool=false) =
    DataFrame(AbstractVector[columns[:, j] for j in axes(columns, 2)], cnames, makeunique=makeunique)
Expand Down
5 changes: 5 additions & 0 deletions src/other/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ function DataFrame(x)
if x isa AbstractVector && all(col -> isa(col, AbstractVector), x)
return DataFrame(Vector{AbstractVector}(x))
end
if applicable(iterate, x)
if all(v -> v isa Pair{Symbol, <:AbstractVector}, x)
return DataFrame(AbstractVector[last(v) for v in x], [first(v) for v in x])
end
end
if Tables.istable(x)
return fromcolumns(Tables.columns(x))
end
Expand Down
40 changes: 35 additions & 5 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ const ≅ = isequal
#
@testset "constructors" begin
df = DataFrame()
@inferred DataFrame()

@test isempty(_columns(df))
@test _columns(df) isa Vector{AbstractVector}
@test index(df) == Index()
Expand All @@ -22,16 +24,42 @@ const ≅ = isequal

@test df == DataFrame([CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))])
@test df == DataFrame([CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))], [:x1, :x2])
@test df == DataFrame(Any[CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))])
@test df == DataFrame(Any[CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))], [:x1, :x2])
@test df == DataFrame(AbstractVector[CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))])
CategoricalVector{Union{Float64, Missing}}(ones(3))], [:x1, :x2])
@test df == DataFrame((CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))))
@test df == DataFrame((CategoricalVector{Union{Float64, Missing}}(zeros(3)),
CategoricalVector{Union{Float64, Missing}}(ones(3))), (:x1, :x2))
@test df == DataFrame(x1 = Union{Int, Missing}[0.0, 0.0, 0.0],
x2 = Union{Int, Missing}[1.0, 1.0, 1.0])

@test (DataFrame([1:3, 1:3]) == DataFrame(Any[1:3, 1:3]) ==
DataFrame(UnitRange[1:3, 1:3]) == DataFrame(AbstractVector[1:3, 1:3]) ==
DataFrame([[1,2,3], [1,2,3]]) == DataFrame(Any[[1,2,3], [1,2,3]]))
@test df == DataFrame([:x1=>Union{Int, Missing}[0.0, 0.0, 0.0],
:x2=>Union{Int, Missing}[1.0, 1.0, 1.0]])
@test df == DataFrame((:x1=>Union{Int, Missing}[0.0, 0.0, 0.0],
:x2=>Union{Int, Missing}[1.0, 1.0, 1.0]))

# Every vector-of-columns and tuple-of-columns spelling below must construct
# an equal DataFrame.
@test DataFrame([1:3, 1:3]) == DataFrame(Any[1:3, 1:3]) ==
      DataFrame(UnitRange[1:3, 1:3]) == DataFrame(AbstractVector[1:3, 1:3]) ==
      DataFrame([[1,2,3], [1,2,3]]) == DataFrame(Any[[1,2,3], [1,2,3]]) ==
      DataFrame(([1,2,3], [1,2,3])) == DataFrame((1:3, 1:3)) ==
      DataFrame((1:3, [1,2,3])) == DataFrame([1:3, [1,2,3]])
# The tuple-of-Pairs and vector-of-Pairs constructors must agree too. This is
# a separate `@test` because a comparison placed on its own line after the
# chained expression above is not part of that `@test` and would be evaluated
# without being asserted.
@test DataFrame((:x1=>1:3, :x2=>[1,2,3])) == DataFrame([:x1=>1:3, :x2=>[1,2,3]])

@inferred DataFrame([1:3, 1:3])
@inferred DataFrame((1:3, 1:3))
@inferred DataFrame([1:3, 1:3], [:a, :b])
@inferred DataFrame((1:3, 1:3), (:a, :b))

# Only check inference of the Pair-collection constructors on Julia 1.0 or
# newer; the comparison operator is required for the condition to parse.
if VERSION >= v"1.0.0"
    # this test throws an error on Julia 0.7
    @inferred DataFrame((:x1=>1:3, :x2=>[1,2,3]))
    @inferred DataFrame([:x1=>1:3, :x2=>[1,2,3]])
end

@test df !== DataFrame(df)
@test df == DataFrame(df)
Expand Down Expand Up @@ -87,6 +115,7 @@ end

@testset "pair constructor" begin
df = DataFrame(:x1 => zeros(3), :x2 => ones(3))
@inferred DataFrame(:x1 => zeros(3), :x2 => ones(3))
@test size(df, 1) == 3
@test size(df, 2) == 2
@test isequal(df, DataFrame(x1 = [0.0, 0.0, 0.0], x2 = [1.0, 1.0, 1.0]))
Expand All @@ -97,6 +126,7 @@ end

@testset "associative" begin
df = DataFrame(Dict(:A => 1:3, :B => 4:6))
@inferred DataFrame(Dict(:A => 1:3, :B => 4:6))
@test df == DataFrame(A = 1:3, B = 4:6)
@test eltypes(df) == [Int, Int]
end
Expand Down
6 changes: 5 additions & 1 deletion test/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,11 @@ Base.propertynames(d::DuplicateNamesColumnTable) = (:a, :a, :b)
@test_throws ErrorException (dn |> DataFrame)

dn = DuplicateNamesColumnTable()
@test_throws ArgumentError (dn |> DataFrame)
# The exception type expected for a table with duplicate column names depends
# on the Julia version: ArgumentError on 1.0 or newer, MethodError otherwise.
if VERSION >= v"1.0.0"
    @test_throws ArgumentError (dn |> DataFrame)
else
    @test_throws MethodError (dn |> DataFrame)
end

# non-Tables.jl constructor fallbacks
@test DataFrame([(a = 0,), (a = 1,)]) == DataFrame(a = 0:1)
Expand Down
15 changes: 8 additions & 7 deletions test/tabletraits.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ end
@test collect(IteratorInterfaceExtensions.getiterator(sdf)) ==
[(a=1, b=DataValue(1.0)), (a=2, b=DataValue{Float64}())]

df = DataFrame(ColumnSource())

@test size(df)==(3,3)
@test df.a==[1,2,3]
@test df.b==[4.,5.,6.]
@test df.c==["A", "B", "C"]

# Constructing a DataFrame from ColumnSource is only exercised on Julia 1.0 or
# newer.
if VERSION >= v"1.0.0"
    df = DataFrame(ColumnSource())

    @test size(df)==(3,3)
    @test df.a==[1,2,3]
    @test df.b==[4.,5.,6.]
    @test df.c==["A", "B", "C"]
end
end

end # module

0 comments on commit 96eb818

Please sign in to comment.