JuliaData · pdeffebach · Feb 13, 2019 · Feb 13, 2019 · Feb 13, 2019 · Feb 23, 2019
diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -322,26 +322,30 @@ function Base.map(f::Any, gd::GroupedDataFrame)
 end
 
 """
-    combine(gd::GroupedDataFrame, cols => f...)
+    combine(gd::GroupedDataFrame, (cols => f)...)
+    combine(gd::GroupedDataFrame, [cols1 => f1, cols2 => f2]...)
     combine(gd::GroupedDataFrame; (colname = cols => f)...)
     combine(gd::GroupedDataFrame, f)
     combine(f, gd::GroupedDataFrame)
 
 Transform a [`GroupedDataFrame`](@ref) into a `DataFrame`.
 
-If the last argument(s) consist(s) in one or more `cols => f` pair(s), or if
-`colname = cols => f` keyword arguments are provided, `cols` must be
-a column name or index, or a vector or tuple thereof, and `f` must be a callable.
-A pair or a (named) tuple of pairs can also be provided as the first or last argument.
-If `cols` is a single column index, `f` is called with a `SubArray` view into that
-column for each group; else, `f` is called with a named tuple holding `SubArray`
-views into these columns.
+The last argument(s) in `combine` can be either:
 
-If the last argument is a callable `f`, it is passed a [`SubDataFrame`](@ref) view for each group,
-and the returned `DataFrame` then consists of the returned rows plus the grouping columns.
-Note that this second form is much slower than the first one due to type instability.
-A method is defined with `f` as the first argument, so do-block
-notation can be used.
+* One or several `cols => f` pairs or vectors of such pairs (pairs and vectors can be mixed),
+  or a single tuple of such pairs. `cols` must be a column name or index in `gd`, or a vector
+  or tuple thereof. `f` must be callable.
+  If `cols` is a single column index, `f` is called with a `SubArray` view into that
+  column for each group; else, `f` is called with a named tuple holding `SubArray`
+  views into these columns.
+* A named tuple of `colname = cols => f` pairs or keyword arguments of such pairs,
+  where `colname` indicates the name of the column to be created in the new `DataFrame`.
+  Pairs must obey the same rules as above.
+* A callable `f` taking a `SubDataFrame` view for each group. The returned `DataFrame` 
+  then consists of the returned rows plus the grouping columns.
+  Note that this form is much slower than the others due to type instability.
+
+A method is defined with `f` as the first argument, so do-block notation can be used.
+In that case `f` can also be a named tuple of pairs.
 
 `f` can return a single value, a row or multiple rows. The type of the returned value
 determines the shape of the resulting data frame:
@@ -407,6 +411,34 @@ julia> combine(:c => sum, gd)
 │ 3   │ 3     │ 10    │
 │ 4   │ 4     │ 12    │
 
+julia> combine(gd, [:b, :c] .=> sum)
+8×2 DataFrame
+│ Row │ a     │ x1    │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 3     │
+│ 2   │ 1     │ 7     │
+│ 3   │ 2     │ 3     │
+│ 4   │ 2     │ 7     │
+│ 5   │ 3     │ 5     │
+│ 6   │ 3     │ 9     │
+│ 7   │ 4     │ 5     │
+│ 8   │ 4     │ 9     │
+
+julia> combine(gd, [:b, :c] .=> sum, :c => min)
+8×2 DataFrame
+│ Row │ a     │ x1    │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 3     │
+│ 2   │ 1     │ 7     │
+│ 3   │ 2     │ 3     │
+│ 4   │ 2     │ 7     │
+│ 5   │ 3     │ 5     │
+│ 6   │ 3     │ 9     │
+│ 7   │ 4     │ 5     │
+│ 8   │ 4     │ 9     │
+
 julia> combine(df -> sum(df.c), gd) # Slower variant
 4×2 DataFrame
 │ Row │ a     │ x1    │
@@ -436,9 +468,11 @@ function combine(f::Any, gd::GroupedDataFrame)
         return gd.parent[1:0, gd.cols]
     end
 end
+
 combine(gd::GroupedDataFrame, f::Any) = combine(f, gd)
-combine(gd::GroupedDataFrame, f::Pair...) = combine(f, gd)
-combine(gd::GroupedDataFrame, f::Pair) = combine(f, gd)
+
+combine(gd::GroupedDataFrame, f::Union{Pair, AbstractVector{<:Pair}}...) =
+    combine(reduce(vcat, f), gd)
 
 function combine(gd::GroupedDataFrame; f...)
     if length(f) == 0
@@ -673,19 +707,22 @@ function do_f(f, x...)
     end
 end
 
-function _combine(f::Union{AbstractVector{<:Pair}, Tuple{Vararg{Pair}},
+function _combine(f::Union{AbstractVector{<:Pair},
+                           Tuple{Vararg{Pair}},
                            NamedTuple{<:Any, <:Tuple{Vararg{Pair}}}},
                   gd::GroupedDataFrame)
     res = map(f) do p
         agg = check_aggregate(last(p))
-        if agg isa AbstractAggregate && p isa Pair{<:ColumnIndex}
+
+        if agg isa AbstractAggregate && p isa Pair && first(p) isa ColumnIndex
             incol = gd.parent[!, first(p)]
             idx = gd.idx[gd.starts]
             outcol = agg(incol, gd)
             return idx, outcol
         else
             fun = do_f(last(p))
-            if p isa Pair{<:ColumnIndex}
+
+             if p isa Pair && first(p) isa ColumnIndex
-             if p isa Pair && first(p) isa ColumnIndex
+            if p isa Pair && first(p) isa ColumnIndex
-             if p isa Pair && first(p) isa ColumnIndex
+            if p isa Pair && first(p) isa ColumnIndex
                 incols = gd.parent[!, first(p)]
             else
                 df = gd.parent[!, collect(first(p))]
@@ -705,7 +742,7 @@ function _combine(f::Union{AbstractVector{<:Pair}, Tuple{Vararg{Pair}},
     if f isa NamedTuple
         nams = collect(Symbol, propertynames(f))
     else
-        nams = [f[i] isa Pair{<:ColumnIndex} ?
+        nams = [f[i] isa Pair && first(f[i]) isa ColumnIndex ?
                     Symbol(names(gd.parent)[index(gd.parent)[first(f[i])]],
                            '_', funname(last(f[i]))) :
                     Symbol('x', i)
@@ -924,7 +961,8 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
 end
 
 """
-    by(df::AbstractDataFrame, keys, cols => f...; sort::Bool = false)
+    by(df::AbstractDataFrame, keys, (cols => f)...; sort::Bool = false)
+    by(df::AbstractDataFrame, keys, [cols1 => f1, cols2 => f2]...; sort::Bool = false)
     by(df::AbstractDataFrame, keys; (colname = cols => f)..., sort::Bool = false)
     by(df::AbstractDataFrame, keys, f; sort::Bool = false)
     by(f, df::AbstractDataFrame, keys; sort::Bool = false)
@@ -934,19 +972,22 @@ based on grouping columns `keys`, and return a `DataFrame`.
 
 `keys` can be either a single column index, or a vector thereof.
 
-If the last argument(s) consist(s) in one or more `cols => f` pair(s), or if
-`colname = cols => f` keyword arguments are provided, `cols` must be
-a column name or index, or a vector or tuple thereof, and `f` must be a callable.
-A pair or a (named) tuple of pairs can also be provided as the first or last argument.
-If `cols` is a single column index, `f` is called with a `SubArray` view into that
-column for each group; else, `f` is called with a named tuple holding `SubArray`
-views into these columns.
+The third through last arguments in `by` can be either:
 
-If the last argument is a callable `f`, it is passed a [`SubDataFrame`](@ref) view for each group,
-and the returned `DataFrame` then consists of the returned rows plus the grouping columns.
-Note that this second form is much slower than the first one due to type instability.
-A method is defined with `f` as the first argument, so do-block
-notation can be used.
+* One or several `cols => f` pairs or vectors of such pairs (pairs and vectors can be mixed),
+  or a single tuple of such pairs. `cols` must be a column name or index in `df`, or a vector
+  or tuple thereof. `f` must be callable.
+  If `cols` is a single column index, `f` is called with a `SubArray` view into that
+  column for each group; else, `f` is called with a named tuple holding `SubArray`
+  views into these columns.
+* A named tuple of `colname = cols => f` pairs or keyword arguments of such pairs,
+  where `colname` indicates the name of the column to be created in the new `DataFrame`.
+  Pairs must obey the same rules as above.
+* A callable `f` taking a `SubDataFrame` view for each group. The returned `DataFrame` 
+  then consists of the returned rows plus the grouping columns.
+  Note that this form is much slower than the others due to type instability.
+
+A method is defined with `f` as the first argument, so do-block notation can be used.       
+In that case `f` can also be a named tuple of pairs.
 
 `f` can return a single value, a row or multiple rows. The type of the returned value
 determines the shape of the resulting data frame:
@@ -1002,6 +1043,20 @@ julia> by(df, :a, :c => sum)
 │ 3   │ 3     │ 10    │
 │ 4   │ 4     │ 12    │
 
+julia> by(df, :a, [:b, :c] .=> sum, :c => min)
+8×2 DataFrame
+│ Row │ a     │ x1    │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 3     │
+│ 2   │ 1     │ 7     │
+│ 3   │ 2     │ 3     │
+│ 4   │ 2     │ 7     │
+│ 5   │ 3     │ 5     │
+│ 6   │ 3     │ 9     │
+│ 7   │ 4     │ 5     │
+│ 8   │ 4     │ 9     │
+
 julia> by(df, :a, d -> sum(d.c)) # Slower variant
 4×2 DataFrame
 │ Row │ a     │ x1    │
@@ -1062,12 +1117,14 @@ julia> by(df, :a, (:b, :c) => x -> (minb = minimum(x.b), sumc = sum(x.c)))
 """
 by(d::AbstractDataFrame, cols::Any, f::Any; sort::Bool = false) =
     combine(f, groupby(d, cols, sort = sort))
+
 by(f::Any, d::AbstractDataFrame, cols::Any; sort::Bool = false) =
     by(d, cols, f, sort = sort)
-by(d::AbstractDataFrame, cols::Any, f::Pair; sort::Bool = false) =
-    combine(f, groupby(d, cols, sort = sort))
-by(d::AbstractDataFrame, cols::Any, f::Pair...; sort::Bool = false) =
-    combine(f, groupby(d, cols, sort = sort))
+
+by(d::AbstractDataFrame, cols::Any, f::Union{Pair, AbstractVector{<:Pair}}...;
+    sort::Bool = false) =
+    combine(reduce(vcat, f), groupby(d, cols, sort = sort))
+
 by(d::AbstractDataFrame, cols::Any; sort::Bool = false, f...) =
     combine(values(f), groupby(d, cols, sort = sort))
 

diff --git a/test/deprecated.jl b/test/deprecated.jl
@@ -50,10 +50,8 @@ end
     end
 end
 
-# deprecated combine
-
-df = DataFrame(a=[1, 1, 2, 2, 2], b=1:5)
-gd = groupby(df, :a)
+df = DataFrame(a=[1, 1, 2, 2, 2], b=1:5)    
+gd = groupby(df, :a)    
 @test combine(gd) == combine(identity, gd)
 
 @testset "categorical constructor" begin

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -626,7 +626,10 @@ end
         by(df, :a, (:b => sum, :c => sum,)) ==
         by(df, :a, [:b => sum, :c => sum]) ==
         by(df, :a, b_sum = :b => sum, c_sum = :c => sum) ==
-        by(d -> (b_sum=sum(d.b), c_sum=sum(d.c)), df, :a)
+        by(d -> (b_sum=sum(d.b), c_sum=sum(d.c)), df, :a) ==
+        by(df, :a, [:b => sum], :c => sum) ==
+        by(df, :a, [:b => sum], [:c => sum]) ==
+        by(df, :a, [:b, :c] .=> sum) ==
         by(df, :a, d -> (b_sum=sum(d.b), c_sum=sum(d.c)))
 
     @test by(df, :a, :b => vexp, :c => identity) ==
@@ -663,6 +666,9 @@ end
     @test combine(gd, :b => sum, :c => sum) ==
         combine(gd, (:b => sum, :c => sum,)) ==
         combine(gd, [:b => sum, :c => sum]) ==
+        combine(gd, [:b => sum], :c => sum) ==
+        combine(gd, [:b => sum], [:c => sum]) ==
+        combine(gd, [:b, :c] .=> sum) ==
         combine(gd, b_sum = :b => sum, c_sum = :c => sum) ==
         combine((:b, :c) => x -> (b_sum=sum(x.b), c_sum=sum(x.c)), gd) ==
         combine(gd, (:b, :c) => x -> (b_sum=sum(x.b), c_sum=sum(x.c))) ==
@@ -765,6 +771,31 @@ end
     end
 end
 
+# Test that mixing a tuple of pairs with other arguments in combine throws a MethodError
+@testset "Tuple errors" begin
+    df = DataFrame(a = repeat([1, 3, 2, 4], outer=[2]),
+                   b = repeat([2, 1], outer=[4]))
+    gd = groupby(df, :a)
+    @test_throws MethodError combine(gd, (:b => first, ), (:b => last))
+    @test_throws MethodError combine(gd, (:b => first, ), :b => last)
+    @test_throws MethodError combine(gd, (:b => first, ), [:b => last])
+    @test_throws MethodError combine(gd, (:b => first, ), [:b => last], :b => length)
+end
+
+@testset "Symbol argument and typle argument" begin
+    df = DataFrame(a = repeat([1, 3, 2, 4], outer=[2]),
+                   b = repeat([2, 1], outer=[4]))
+    gd = groupby(df, :a)
+
+    function bar(x)
+        first(x.a)
+    end
+
+    @test combine(gd, :b => first, (:a, :b) => bar) ==
+        combine(gd; b_first = :b => first, x2 = :a => first)
+end
+
+
 struct TestType end
 Base.isless(::TestType, ::Int) = true
 Base.isless(::Int, ::TestType) = false