diff --git a/NEWS.md b/NEWS.md
index 0146ba5386..170b6d929c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -12,6 +12,8 @@
   and documented in the manual for `AbstractDataFrame`, `DataFrameRow`,
   `DataFrameRows`, `DataFrameColumns`, `GroupedDataFrame`, `GroupKeys`, and `GroupKey`
   ([#2573](https://github.com/JuliaData/DataFrames.jl/pull/2573))
+* add `subset` and `subset!` functions that allow subsetting rows
+  ([#2496](https://github.com/JuliaData/DataFrames.jl/pull/2496))
 
 ## Deprecated
 
diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md
index 128f0ac9e7..d6d327c94c 100644
--- a/docs/src/lib/functions.md
+++ b/docs/src/lib/functions.md
@@ -102,6 +102,8 @@ first
 last
 only
 nonunique
+subset
+subset!
 unique
 unique!
 ```
diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index 64ba636af1..fabfa5ef36 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -12,6 +12,11 @@ df = DataFrame(grp = repeat(1:2, 3), x = 6:-1:1, y = 4:9, z = [3:7; missing], id
 df2 = DataFrame(grp = [1, 3], w = [10, 11])
 ```
 
+Note that in the comparisons presented below predicates like `x -> x >= 1` can
+be more compactly written as `>=(1)`. The latter form has an additional benefit
+that it is compiled only once per Julia session (as opposed to `x -> x >= 1`
+which defines a new anonymous function every time it is introduced).
+
 ## Comparison with the Python package pandas
 
 The following table compares the main functions of DataFrames.jl with the Python package pandas (version 1.1.0):
@@ -204,7 +209,7 @@ df <- tibble(grp = rep(1:2, 3), x = 6:1, y = 4:9,
 | Rename columns           | `rename(df, x_new = x)`        | `rename(df, :x => :x_new)`             |
 | Pick columns             | `select(df, x, y)`             | `select(df, :x, :y)`                   |
 | Pick & transform columns | `transmute(df, mean(x), y)`    | `select(df, :x => mean, :y)`           |
-| Pick rows                | `filter(df, x >= 1)`           | `filter(:x => >=(1), df)`              |
+| Pick rows                | `filter(df, x >= 1)`           | `subset(df, :x => ByRow(x -> x >= 1))` |
 | Sort rows                | `arrange(df, x)`               | `sort(df, :x)`                         |
 
 As in dplyr, some of these functions can be applied to grouped data frames, in which case they operate by group:
@@ -240,7 +245,7 @@ The following table compares the main functions of DataFrames.jl with Stata:
 | Add new columns        | `egen x_mean = mean(x)` | `transform!(df, :x => mean => :x_mean)` |
 | Rename columns         | `rename x x_new`        | `rename!(df, :x => :x_new)`             |
 | Pick columns           | `keep x y`              | `select!(df, :x, :y)`                   |
-| Pick rows              | `keep if x >= 1`        | `filter!(:x => >=(1), df)`              |
+| Pick rows              | `keep if x >= 1`        | `subset!(df, :x => ByRow(x -> x >= 1))` |
 | Sort rows              | `sort x`                | `sort!(df, :x)`                         |
 
 Note that the suffix `!` (i.e. `transform!`, `select!`, etc) ensures that the operation transforms the dataframe in place, as in Stata
diff --git a/src/DataFrames.jl b/src/DataFrames.jl
index 0715c40a3e..7417259461 100644
--- a/src/DataFrames.jl
+++ b/src/DataFrames.jl
@@ -64,6 +64,8 @@ export AbstractDataFrame,
        select,
        semijoin,
        stack,
+       subset,
+       subset!,
        transform,
        transform!,
        unique!,
@@ -104,6 +106,7 @@ include("dataframerow/utils.jl")
 include("other/broadcasting.jl")
 
 include("abstractdataframe/selection.jl")
+include("abstractdataframe/subset.jl")
 include("abstractdataframe/iteration.jl")
 include("abstractdataframe/join.jl")
 include("abstractdataframe/reshape.jl")
diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
new file mode 100644
index 0000000000..6f0cdfca31
--- /dev/null
+++ b/src/abstractdataframe/subset.jl
@@ -0,0 +1,283 @@
+# subset allows a transformation specification without a target column name or a column
+
+# Turn the `i`-th condition passed to `subset` into a transformation specification
+# whose target column name is generated as `Symbol(:x, i)`.
+# A bare column index is passed through unchanged.
+function _process_subset_pair(i::Int, a::ColumnIndex)
+    return a => Symbol(:x, i)
+end
+
+# A `source_columns => function` pair gets the generated target name appended.
+function _process_subset_pair(i::Int, @nospecialize(a::Pair{<:Any, <:Base.Callable}))
+    return first(a) => last(a) => Symbol(:x, i)
+end
+
+# Any other kind of condition specifier is rejected.
+function _process_subset_pair(i::Int, a)
+    throw(ArgumentError("condition specifier $a is not supported by `subset`"))
+end
+
+# Row-wise conjunction of condition values that accepts only `Bool` arguments.
+_and() = throw(ArgumentError("at least one condition must be passed"))
+_and(x::Bool) = x
+_and(x::Bool, y::Bool...) = x ? _and(y...) : false
+
+# Fallback reached when at least one of the passed values is not a `Bool`;
+# it always throws, reporting the position of the first offending value.
+function _and(x::Any...)
+    pos = findfirst(v -> !isa(v, Bool), x)
+    # we know x has positive length and must contain non-boolean
+    @assert pos !== nothing
+    bad = x[pos]
+    ismissing(bad) ||
+        throw(ArgumentError("value $bad was returned in condition number $pos " *
+                            "but only true or false are allowed"))
+    throw(ArgumentError("missing was returned in condition number $pos " *
+                        "but only true or false are allowed; pass " *
+                        "skipmissing=true to skip missing values"))
+end
+
+# Row-wise conjunction of condition values used when `skipmissing=true`:
+# `Bool` and `missing` arguments are accepted and `missing` is treated as `false`.
+_and_missing() = throw(ArgumentError("at least one condition must be passed"))
+_and_missing(x::Bool) = x
+_and_missing(x::Bool, y::Union{Bool, Missing}...) = x && _and_missing(y...)
+_and_missing(x::Missing, y::Union{Bool, Missing}...) = false
+
+# Fallback reached when at least one of the passed values is neither a `Bool`
+# nor `missing`; it always throws, reporting the first offending value.
+function _and_missing(x::Any...)
+    loc = findfirst(x -> !(x isa Union{Bool, Missing}), x)
+    # we know x has positive length and must contain a value that is
+    # neither a Bool nor missing
+    @assert !isnothing(loc)
+    xv = x[loc]
+    # note the trailing space before the string continuation
+    throw(ArgumentError("value $xv was returned in condition number $loc " *
+                        "but only true, false, or missing are allowed"))
+end
+
+
+# Note that _get_subset_conditions will have a large compilation time
+# if more than 32 conditions are passed as `args`.
+#
+# Evaluate every condition in `args` against `df` with `select` and combine the
+# produced columns row by row into a single collection of `Bool` values
+# (using `_and_missing` when `skipmissing` is `true` and `_and` otherwise).
+function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
+                                @nospecialize(args), skipmissing::Bool)
+    conditions = Any[_process_subset_pair(i, a) for (i, a) in enumerate(args)]
+
+    if isempty(conditions)
+        throw(ArgumentError("at least one condition must be passed"))
+    end
+
+    # one column per condition; for a grouped data frame the grouping columns
+    # are not included in the result (`keepkeys=false`)
+    df_conditions = if df isa AbstractDataFrame
+        select(df, conditions..., copycols=!(df isa DataFrame))
+    else
+        select(df, conditions...,
+               copycols=!(parent(df) isa DataFrame), keepkeys=false)
+    end
+
+    @assert ncol(df_conditions) == length(conditions)
+
+    cols = eachcol(df_conditions)
+    cond = skipmissing ? _and_missing.(cols...) : _and.(cols...)
+
+    @assert eltype(cond) === Bool
+    return cond
+end
+
+"""
+    subset(df::AbstractDataFrame, args...; skipmissing::Bool=false, view::Bool=false)
+    subset(gdf::GroupedDataFrame, args...; skipmissing::Bool=false, view::Bool=false,
+           ungroup::Bool=true)
+
+Return a copy of data frame `df` or parent of `gdf` containing only rows for
+which all values produced by transformation(s) `args` for a given row are `true`.
+
+Each argument passed in `args` can be either a single column selector or a
+`source_columns => function` transformation specifier following the rules
+described for [`select`](@ref).
+
+Note that as opposed to [`filter`](@ref) the `subset` function works on whole
+columns (or all rows in groups for `GroupedDataFrame`).
+
+If `skipmissing=false` (the default) `args` are required to produce vectors
+containing only `Bool` values. If `skipmissing=true`, additionally `missing` is
+allowed and it is treated as `false` (i.e. rows for which one of the conditions
+returns `missing` are skipped).
+
+If `view=true` a `SubDataFrame` view is returned instead of a `DataFrame`.
+
+If `ungroup=false` the resulting data frame is re-grouped based on the same
+grouping columns as `gdf` and a `GroupedDataFrame` is returned.
+
+If a `GroupedDataFrame` is passed then it must include all groups present in the
+`parent` data frame, like in [`select!`](@ref).
+
+See also: [`subset!`](@ref), [`filter`](@ref), [`select`](@ref)
+
+# Examples
+
+```
+julia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],
+                      z=[true, true, missing, missing], v=[1, 2, 11, 12])
+4×5 DataFrame
+ Row │ id     x      y      z        v
+     │ Int64  Bool   Bool   Bool?    Int64
+─────┼─────────────────────────────────────
+   1 │     1   true   true     true      1
+   2 │     2  false   true     true      2
+   3 │     3   true  false  missing     11
+   4 │     4  false  false  missing     12
+
+julia> subset(df, :x)
+2×5 DataFrame
+ Row │ id     x     y      z        v
+     │ Int64  Bool  Bool   Bool?    Int64
+─────┼────────────────────────────────────
+   1 │     1  true   true     true      1
+   2 │     3  true  false  missing     11
+
+julia> subset(df, :v => x -> x .> 3)
+2×5 DataFrame
+ Row │ id     x      y      z        v
+     │ Int64  Bool   Bool   Bool?    Int64
+─────┼─────────────────────────────────────
+   1 │     3   true  false  missing     11
+   2 │     4  false  false  missing     12
+
+julia> subset(df, :x, :y => ByRow(!))
+1×5 DataFrame
+ Row │ id     x     y      z        v
+     │ Int64  Bool  Bool   Bool?    Int64
+─────┼────────────────────────────────────
+   1 │     3  true  false  missing     11
+
+julia> subset(df, :x, :z, skipmissing=true)
+1×5 DataFrame
+ Row │ id     x     y     z      v
+     │ Int64  Bool  Bool  Bool?  Int64
+─────┼─────────────────────────────────
+   1 │     1  true  true   true      1
+
+julia> subset(df, :x, :z)
+ERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values
+
+julia> subset(groupby(df, :y), :v => x -> x .> minimum(x))
+2×5 DataFrame
+ Row │ id     x      y      z        v
+     │ Int64  Bool   Bool   Bool?    Int64
+─────┼─────────────────────────────────────
+   1 │     2  false   true     true      2
+   2 │     4  false  false  missing     12
+```
+"""
+function subset(df::AbstractDataFrame, @nospecialize(args...);
+                skipmissing::Bool=false, view::Bool=false)
+    # `true` for each row for which all conditions hold
+    keep = _get_subset_conditions(df, args, skipmissing)
+    if view
+        # the `view` keyword argument shadows the function, hence `Base.view`
+        return Base.view(df, keep, :)
+    else
+        return df[keep, :]
+    end
+end
+
+function subset(gdf::GroupedDataFrame, @nospecialize(args...);
+                skipmissing::Bool=false, view::Bool=false,
+                ungroup::Bool=true)
+    # conditions are evaluated on `gdf`, rows are picked from its parent
+    keep = _get_subset_conditions(gdf, args, skipmissing)
+    parentdf = parent(gdf)
+    # the `view` keyword argument shadows the function, hence `Base.view`
+    res = view ? Base.view(parentdf, keep, :) : parentdf[keep, :]
+    ungroup && return res
+    # TODO: in some cases it might be faster to groupby gdf.groups[row_selector]
+    return groupby(res, groupcols(gdf))
+end
+
+"""
+    subset!(df::AbstractDataFrame, args...; skipmissing::Bool=false)
+    subset!(gdf::GroupedDataFrame{DataFrame}, args...; skipmissing::Bool=false,
+            ungroup::Bool=true)
+
+Update data frame `df` or the parent of `gdf` in place to contain only rows for
+which all values produced by transformation(s) `args` for a given row are `true`.
+
+Each argument passed in `args` can be either a single column selector or a
+`source_columns => function` transformation specifier following the rules
+described for [`select`](@ref).
+
+Note that as opposed to [`filter!`](@ref) the `subset!` function works on whole
+columns (or all rows in groups for `GroupedDataFrame`).
+
+If `skipmissing=false` (the default) `args` are required to produce vectors
+containing only `Bool` values. If `skipmissing=true`, additionally `missing` is
+allowed and it is treated as `false` (i.e. rows for which one of the conditions
+returns `missing` are skipped).
+
+If `ungroup=false` the resulting data frame is re-grouped based on the same
+grouping columns as `gdf` and a `GroupedDataFrame` is returned.
+
+If `GroupedDataFrame` is subsetted then it must include all groups present in the
+`parent` data frame, like in [`select!`](@ref).
+
+See also: [`subset`](@ref), [`filter!`](@ref), [`select!`](@ref)
+
+# Examples
+
+```
+julia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false])
+4×3 DataFrame
+ Row │ id     x      y
+     │ Int64  Bool   Bool
+─────┼─────────────────────
+   1 │     1   true   true
+   2 │     2  false   true
+   3 │     3   true  false
+   4 │     4  false  false
+
+julia> subset!(df, :x, :y => ByRow(!));
+
+julia> df
+1×3 DataFrame
+ Row │ id     x     y
+     │ Int64  Bool  Bool
+─────┼────────────────────
+   1 │     3  true  false
+
+julia> df = DataFrame(id=1:4, y=[true, true, false, false], v=[1, 2, 11, 12]);
+
+julia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));
+
+julia> df
+2×3 DataFrame
+ Row │ id     y      v
+     │ Int64  Bool   Int64
+─────┼─────────────────────
+   1 │     2   true      2
+   2 │     4  false     12
+
+julia> df = DataFrame(id=1:4, x=[true, false, true, false],
+                      z=[true, true, missing, missing], v=1:4)
+4×4 DataFrame
+ Row │ id     x      z        v
+     │ Int64  Bool   Bool?    Int64
+─────┼──────────────────────────────
+   1 │     1   true     true      1
+   2 │     2  false     true      2
+   3 │     3   true  missing      3
+   4 │     4  false  missing      4
+
+julia> subset!(df, :x, :z)
+ERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values
+
+julia> subset!(df, :x, :z, skipmissing=true);
+
+julia> df
+1×4 DataFrame
+ Row │ id     x     z      v
+     │ Int64  Bool  Bool?  Int64
+─────┼───────────────────────────
+   1 │     1  true   true      1
+
+julia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],
+                      z=[true, true, missing, missing], v=[1, 2, 11, 12]);
+
+julia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));
+
+julia> df
+2×5 DataFrame
+ Row │ id     x      y      z        v
+     │ Int64  Bool   Bool   Bool?    Int64
+─────┼─────────────────────────────────────
+   1 │     2  false   true     true      2
+   2 │     4  false  false  missing     12
+```
+"""
+function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false)
+    keep = _get_subset_conditions(df, args, skipmissing)
+    # indices of the rows for which the combined condition is `false`
+    rows_to_drop = findall(!, keep)
+    return delete!(df, rows_to_drop)
+end
+
+function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false,
+                 ungroup::Bool=true)
+    # conditions are evaluated on `gdf`, rows are removed from its parent
+    keep = _get_subset_conditions(gdf, args, skipmissing)
+    parentdf = parent(gdf)
+    rows_to_drop = findall(!, keep)
+    res = delete!(parentdf, rows_to_drop)
+    ungroup && return res
+    # TODO: in some cases it might be faster to groupby gdf.groups[row_selector]
+    return groupby(res, groupcols(gdf))
+end
diff --git a/test/grouping.jl b/test/grouping.jl
index a2df4868ae..cfa3ae2a25 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -3272,4 +3272,245 @@ end
     @test df == df2
 end
 
+@testset "subset and subset!" begin
+    # Reference data: `x` holds Bool values in a vector with eltype Any, `y` mixes
+    # Bool and missing values, `z` is used as the grouping column below and
+    # `id` numbers the rows.
+    refdf = DataFrame(x = repeat(Any[true, false], 4),
+                      y = repeat([true, false, missing, missing], 2),
+                      z = repeat([1, 2, 3, 3], 2),
+                      id = 1:8)
+
+    # subset on a DataFrame and on a SubDataFrame (a view dropping the last row);
+    # results are compared against the equivalent row-wise `filter` calls
+    for df in (copy(refdf), @view copy(refdf)[1:end-1, :])
+        df2 = copy(df)
+        @test subset(df, :x) ≅ filter(:x => identity, df)
+        # the source must not be mutated
+        @test df ≅ df2
+        @test subset(df, :x) isa DataFrame
+        @test subset(df, :x, view=true) ≅ filter(:x => identity, df)
+        @test subset(df, :x, view=true) isa SubDataFrame
+        # `y` contains missing, so it is an error unless skipmissing=true
+        @test_throws ArgumentError subset(df, :y)
+        @test_throws ArgumentError subset(df, :y, :x)
+        @test subset(df, :y, skipmissing=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(df, :y, skipmissing=true, view=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(df, :y, :y, skipmissing=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(df, :y, :y, skipmissing=true, view=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(df, :x, :y, skipmissing=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(df, :y, :x, skipmissing=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(df, :x, :y, skipmissing=true, view=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(df, :x, :y, :id => ByRow(<(4)), skipmissing=true) ≅
+              filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, df)
+        @test subset(df, :x, :y, :id => ByRow(<(4)), skipmissing=true, view=true) ≅
+              filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, df)
+        @test subset(df, :x, :id => ByRow(<(4))) ≅
+              filter([:x, :id] => (x, id) -> x && id < 4, df)
+        @test subset(df, :x, :id => ByRow(<(4)), view=true) ≅
+              filter([:x, :id] => (x, id) -> x && id < 4, df)
+        # error cases: no conditions, conditions returning missing or a non-Bool value
+        @test_throws ArgumentError subset(df)
+        @test isempty(subset(df, :x, :x => ByRow(!)))
+        @test_throws ArgumentError subset(df, :x => x -> false, :x => x -> missing)
+        @test_throws ArgumentError subset(df, :x => x -> true, :x => x -> missing)
+        @test_throws ArgumentError subset(df, :x => x -> true, :x => x -> 2)
+    end
+
+    # the same checks for a GroupedDataFrame, with groups both in the order
+    # produced by groupby and in a reordered order
+    for df in (copy(refdf), @view copy(refdf)[1:end-1, :]),
+        gdf in (groupby_checked(df, :z), groupby_checked(df, :z)[[3, 2, 1]])
+        df2 = copy(df)
+        @test subset(gdf, :x) ≅ filter(:x => identity, df)
+        @test df ≅ df2
+        @test subset(gdf, :x) isa DataFrame
+        @test subset(gdf, :x, ungroup=false) ≅
+              groupby_checked(filter(:x => identity, df), :z)
+        @test subset(gdf, :x, ungroup=false) isa GroupedDataFrame{DataFrame}
+        @test subset(gdf, :x, view=true) ≅ filter(:x => identity, df)
+        @test subset(gdf, :x, view=true) isa SubDataFrame
+        @test subset(gdf, :x, view=true, ungroup=false) ≅
+              groupby_checked(filter(:x => identity, df), :z)
+        @test subset(gdf, :x, view=true, ungroup=false) isa GroupedDataFrame{<:SubDataFrame}
+        @test_throws ArgumentError subset(gdf, :y)
+        @test_throws ArgumentError subset(gdf, :y, :x)
+        @test subset(gdf, :y, skipmissing=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(gdf, :y, skipmissing=true, view=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(gdf, :y, :y, skipmissing=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(gdf, :y, :y, skipmissing=true, view=true) ≅ filter(:y => x -> x === true, df)
+        @test subset(gdf, :x, :y, skipmissing=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(gdf, :y, :x, skipmissing=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(gdf, :x, :y, skipmissing=true, view=true) ≅
+              filter([:x, :y] => (x, y) -> x && y === true, df)
+        @test subset(gdf, :x, :y, :id => ByRow(<(4)), skipmissing=true) ≅
+              filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, df)
+        @test subset(gdf, :x, :y, :id => ByRow(<(4)), skipmissing=true, view=true) ≅
+              filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, df)
+        @test subset(gdf, :x, :id => ByRow(<(4))) ≅
+              filter([:x, :id] => (x, id) -> x && id < 4, df)
+        @test subset(gdf, :x, :id => ByRow(<(4)), view=true) ≅
+              filter([:x, :id] => (x, id) -> x && id < 4, df)
+        @test_throws ArgumentError subset(gdf)
+        @test isempty(subset(gdf, :x, :x => ByRow(!)))
+        @test_throws ArgumentError subset(gdf, :x => x -> false, :x => x -> missing)
+        @test_throws ArgumentError subset(gdf, :x => x -> true, :x => x -> missing)
+        @test_throws ArgumentError subset(gdf, :x => x -> true, :x => x -> 2)
+    end
+
+    # subset! on a DataFrame: the passed data frame is returned and mutated;
+    # a fresh copy of `refdf` is taken before each mutating scenario
+    df = copy(refdf)
+    @test subset!(df, :x) === df
+    @test subset!(df, :x) ≅ df ≅ filter(:x => identity, refdf)
+    df = copy(refdf)
+    # a failing call must leave the data frame unchanged
+    @test_throws ArgumentError subset!(df, :y)
+    @test df ≅ refdf
+    df = copy(refdf)
+    @test subset!(df, :y, skipmissing=true) === df
+    @test subset!(df, :y, skipmissing=true) ≅ df ≅ filter(:y => x -> x === true, refdf)
+    df = copy(refdf)
+    @test subset!(df, :x, :y, skipmissing=true) === df
+    @test subset!(df, :x, :y, skipmissing=true) ≅ df ≅
+          filter([:x, :y] => (x, y) -> x && y === true, refdf)
+    df = copy(refdf)
+    @test subset!(df, :x, :y, :id => ByRow(<(4)), skipmissing=true) ≅ df ≅
+          filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, refdf)
+    df = copy(refdf)
+    @test subset!(df, :x, :id => ByRow(<(4))) ≅ df ≅
+          filter([:x, :id] => (x, id) -> x && id < 4, refdf)
+    df = copy(refdf)
+    @test_throws ArgumentError subset!(df)
+    df = copy(refdf)
+    @test isempty(subset!(df, :x, :x => ByRow(!)))
+    @test isempty(df)
+
+    df = copy(refdf)
+    @test_throws ArgumentError subset!(df, :x => x -> false, :x => x -> missing)
+    @test_throws ArgumentError subset!(df, :x => x -> true, :x => x -> missing)
+    @test_throws ArgumentError subset!(df, :x => x -> true, :x => x -> 2)
+
+    # subset! on a GroupedDataFrame: the parent data frame is returned and mutated
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x) === df
+
+    # with ungroup=false a GroupedDataFrame over the same parent is returned
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    gdf2 = subset!(gdf, :x, ungroup=false)
+    @test gdf2 isa GroupedDataFrame{DataFrame}
+    @test parent(gdf2) === df
+    @test gdf2 ≅ groupby_checked(df, :z) ≅ groupby_checked(filter(:x => identity, refdf), :z)
+
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x) ≅ df ≅ filter(:x => identity, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test_throws ArgumentError subset!(gdf, :y)
+    @test df ≅ refdf
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :y, skipmissing=true) === df
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :y, skipmissing=true) ≅ df ≅ filter(:y => x -> x === true, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x, :y, skipmissing=true) === df
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x, :y, skipmissing=true) ≅ df ≅
+          filter([:x, :y] => (x, y) -> x && y === true, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x, :y, :id => ByRow(<(4)), skipmissing=true) ≅ df ≅
+          filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test subset!(gdf, :x, :id => ByRow(<(4))) ≅ df ≅
+          filter([:x, :id] => (x, id) -> x && id < 4, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test_throws ArgumentError subset!(gdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test isempty(subset!(gdf, :x, :x => ByRow(!)))
+    @test isempty(df)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)
+    @test_throws ArgumentError subset!(gdf, :x => x -> false, :x => x -> missing)
+    @test_throws ArgumentError subset!(gdf, :x => x -> true, :x => x -> missing)
+    @test_throws ArgumentError subset!(gdf, :x => x -> true, :x => x -> 2)
+
+    # subset! on a GroupedDataFrame whose groups were reordered by indexing
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test subset!(gdf, :x) ≅ df ≅ filter(:x => identity, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test_throws ArgumentError subset!(gdf, :y)
+    @test df ≅ refdf
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test subset!(gdf, :y, skipmissing=true) ≅ df ≅ filter(:y => x -> x === true, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test subset!(gdf, :x, :y, skipmissing=true) ≅ df ≅
+          filter([:x, :y] => (x, y) -> x && y === true, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test subset!(gdf, :x, :y, :id => ByRow(<(4)), skipmissing=true) ≅ df ≅
+          filter([:x, :y, :id] => (x, y, id) -> x && y === true && id < 4, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test subset!(gdf, :x, :id => ByRow(<(4))) ≅ df ≅
+          filter([:x, :id] => (x, id) -> x && id < 4, refdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test_throws ArgumentError subset!(gdf)
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test isempty(subset!(gdf, :x, :x => ByRow(!)))
+    @test isempty(df)
+
+    df = copy(refdf)
+    gdf = groupby_checked(df, :z)[[3, 2, 1]]
+    @test_throws ArgumentError subset!(gdf, :x => x -> false, :x => x -> missing)
+    @test_throws ArgumentError subset!(gdf, :x => x -> true, :x => x -> missing)
+    @test_throws ArgumentError subset!(gdf, :x => x -> true, :x => x -> 2)
+
+    # subset! is expected to throw when applied to a view or to a grouped view
+    @test_throws ArgumentError subset!(view(refdf, :, :), :x)
+    @test_throws ArgumentError subset!(groupby_checked(view(refdf, :, :), :z), :x)
+
+    # whole-column functions: the mean is taken over the whole data frame in the
+    # first case and within each group in the second case
+    df = DataFrame(g=[2, 2, 1, 1, 1, 1, 3, 3, 3], x = 1:9)
+    @test subset(df, :x => x -> x .< mean(x)) == DataFrame(g=[2, 2, 1, 1], x = 1:4)
+    @test subset(groupby_checked(df, :g), :x => x -> x .< mean(x)) ==
+          DataFrame(g=[2, 1, 1, 3], x=[1, 3, 4, 7])
+
+    # functions returning a single value or a NamedTuple instead of a vector
+    @test_throws ArgumentError subset(df, :x => x -> missing)
+    @test isempty(subset(df, :x => x -> missing, skipmissing=true))
+    @test isempty(subset(df, :x => x -> false))
+    @test subset(df, :x => x -> true) ≅ df
+    @test_throws ArgumentError subset(df, :x => x -> (a=x,))
+    @test_throws ArgumentError subset(df, :x => (x -> (a=x,)) => AsTable)
+
+    # single-row data frames with missing or non-Bool condition columns
+    @test_throws ArgumentError subset(DataFrame(x=false, y=missing), :x, :y)
+    @test_throws ArgumentError subset(DataFrame(x=missing, y=false), :x, :y)
+    @test_throws ArgumentError subset(DataFrame(x=missing, y=false), :x)
+    @test_throws ArgumentError subset(DataFrame(x=false, y=missing), :y)
+    @test_throws ArgumentError subset(DataFrame(x=false, y=1), :x, :y)
+    @test_throws ArgumentError subset(DataFrame(x=1, y=false), :x, :y)
+    @test_throws ArgumentError subset(DataFrame(x=1, y=false), :y, :x)
+    @test_throws ArgumentError subset(DataFrame(x=false, y=1), :y)
+
+    # non-Bool values are rejected also when skipmissing=true
+    @test_throws ArgumentError subset(DataFrame(x=false, y=1), :x, :y, skipmissing=true)
+    @test_throws ArgumentError subset(DataFrame(x=1, y=false), :x, :y, skipmissing=true)
+    @test_throws ArgumentError subset(DataFrame(x=1, y=false), :y, :x, skipmissing=true)
+    @test_throws ArgumentError subset(DataFrame(x=false, y=1), :y, skipmissing=true)
+
+    # the zero-argument methods of the internal helpers must throw
+    @test_throws ArgumentError DataFrames._and()
+    @test_throws ArgumentError DataFrames._and_missing()
+end
+
+@testset "make sure we handle idx correctly when groups are reordered" begin
+    # `g` is deliberately not sorted, so the groups' rows appear in different
+    # orders depending on how the GroupedDataFrame is built
+    df = DataFrame(g=[2, 2, 1, 1, 1], id = 1:5)
+    # the result of `select` must be the same for the plain data frame, for
+    # default and sorted grouping and for groups reordered by indexing: in each
+    # case `id2` must equal `id` row by row
+    @test select(df, :g, :id, :id => ByRow(identity) => :id2) ==
+          select(groupby_checked(df, :g), :id, :id => ByRow(identity) => :id2) ==
+          select(groupby_checked(df, :g, sort=true), :id, :id => ByRow(identity) => :id2) ==
+          select(groupby_checked(df, :g)[[2,1]], :id, :id => ByRow(identity) => :id2) ==
+          [df DataFrame(id2=df.id)]
+end
+
 end # module