Merge branch 'master' into i#207-withColumn-arity22
OlivierBlanvillain authored Nov 21, 2017
2 parents 43a274a + 2709c1d commit 604e472
Showing 2 changed files with 115 additions and 1 deletion.
47 changes: 46 additions & 1 deletion dataset/src/main/scala/frameless/TypedDataset.scala
@@ -10,7 +10,7 @@ import org.apache.spark.sql._
import shapeless._
import shapeless.labelled.FieldType
import shapeless.ops.hlist.{Diff, IsHCons, Prepend, ToTraversable, Tupler}
import shapeless.ops.record.Keys
import shapeless.ops.record.{Remover, Values, Keys}

/** [[TypedDataset]] is a safer interface for working with `Dataset`.
*
@@ -607,6 +607,51 @@ class TypedDataset[T] protected[frameless](val dataset: Dataset[T])(implicit val
}
}

/**
* Returns a new [[TypedDataset]] as a tuple with the specified
* column dropped.
* Dropping a column from a single-column [[TypedDataset]] is not allowed.
*
* {{{
* case class Foo(a: String, b: Int, ...)
* val d: TypedDataset[Foo] = ???
* val result: TypedDataset[(Int, ...)] = d.drop('a)
* }}}
* @param column column to drop specified as a Symbol
* @param genOfT LabelledGeneric derived for T
* @param remover Remover derived for TRep and column
* @param values values of T with column removed
* @param tupler tupler of values
* @param encoder evidence of encoder of the tupled values
* @tparam Out Tupled return type
* @tparam TRep shapeless' record representation of T
* @tparam Removed record of T with column removed
* @tparam ValuesFromRemoved values of T with column removed as an HList
* @tparam V value type of column in T
* @return a new [[TypedDataset]] of the remaining columns as a tuple
*/
def drop[
Out,
TRep <: HList,
Removed <: HList,
ValuesFromRemoved <: HList,
V
](
column: Witness.Lt[Symbol]
)(implicit
genOfT: LabelledGeneric.Aux[T, TRep],
remover: Remover.Aux[TRep, column.T, (V, Removed)],
values: Values.Aux[Removed, ValuesFromRemoved],
tupler: Tupler.Aux[ValuesFromRemoved, Out],
encoder: TypedEncoder[Out]
): TypedDataset[Out] = {
val dropped = dataset
.toDF()
.drop(column.value.name)
.as[Out](TypedExpressionEncoder[Out])

TypedDataset.create[Out](dropped)
}

/** Prepends a new column to the Dataset.
*
* {{{
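
For readers skimming the diff, a minimal usage sketch of the new drop method follows. The Person case class, its field names, and the sample data are illustrative assumptions, not part of this commit; TypedDataset.create also needs an implicit SparkSession/SQLContext and TypedEncoder instances in scope, as the frameless test harness provides.

case class Person(name: String, age: Int, city: String)  // hypothetical example type

// Assumes the usual frameless imports plus an implicit SparkSession/SQLContext,
// as set up in the project's test suite.
val people: TypedDataset[Person] =
  TypedDataset.create(Person("Ann", 30, "Oslo") :: Nil)

// Dropping 'age removes that column and re-tuples the remaining fields,
// so the inferred result type is TypedDataset[(String, String)].
val withoutAge: TypedDataset[(String, String)] = people.drop('age)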
69 changes: 69 additions & 0 deletions dataset/src/test/scala/frameless/DropTest.scala
@@ -0,0 +1,69 @@
package frameless

import org.scalacheck.Prop
import org.scalacheck.Prop._

class DropTest extends TypedDatasetSuite {
test("drop five columns") {
def prop[A: TypedEncoder](value: A): Prop = {
val d5 = TypedDataset.create(X5(value, value, value, value, value) :: Nil)
val d4 = d5.drop('a) //drops first column
val d3 = d4.drop('_4) //drops last column
val d2 = d3.drop('_2) //drops middle column
val d1 = d2.drop('_2) //drops one of the two remaining columns, leaving Tuple1

Tuple1(value) ?= d1.collect().run().head
}

check(prop[Int] _)
check(prop[Long] _)
check(prop[String] _)
check(prop[SQLDate] _)
check(prop[Option[X1[Boolean]]] _)
}

test("drop first column") {
def prop[A: TypedEncoder](value: A): Prop = {
val d3 = TypedDataset.create(X3(value, value, value) :: Nil)
val d2 = d3.drop('a)

(value, value) ?= d2.collect().run().head
}

check(prop[Int] _)
check(prop[Long] _)
check(prop[String] _)
check(prop[SQLDate] _)
check(prop[Option[X1[Boolean]]] _)
}

test("drop middle column") {
def prop[A: TypedEncoder](value: A): Prop = {
val d3 = TypedDataset.create(X3(value, value, value) :: Nil)
val d2 = d3.drop('b)

(value, value) ?= d2.collect().run().head
}

check(prop[Int] _)
check(prop[Long] _)
check(prop[String] _)
check(prop[SQLDate] _)
check(prop[Option[X1[Boolean]]] _)
}

test("drop last column") {
def prop[A: TypedEncoder](value: A): Prop = {
val d3 = TypedDataset.create(X3(value, value, value) :: Nil)
val d2 = d3.drop('c)

(value, value) ?= d2.collect().run().head
}

check(prop[Int] _)
check(prop[Long] _)
check(prop[String] _)
check(prop[SQLDate] _)
check(prop[Option[X1[Boolean]]] _)
}
}
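
A note on the fixtures used above: X3 and X5 are assumed here to be the generic helper case classes from the frameless test sources, roughly of the shape sketched below. Because every drop re-tuples the remaining columns, the positional names shift at each step, which is why the five-column test drops 'a, then '_4, then '_2 twice before ending at Tuple1(value).

// Assumed fixture shape (defined elsewhere in the test sources):
//   case class X5[A, B, C, D, E](a: A, b: B, c: C, d: D, e: E)
val d5 = TypedDataset.create(X5(1, 2, 3, 4, 5) :: Nil)  // columns a, b, c, d, e
val d4 = d5.drop('a)   // tuple of the rest: _1.._4  (old b, c, d, e)
val d3 = d4.drop('_4)  // _1.._3                     (old b, c, d)
val d2 = d3.drop('_2)  // _1, _2                     (old b, d)
val d1 = d2.drop('_2)  // Tuple1 holding the old b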
