Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add several parameters related to dart boosting type #1045

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ trait LightGBMBase[TrainedModel <: Model[TrainedModel]] extends Estimator[Traine
}
}

/** Collects the dart-boosting parameters configured on this estimator into a
  * single [[DartModeParams]] container, which is forwarded to the native
  * LightGBM training configuration when the boosting type is "dart".
  */
protected def getDartParams(): DartModeParams =
  DartModeParams(
    dropRate = getDropRate,
    maxDrop = getMaxDrop,
    skipDrop = getSkipDrop,
    xgboostDartMode = getXGBoostDartMode,
    uniformDrop = getUniformDrop)

/**
* Inner train method for LightGBM learners. Calculates the number of workers,
* creates a driver thread, and runs mapPartitions on the dataset.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class LightGBMClassifier(override val uid: String)
getIsUnbalance, getVerbosity, categoricalIndexes, actualNumClasses, getBoostFromAverage,
getBoostingType, getLambdaL1, getLambdaL2, getIsProvideTrainingMetric,
getMetric, getMinGainToSplit, getMaxDeltaStep, getMaxBinByFeature, getMinDataInLeaf, getSlotNames,
getDelegate, getChunkSize)
getDelegate, getChunkSize, getDartParams())
}

def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMClassificationModel = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,45 @@ trait LightGBMBinParams extends Wrappable {
def setBinSampleCount(value: Int): this.type = set(binSampleCount, value)
}

/** Defines parameters for dart mode across all LightGBM learners.
  *
  * These map to LightGBM's `drop_rate`, `max_drop`, `skip_drop`,
  * `xgboost_dart_mode` and `uniform_drop` configuration keys, and only take
  * effect when the boosting type is set to "dart".
  */
trait LightGBMDartParams extends Wrappable {
  // Fraction of previously built trees dropped in each dart iteration.
  val dropRate = new DoubleParam(this, "dropRate",
    "Dropout rate: a fraction of previous trees to drop during the dropout")
  setDefault(dropRate -> 0.1)

  def setDropRate(value: Double): this.type = set(dropRate, value)
  def getDropRate: Double = getOrDefault(dropRate)

  // Upper bound on how many trees may be dropped in a single iteration.
  val maxDrop = new IntParam(this, "maxDrop",
    "Max number of dropped trees during one boosting iteration")
  setDefault(maxDrop -> 50)

  def setMaxDrop(value: Int): this.type = set(maxDrop, value)
  def getMaxDrop: Int = getOrDefault(maxDrop)

  // Chance that a given boosting iteration skips dropout entirely.
  val skipDrop = new DoubleParam(this, "skipDrop",
    "Probability of skipping the dropout procedure during a boosting iteration")
  setDefault(skipDrop -> 0.5)

  def setSkipDrop(value: Double): this.type = set(skipDrop, value)
  def getSkipDrop: Double = getOrDefault(skipDrop)

  // Switches dart behavior to XGBoost-compatible semantics.
  val xgboostDartMode = new BooleanParam(this, "xgboostDartMode",
    "Set this to true to use xgboost dart mode")
  setDefault(xgboostDartMode -> false)

  def setXGBoostDartMode(value: Boolean): this.type = set(xgboostDartMode, value)
  def getXGBoostDartMode: Boolean = getOrDefault(xgboostDartMode)

  // Selects trees to drop uniformly at random instead of weighted by score.
  val uniformDrop = new BooleanParam(this, "uniformDrop",
    "Set this to true to use uniform drop in dart mode")
  setDefault(uniformDrop -> false)

  def setUniformDrop(value: Boolean): this.type = set(uniformDrop, value)
  def getUniformDrop: Boolean = getOrDefault(uniformDrop)
}

/** Defines parameters for slots across all LightGBM learners.
*/
trait LightGBMSlotParams extends Wrappable {
Expand Down Expand Up @@ -231,7 +270,7 @@ trait LightGBMModelParams extends Wrappable {
trait LightGBMParams extends Wrappable with DefaultParamsWritable with HasWeightCol
with HasValidationIndicatorCol with HasInitScoreCol with LightGBMExecutionParams
with LightGBMSlotParams with LightGBMFractionParams with LightGBMBinParams with LightGBMLearnerParams
with LightGBMPredictionParams {
with LightGBMDartParams with LightGBMPredictionParams {
val numIterations = new IntParam(this, "numIterations",
"Number of iterations, LightGBM constructs num_class * num_iterations trees")
setDefault(numIterations->100)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class LightGBMRanker(override val uid: String)
getFeatureFraction, getMaxDepth, getMinSumHessianInLeaf, numTasks, modelStr,
getVerbosity, categoricalIndexes, getBoostingType, getLambdaL1, getLambdaL2, getMaxPosition, getLabelGain,
getIsProvideTrainingMetric, getMetric, getEvalAt, getMinGainToSplit, getMaxDeltaStep,
getMaxBinByFeature, getMinDataInLeaf, getSlotNames, getDelegate, getChunkSize)
getMaxBinByFeature, getMinDataInLeaf, getSlotNames, getDelegate, getChunkSize, getDartParams())
}

def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMRankerModel = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class LightGBMRegressor(override val uid: String)
getEarlyStoppingRound, getImprovementTolerance, getFeatureFraction, getMaxDepth, getMinSumHessianInLeaf,
numTasks, modelStr, getVerbosity, categoricalIndexes, getBoostFromAverage, getBoostingType, getLambdaL1,
getLambdaL2, getIsProvideTrainingMetric, getMetric, getMinGainToSplit, getMaxDeltaStep,
getMaxBinByFeature, getMinDataInLeaf, getSlotNames, getDelegate, getChunkSize)
getMaxBinByFeature, getMinDataInLeaf, getSlotNames, getDelegate, getChunkSize, getDartParams())
}

def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMRegressionModel = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ abstract class TrainParams extends Serializable {
def featureNames: Array[String]
def delegate: Option[LightGBMDelegate]
def chunkSize: Int
def dartModeParams: DartModeParams

override def toString: String = {
// Since passing `isProvideTrainingMetric` to LightGBM as a config parameter won't work,
Expand All @@ -51,10 +52,11 @@ abstract class TrainParams extends Serializable {
s"bagging_seed=$baggingSeed early_stopping_round=$earlyStoppingRound " +
s"feature_fraction=$featureFraction max_depth=$maxDepth min_sum_hessian_in_leaf=$minSumHessianInLeaf " +
s"num_machines=$numMachines objective=$objective verbosity=$verbosity " +
s"lambda_l1=$lambdaL1 lambda_l2=$lambdaL2 metric=$metric min_gain_to_split=$minGainToSplit " +
s"lambda_l1=$lambdaL1 lambda_l2=$lambdaL2 metric=$metric min_gain_to_split=$minGainToSplit " +
s"max_delta_step=$maxDeltaStep min_data_in_leaf=$minDataInLeaf " +
(if (categoricalFeatures.isEmpty) "" else s"categorical_feature=${categoricalFeatures.mkString(",")} ") +
(if (maxBinByFeature.isEmpty) "" else s"max_bin_by_feature=${maxBinByFeature.mkString(",")}")
(if (maxBinByFeature.isEmpty) "" else s"max_bin_by_feature=${maxBinByFeature.mkString(",")} ") +
(if (boostingType == "dart") s"${dartModeParams.toString()}" else "")
}
}

Expand All @@ -73,7 +75,7 @@ case class ClassifierTrainParams(parallelism: String, topK: Int, numIterations:
isProvideTrainingMetric: Boolean, metric: String, minGainToSplit: Double,
maxDeltaStep: Double, maxBinByFeature: Array[Int], minDataInLeaf: Int,
featureNames: Array[String], delegate: Option[LightGBMDelegate],
chunkSize: Int)
chunkSize: Int, dartModeParams: DartModeParams)
extends TrainParams {
override def toString(): String = {
val extraStr =
Expand All @@ -98,7 +100,7 @@ case class RegressorTrainParams(parallelism: String, topK: Int, numIterations: I
isProvideTrainingMetric: Boolean, metric: String, minGainToSplit: Double,
maxDeltaStep: Double, maxBinByFeature: Array[Int], minDataInLeaf: Int,
featureNames: Array[String], delegate: Option[LightGBMDelegate],
chunkSize: Int)
chunkSize: Int, dartModeParams: DartModeParams)
extends TrainParams {
override def toString(): String = {
s"alpha=$alpha tweedie_variance_power=$tweedieVariancePower boost_from_average=${boostFromAverage.toString} " +
Expand All @@ -120,7 +122,7 @@ case class RankerTrainParams(parallelism: String, topK: Int, numIterations: Int,
metric: String, evalAt: Array[Int], minGainToSplit: Double,
maxDeltaStep: Double, maxBinByFeature: Array[Int], minDataInLeaf: Int,
featureNames: Array[String], delegate: Option[LightGBMDelegate],
chunkSize: Int)
chunkSize: Int, dartModeParams: DartModeParams)
extends TrainParams {
override def toString(): String = {
val labelGainStr =
Expand All @@ -130,3 +132,13 @@ case class RankerTrainParams(parallelism: String, topK: Int, numIterations: Int,
s"max_position=$maxPosition $labelGainStr $evalAtStr ${super.toString()}"
}
}

/** Defines the dart mode parameters passed to the LightGBM learners.
  *
  * @param dropRate        Fraction of previous trees to drop during dropout.
  * @param maxDrop         Max number of trees dropped in one boosting iteration.
  * @param skipDrop        Probability of skipping dropout in an iteration.
  * @param xgboostDartMode Whether to use XGBoost-compatible dart semantics.
  * @param uniformDrop     Whether to drop trees uniformly at random.
  */
case class DartModeParams(dropRate: Double, maxDrop: Int, skipDrop: Double,
                          xgboostDartMode: Boolean, uniformDrop: Boolean) extends Serializable {
  /** Renders the parameters as space-separated `key=value` pairs (with a
    * trailing space) for appending to the LightGBM config string.
    */
  override def toString(): String =
    Seq(
      s"drop_rate=$dropRate",
      s"max_drop=$maxDrop",
      s"skip_drop=$skipDrop",
      s"xgboost_dart_mode=$xgboostDartMode",
      s"uniform_drop=$uniformDrop"
    ).mkString("", " ", " ")
}
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,20 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
assertBinaryImprovement(scoredDF1, scoredDF2)
}

test("Verify LightGBM Classifier with dart mode parameters") {
  // Dart mode should train without errors, and hand-tuned dart parameters
  // should improve the binary metric over the dart defaults.
  val Array(trainData, testData) = pimaDF.randomSplit(Array(0.8, 0.2), seed)
  val defaultDartScores = baseModel.setBoostingType("dart").fit(trainData).transform(testData)
  val tunedDartModel = baseModel
    .setBoostingType("dart")
    .setXGBoostDartMode(true)
    .setDropRate(0.6)
    .setMaxDrop(60)
    .setSkipDrop(0.6)
    .setUniformDrop(true)
  val tunedDartScores = tunedDartModel.fit(trainData).transform(testData)
  assertBinaryImprovement(defaultDartScores, tunedDartScores)
}

test("Verify LightGBM Classifier with num tasks parameter") {
val numTasks = Array(0, 1, 2)
numTasks.foreach(nTasks => assertFitWithoutErrors(baseModel.setNumTasks(nTasks), pimaDF))
Expand Down