From 1af3f3e52c7977e7ae9aecc3eed1107aea9416b4 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 12 Apr 2021 13:09:13 +0300
Subject: [PATCH 01/26] Added base for minimal variance sampling booster

---
 include/LightGBM/config.h |   8 +++
 src/boosting/boosting.cpp |   3 +
 src/boosting/mvs.cpp      | 128 ++++++++++++++++++++++++++++++++++++++
 src/boosting/mvs.hpp      |  99 +++++++++++++++++++++++++++++
 4 files changed, 238 insertions(+)
 create mode 100644 src/boosting/mvs.cpp
 create mode 100644 src/boosting/mvs.hpp
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 73696cdb88f8..886195b81a92 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -317,6 +317,14 @@ struct Config {
   // desc = **Note**: if balanced bagging is enabled, ``bagging_fraction`` will be ignored
   double neg_bagging_fraction = 1.0;
 
+  // check = >0.0
+  // check = <=1.0
+  // desc = used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
+  double mvs_lambda = 1e-2;
+
+  // desc = use adaptive variant of mvs boosting
+  bool mvs_adaptive = false;
+
   // alias = subsample_freq
   // desc = frequency for bagging
   // desc = ``0`` means disable bagging; ``k`` means perform bagging at every ``k`` iteration. Every ``k``-th iteration, LightGBM will randomly select ``bagging_fraction * 100 %`` of the data to use for the next ``k`` iterations
diff --git a/src/boosting/boosting.cpp b/src/boosting/boosting.cpp
index 91fa318a0f18..efadfe888070 100644
--- a/src/boosting/boosting.cpp
+++ b/src/boosting/boosting.cpp
@@ -8,6 +8,7 @@
 #include "gbdt.h"
 #include "goss.hpp"
 #include "rf.hpp"
+#include "mvs.hpp"
 
 namespace LightGBM {
 
@@ -42,6 +43,8 @@ Boosting* Boosting::CreateBoosting(const std::string& type, const char* filename
       return new GOSS();
     } else if (type == std::string("rf")) {
       return new RF();
+    } else if (type == std::string("mvs")) {
+      return new MVS();
     } else {
       return nullptr;
     }
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
new file mode 100644
index 000000000000..d6ae84923e31
--- /dev/null
+++ b/src/boosting/mvs.cpp
@@ -0,0 +1,128 @@
+//
+// Created by archer on 11.04.2021.
+//
+
+#include "mvs.hpp"
+
+namespace LightGBM {
+
+static score_t CalculateThreshold(std::vector<score_t> grad_values_copy, double sample_size, data_size_t* big_grad_cnt) {
+  std::vector<score_t> *grad_values = &grad_values_copy;
+  double sum_low = 0.;
+  size_t n_high = 0;
+  int begin = 0;
+  int end = static_cast<int>(grad_values->size());
+
+  while (begin != end) {
+    // TODO do partition in parallel
+    // TODO partition to three parts
+    int middle_begin, middle_end;
+    ArrayArgs<score_t>::Partition(grad_values, begin, end, &middle_begin, &middle_end);
+
+    const size_t n_middle = middle_end - middle_begin;
+    const size_t n_right = end - middle_end;
+    const score_t pivot = (*grad_values)[middle_begin];
+
+    // TODO do sum in parallel
+    double cur_left_sum = std::accumulate(&grad_values->at(begin), &grad_values->at(middle_begin), 0.0);
+    double sum_middle = n_middle * pivot;
+
+    double cur_sampling_rate = (sum_low + cur_left_sum) / pivot + n_right + n_middle + n_high;
+
+    if (cur_sampling_rate > sample_size) {
+      sum_low += sum_middle + cur_left_sum;
+      begin = middle_end;
+    } else {
+      n_high += n_right + n_middle;
+      end = middle_begin;
+    }
+  }
+  *big_grad_cnt = n_high;
+  return sum_low / (sample_size - n_high + MVS::kMVSEps);
+}
+
+static double ComputeLeavesMeanSquaredValue(const Tree &tree) {
+  // TODO sum over leaves are leave values one dimensional
+  // TODO sum using openmp
+  double sum_values = 0.0;
+  for (int i = 0; i < tree.num_leaves(); ++i) {
+    const auto output = tree.LeafOutput(i);
+    sum_values += output * output;
+  }
+  return std::sqrt(sum_values / tree.num_leaves());
+}
+
+void MVS::ResetMVS() {
+  CHECK(config_->bagging_fraction > 0.0f && config_->bagging_fraction < 1.0f && config_->bagging_freq > 0);
+  CHECK(config_->mvs_lambda > 0.0f && config_->mvs_lambda < 1.0f);
+  CHECK(!balanced_bagging_);
+  const auto sample_size = static_cast<size_t>(config_->bagging_fraction * num_data_);
+  CHECK_EQ(sample_size, bag_data_indices_.size());
+  Log::Info("Using MVS");
+
+}
+
+double MVS::GetLambda() {
+  double lambda = ComputeLeavesMeanSquaredValue(*models_.back());
+  return lambda;
+}
+
+void MVS::Bagging(int iter) {
+  bag_data_cnt_ = num_data_;
+  if (mvs_adaptive_) {
+    mvs_lambda_ = GetLambda();
+  }
+
+  auto left_cnt = bagging_runner_.Run<true>(
+      num_data_,
+      [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t *left,
+          data_size_t *) {
+        data_size_t cur_left_cout = BaggingHelper(cur_start, cur_cnt, left);
+        return cur_left_cout;
+      },
+      bag_data_indices_.data());
+  bag_data_cnt_ = left_cnt;
+  if (!is_use_subset_) {
+    tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
+  } else {
+    tmp_subset_->ReSize(bag_data_cnt_);
+    tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
+                            bag_data_cnt_, false);
+    tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
+                                  bag_data_cnt_);
+  }
+}
+
+data_size_t MVS::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *buffer) {
+  if (cnt <= 0) {
+    return 0;
+  }
+
+  std::vector<score_t> tmp_derivatives(cnt, 0.0f);
+  for (data_size_t i = 0; i < cnt; ++i) {
+    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+      size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start * i;
+      tmp_derivatives[i] += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
+    }
+    tmp_derivatives[i] = std::sqrt(tmp_derivatives[i]);
+  }
+
+  auto sample_rate = static_cast<data_size_t>(cnt * config_->bagging_fraction);
+  data_size_t big_grad_cnt = 0;
+  const auto threshold = CalculateThreshold(tmp_derivatives, static_cast<double>(sample_rate), &big_grad_cnt);
+  data_size_t left_cnt = 0;
+  data_size_t big_weight_cnt = 0;
+  for (data_size_t i = 0; i < cnt; ++i) {
+    auto position = start + i;
+    if (tmp_derivatives[i] > threshold) {
+      buffer[left_cnt++] = position;
+      ++big_weight_cnt;
+    } else {
+      double proba_threshold = tmp_derivatives[i] / threshold;
+      data_size_t sampled = left_cnt - big_weight_cnt;
+      data_size_t  rest_needed = ;
+    }
+  }
+}
+
+}  // namspace LightGBM
\ No newline at end of file
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
new file mode 100644
index 000000000000..781f01fcd36b
--- /dev/null
+++ b/src/boosting/mvs.hpp
@@ -0,0 +1,99 @@
+/*!
+ * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+#ifndef LIGHTGBM_BOOSTING_MVSB_H_
+#define LIGHTGBM_BOOSTING_MVSB_H_
+
+#include <LightGBM/boosting.h>
+#include <LightGBM/utils/array_args.h>
+#include <LightGBM/utils/log.h>
+
+#include <string>
+#include <algorithm>
+#include <chrono>
+#include <cstdio>
+#include <cstdint>
+#include <fstream>
+#include <vector>
+
+#include "gbdt.h"
+#include "score_updater.hpp"
+
+namespace LightGBM {
+
+class MVS : public GBDT {
+ public:
+  /*!
+   * \brief Constructor
+   */
+  MVS() : GBDT() {
+  }
+
+  ~MVS() override = default;
+
+  void Init(const Config *config, const Dataset *train_data, const ObjectiveFunction *objective_function,
+            const std::vector<const Metric *> &training_metrics) override {
+    GBDT::Init(config, train_data, objective_function, training_metrics);
+    mvs_lambda_ = config_->mvs_lambda;
+    mvs_adaptive_ = config_->mvs_adaptive;
+    ResetMVS();
+    if (objective_function_ == nullptr) {
+      // use customized objective function
+      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+      gradients_.resize(total_size, 0.0f);
+      hessians_.resize(total_size, 0.0f);
+    }
+  }
+
+  void ResetTrainingData(const Dataset *train_data, const ObjectiveFunction *objective_function,
+                         const std::vector<const Metric *> &training_metrics) override {
+    GBDT::ResetTrainingData(train_data, objective_function, training_metrics);
+    ResetMVS();
+  }
+
+  void ResetConfig(const Config *config) override {
+    GBDT::ResetConfig(config);
+    mvs_lambda_ = config_->mvs_lambda;
+    mvs_adaptive_ = config_->mvs_adaptive;
+    ResetMVS();
+  }
+
+  void ResetMVS();
+
+  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override {
+    if (gradients != nullptr) {
+      // use customized objective function
+      CHECK(hessians != nullptr && objective_function_ == nullptr);
+      int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+      #pragma omp parallel for schedule(static)
+      for (int64_t i = 0; i < total_size; ++i) {
+        gradients_[i] = gradients[i];
+        hessians_[i] = hessians[i];
+      }
+      return GBDT::TrainOneIter(gradients_.data(), hessians_.data());
+    } else {
+      CHECK(hessians == nullptr);
+      return GBDT::TrainOneIter(nullptr, nullptr);
+    }
+  }
+
+  data_size_t BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *buffer) override;
+
+  void Bagging(int iter) override;
+  // TODO move this constant to some constants
+  static constexpr double kMVSEps = 1e-20;
+
+ protected:
+
+  bool GetIsConstHessian(const ObjectiveFunction *) override {
+    return false;
+  }
+
+  double GetLambda();
+
+  double mvs_lambda_;
+  bool mvs_adaptive_;
+};
+}  // namespace LightGBM
+#endif   // LIGHTGBM_BOOSTING_MVS_H_
\ No newline at end of file

From 0ad27406adf1b5a1be71e323d6ba65497721e1d7 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 06:19:57 +0300
Subject: [PATCH 02/26] Implemented MVS booster with support for multioutput
 targets, deterministic execution on small datasets

---
 include/LightGBM/config.h |   2 +-
 src/boosting/boosting.cpp |   2 +
 src/boosting/mvs.cpp      | 195 ++++++++++++++++++++++++++------------
 src/boosting/mvs.hpp      |  11 ++-
 src/io/config.cpp         |   4 +-
 src/io/config_auto.cpp    |   7 ++
 6 files changed, 156 insertions(+), 65 deletions(-)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 886195b81a92..822e8fdccd6b 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -320,7 +320,7 @@ struct Config {
   // check = >0.0
   // check = <=1.0
   // desc = used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
-  double mvs_lambda = 1e-2;
+  double mvs_lambda = 1e-4;
 
   // desc = use adaptive variant of mvs boosting
   bool mvs_adaptive = false;
diff --git a/src/boosting/boosting.cpp b/src/boosting/boosting.cpp
index efadfe888070..23627e6b6e9c 100644
--- a/src/boosting/boosting.cpp
+++ b/src/boosting/boosting.cpp
@@ -59,6 +59,8 @@ Boosting* Boosting::CreateBoosting(const std::string& type, const char* filename
         ret.reset(new GOSS());
       } else if (type == std::string("rf")) {
         return new RF();
+      } else if (type == std::string("mvs")) {
+        return new MVS();
       } else {
         Log::Fatal("Unknown boosting type %s", type.c_str());
       }
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index d6ae84923e31..9670c1b9d80c 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -4,83 +4,113 @@
 
 #include "mvs.hpp"
 
+#include <algorithm>
+
 namespace LightGBM {
 
-static score_t CalculateThreshold(std::vector<score_t> grad_values_copy, double sample_size, data_size_t* big_grad_cnt) {
-  std::vector<score_t> *grad_values = &grad_values_copy;
-  double sum_low = 0.;
-  size_t n_high = 0;
-  int begin = 0;
-  int end = static_cast<int>(grad_values->size());
+using ConstTreeIterator = std::vector<std::unique_ptr<Tree>>::const_iterator;
 
-  while (begin != end) {
-    // TODO do partition in parallel
-    // TODO partition to three parts
-    int middle_begin, middle_end;
-    ArrayArgs<score_t>::Partition(grad_values, begin, end, &middle_begin, &middle_end);
+static double CalculateThresholdSequential(std::vector<score_t>* gradients, data_size_t begin, data_size_t end,
+                                    const double sample_size) {
+  double current_sum_small = 0.0;
+  data_size_t big_grad_size = 0;
 
-    const size_t n_middle = middle_end - middle_begin;
-    const size_t n_right = end - middle_end;
-    const score_t pivot = (*grad_values)[middle_begin];
+  while (begin != end) {
+    data_size_t middle_begin=0, middle_end=0;
+    ArrayArgs<score_t>::Partition(gradients, begin, end, &middle_begin, &middle_end);
+    ++middle_begin; // for half intervals
+    const data_size_t n_middle = middle_end - middle_begin;
+    const data_size_t large_size = middle_begin - begin;
 
-    // TODO do sum in parallel
-    double cur_left_sum = std::accumulate(&grad_values->at(begin), &grad_values->at(middle_begin), 0.0);
-    double sum_middle = n_middle * pivot;
+    const double sum_small = std::accumulate(gradients->begin() + middle_end, gradients->begin() + end, 0.0);
+    const double sum_middle = (*gradients)[middle_begin] * n_middle;
 
-    double cur_sampling_rate = (sum_low + cur_left_sum) / pivot + n_right + n_middle + n_high;
+    const double
+        current_sampling_rate = (current_sum_small + sum_small) / (*gradients)[middle_begin] + big_grad_size + n_middle + large_size;
 
-    if (cur_sampling_rate > sample_size) {
-      sum_low += sum_middle + cur_left_sum;
-      begin = middle_end;
-    } else {
-      n_high += n_right + n_middle;
+    if (current_sampling_rate > sample_size) {
+      current_sum_small += sum_small + sum_middle;
       end = middle_begin;
+    } else {
+      big_grad_size += n_middle + large_size;
+      begin = middle_end;
     }
   }
-  *big_grad_cnt = n_high;
-  return sum_low / (sample_size - n_high + MVS::kMVSEps);
+
+  return current_sum_small / (sample_size - big_grad_size + kEpsilon);
 }
 
-static double ComputeLeavesMeanSquaredValue(const Tree &tree) {
-  // TODO sum over leaves are leave values one dimensional
-  // TODO sum using openmp
+static double ComputeLeavesMeanSquaredValue(ConstTreeIterator begin, ConstTreeIterator end) {
   double sum_values = 0.0;
-  for (int i = 0; i < tree.num_leaves(); ++i) {
-    const auto output = tree.LeafOutput(i);
-    sum_values += output * output;
+  data_size_t num_leaves = (*begin)->num_leaves();
+#pragma omp parallel for schedule(static, 2048) reduction(+:sum_values)
+  for (data_size_t leaf_idx = 0; leaf_idx < num_leaves; ++leaf_idx) {
+    double leave_value = 0.0;
+    for (ConstTreeIterator it = begin; it != end; ++it) {
+      if (leaf_idx < (**it).num_leaves()) {
+        const double value = (*it)->LeafOutput(leaf_idx);
+        leave_value += value * value;
+      }
+    }
+    sum_values += std::sqrt(leave_value);
   }
-  return std::sqrt(sum_values / tree.num_leaves());
+  return sum_values / num_leaves;
 }
 
-void MVS::ResetMVS() {
-  CHECK(config_->bagging_fraction > 0.0f && config_->bagging_fraction < 1.0f && config_->bagging_freq > 0);
-  CHECK(config_->mvs_lambda > 0.0f && config_->mvs_lambda < 1.0f);
-  CHECK(!balanced_bagging_);
-  const auto sample_size = static_cast<size_t>(config_->bagging_fraction * num_data_);
-  CHECK_EQ(sample_size, bag_data_indices_.size());
-  Log::Info("Using MVS");
-
+static double ComputeMeanGradValues(score_t *gradients,
+                                    score_t *hessians,
+                                    data_size_t size,
+                                    data_size_t num_tree_per_iteration) {
+  double sum = 0.0;
+#pragma omp parallel for schedule(static, 1024) reduction(+:sum)
+  for (data_size_t i = 0; i < size; ++i) {
+    double local_hessians = 0.0, local_gradients = 0.0;
+    for (data_size_t j = 0; j < num_tree_per_iteration; ++j) {
+      size_t idx = static_cast<size_t>(size) * j + i;
+      local_hessians += hessians[idx] * hessians[idx];
+      local_gradients += gradients[idx] * gradients[idx];
+    }
+    sum += std::sqrt(local_gradients / local_hessians);
+  }
+  return sum / size;
 }
 
 double MVS::GetLambda() {
-  double lambda = ComputeLeavesMeanSquaredValue(*models_.back());
+  if (!mvs_adaptive_) {
+    return mvs_lambda_;
+  }
+  double lambda =
+      (this->iter_ > 0) ? ComputeLeavesMeanSquaredValue(models_.cend() - num_tree_per_iteration_, models_.cend())
+          / config_->learning_rate
+                        : ComputeMeanGradValues(gradients_.data(),
+                                                hessians_.data(),
+                                                num_data_,
+                                                num_tree_per_iteration_);
+
   return lambda;
 }
 
 void MVS::Bagging(int iter) {
+  if (iter % config_->bagging_freq != 0 && !need_re_bagging_) {
+    return;
+  }
+
   bag_data_cnt_ = num_data_;
-  if (mvs_adaptive_) {
-    mvs_lambda_ = GetLambda();
+  mvs_lambda_ = GetLambda();
+
+  if (num_data_ <= kMaxSequentialSize) {
+    threshold_ = GetThreshold(0, num_data_);
   }
 
   auto left_cnt = bagging_runner_.Run<true>(
       num_data_,
       [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t *left,
           data_size_t *) {
-        data_size_t cur_left_cout = BaggingHelper(cur_start, cur_cnt, left);
-        return cur_left_cout;
+        data_size_t left_count = BaggingHelper(cur_start, cur_cnt, left);
+        return left_count;
       },
       bag_data_indices_.data());
+
   bag_data_cnt_ = left_cnt;
   if (!is_use_subset_) {
     tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
@@ -91,6 +121,8 @@ void MVS::Bagging(int iter) {
     tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
                                   bag_data_cnt_);
   }
+  threshold_ = 0.0;
+  Log::Debug("MVS Sample size %d %d", left_cnt, static_cast<data_size_t>(config_->bagging_fraction * num_data_));
 }
 
 data_size_t MVS::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *buffer) {
@@ -98,31 +130,72 @@ data_size_t MVS::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *
     return 0;
   }
 
-  std::vector<score_t> tmp_derivatives(cnt, 0.0f);
-  for (data_size_t i = 0; i < cnt; ++i) {
-    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-      size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start * i;
-      tmp_derivatives[i] += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
-    }
-    tmp_derivatives[i] = std::sqrt(tmp_derivatives[i]);
-  }
+  const double threshold = GetThreshold(start, cnt);
 
-  auto sample_rate = static_cast<data_size_t>(cnt * config_->bagging_fraction);
-  data_size_t big_grad_cnt = 0;
-  const auto threshold = CalculateThreshold(tmp_derivatives, static_cast<double>(sample_rate), &big_grad_cnt);
   data_size_t left_cnt = 0;
+  data_size_t right_pos = cnt;
   data_size_t big_weight_cnt = 0;
   for (data_size_t i = 0; i < cnt; ++i) {
-    auto position = start + i;
-    if (tmp_derivatives[i] > threshold) {
+    data_size_t position = start + i;
+
+    double derivative = 0.0;
+    for (data_size_t j = 0; j < num_tree_per_iteration_; ++j) {
+      size_t idx = static_cast<size_t>(j) * num_data_ + position;
+      derivative += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
+    }
+    derivative = std::sqrt(derivative);
+
+    if (derivative >= threshold) {
       buffer[left_cnt++] = position;
       ++big_weight_cnt;
     } else {
-      double proba_threshold = tmp_derivatives[i] / threshold;
-      data_size_t sampled = left_cnt - big_weight_cnt;
-      data_size_t  rest_needed = ;
+      const double proba_threshold = derivative / threshold;
+      const double proba = bagging_rands_[position / bagging_rand_block_].NextFloat();
+      if (proba < proba_threshold) {
+        buffer[left_cnt++] = position;
+        for (data_size_t tree_id = 0; tree_id < num_tree_per_iteration_; ++tree_id) {
+          size_t idx = static_cast<size_t>(num_data_) * tree_id + position;
+          gradients_[idx] /= proba_threshold;
+          hessians_[idx] /= proba_threshold;
+        }
+      } else {
+        buffer[--right_pos] = position;
+      }
+    }
+  }
+
+  return left_cnt;
+}
+
+double MVS::GetThreshold(data_size_t begin, data_size_t cnt) {
+  data_size_t n_blocks, block_size;
+  Threading::BlockInfoForceSize<data_size_t>(num_data_, bagging_rand_block_, &n_blocks, &block_size);
+  if (num_data_ < kMaxSequentialSize && block_size > 1 && threshold_ != 0.0) {
+    return threshold_;
+  }
+
+  for (data_size_t i = begin; i < begin + cnt; ++i) {
+    tmp_derivatives_[i] = 0.0f;
+    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+      size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + i;
+      tmp_derivatives_[i] += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
     }
+    tmp_derivatives_[i] = std::sqrt(tmp_derivatives_[i]);
   }
+
+  double threshold = CalculateThresholdSequential(&tmp_derivatives_, begin, begin + cnt,
+                                                  cnt * config_->bagging_fraction);
+  return threshold;
+}
+
+void MVS::ResetMVS() {
+  CHECK(config_->bagging_fraction > 0.0f && config_->bagging_fraction < 1.0f && config_->bagging_freq > 0);
+  CHECK(config_->mvs_lambda >= 0.0f);
+  CHECK(!balanced_bagging_);
+
+  bag_data_indices_.resize(num_data_);
+  tmp_derivatives_.resize(num_data_);
+  Log::Info("Using MVS");
 }
 
 }  // namspace LightGBM
\ No newline at end of file
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index 781f01fcd36b..b08099547d25 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -61,12 +61,12 @@ class MVS : public GBDT {
 
   void ResetMVS();
 
-  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override {
+  bool TrainOneIter(const score_t *gradients, const score_t *hessians) override {
     if (gradients != nullptr) {
       // use customized objective function
       CHECK(hessians != nullptr && objective_function_ == nullptr);
       int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
-      #pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static, 1)
       for (int64_t i = 0; i < total_size; ++i) {
         gradients_[i] = gradients[i];
         hessians_[i] = hessians[i];
@@ -80,6 +80,7 @@ class MVS : public GBDT {
 
   data_size_t BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *buffer) override;
 
+
   void Bagging(int iter) override;
   // TODO move this constant to some constants
   static constexpr double kMVSEps = 1e-20;
@@ -90,9 +91,15 @@ class MVS : public GBDT {
     return false;
   }
 
+  double GetThreshold(data_size_t begin, data_size_t end);
+
   double GetLambda();
 
+  static const data_size_t kMaxSequentialSize = 256000;
+
   double mvs_lambda_;
+  double threshold_{0.0};
+  std::vector<score_t> tmp_derivatives_;
   bool mvs_adaptive_;
 };
 }  // namespace LightGBM
diff --git a/src/io/config.cpp b/src/io/config.cpp
index fbb9e339933f..93e2bd511761 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -58,7 +58,9 @@ void GetBoostingType(const std::unordered_map<std::string, std::string>& params,
       *boosting = "goss";
     } else if (value == std::string("rf") || value == std::string("random_forest")) {
       *boosting = "rf";
-    } else {
+    } else if (value == std::string("mvs")) {
+      *boosting = "mvs";
+    }else {
       Log::Fatal("Unknown boosting type %s", value.c_str());
     }
   }
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 06c53e84268a..6716138af439 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -191,6 +191,8 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "max_depth",
   "min_data_in_leaf",
   "min_sum_hessian_in_leaf",
+  "mvs_lambda",
+  "mvs_adaptive",
   "bagging_fraction",
   "pos_bagging_fraction",
   "neg_bagging_fraction",
@@ -345,6 +347,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
   GetDouble(params, "bagging_fraction", &bagging_fraction);
   CHECK_GT(bagging_fraction, 0.0);
   CHECK_LE(bagging_fraction, 1.0);
+  GetDouble(params, "mvs_lambda", &mvs_lambda);
+  CHECK_GT(mvs_lambda, 0.0);
+  GetBool(params, "mvs_adaptive", &mvs_adaptive);
 
   GetDouble(params, "pos_bagging_fraction", &pos_bagging_fraction);
   CHECK_GT(pos_bagging_fraction, 0.0);
@@ -644,6 +649,8 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[min_data_in_leaf: " << min_data_in_leaf << "]\n";
   str_buf << "[min_sum_hessian_in_leaf: " << min_sum_hessian_in_leaf << "]\n";
   str_buf << "[bagging_fraction: " << bagging_fraction << "]\n";
+  str_buf << "[mvs_lambda: " << mvs_lambda << "]\n";
+  str_buf << "[mvs_adaptive" << mvs_adaptive << "]\n";
   str_buf << "[pos_bagging_fraction: " << pos_bagging_fraction << "]\n";
   str_buf << "[neg_bagging_fraction: " << neg_bagging_fraction << "]\n";
   str_buf << "[bagging_freq: " << bagging_freq << "]\n";

From b067a5b3794cbef7e28f03f71d30d2fdd5d7f090 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 07:54:49 +0300
Subject: [PATCH 03/26] Updated documentation and fixed some linting errors

---
 docs/Parameters.rst       | 14 ++++++++++++++
 include/LightGBM/config.h |  3 +++
 src/boosting/mvs.cpp      | 15 ++++++++-------
 src/boosting/mvs.hpp      |  5 ++---
 src/io/config.cpp         |  2 +-
 src/io/config_auto.cpp    | 17 ++++++++++-------
 6 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index db4673b8dcff..3b5af283feaf 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -119,6 +119,8 @@ Core Parameters
 
       -  **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
 
+   -  ``mvs``, Minimal variance sampling <https://arxiv.org/abs/1910.13204>__
+
 -  ``linear_tree`` :raw-html:`<a id="linear_tree" title="Permalink to this parameter" href="#linear_tree">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``linear_trees``
 
    -  fit piecewise linear gradient boosting tree
@@ -336,6 +338,18 @@ Learning Control Parameters
 
    -  **Note**: if balanced bagging is enabled, ``bagging_fraction`` will be ignored
 
+-  ``mvs_lambda`` :raw-html:`<a id="mvs_lambda" title="Permalink to this parameter" href="#mvs_lambda">&#x1F517;&#xFE0E;</a>`, default = ``1e-4``, type = double, constraints: ``0.0 < mvs_lambda <= 1.0``
+
+   -  used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
+
+   -  used only in ``mvs``
+
+-  ``mvs_adaptive`` :raw-html:`<a id="mvs_adaptive" title="Permalink to this parameter" href="#mvs_adaptive">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
+
+   -  use adaptive variant of mvs boosting
+
+   -  used only in ``mvs``
+
 -  ``bagging_freq`` :raw-html:`<a id="bagging_freq" title="Permalink to this parameter" href="#bagging_freq">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``subsample_freq``
 
    -  frequency for bagging
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 2513191ddfab..be63d8ae5254 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -147,6 +147,7 @@ struct Config {
   // desc = ``dart``, `Dropouts meet Multiple Additive Regression Trees <https://arxiv.org/abs/1505.01866>`__
   // desc = ``goss``, Gradient-based One-Side Sampling
   // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
+  // desc = ``mvs``, Minimal variance sampling <https://arxiv.org/abs/1910.13204>__
   std::string boosting = "gbdt";
 
   // alias = linear_trees
@@ -321,9 +322,11 @@ struct Config {
   // check = >0.0
   // check = <=1.0
   // desc = used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
+  // desc = used only in ``mvs``
   double mvs_lambda = 1e-4;
 
   // desc = use adaptive variant of mvs boosting
+  // desc = used only in ``mvs``
   bool mvs_adaptive = false;
 
   // alias = subsample_freq
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index 9670c1b9d80c..ebd464b78d9c 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -1,10 +1,11 @@
-//
-// Created by archer on 11.04.2021.
-//
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
 
 #include "mvs.hpp"
 
-#include <algorithm>
+#include <memory>
 
 namespace LightGBM {
 
@@ -16,9 +17,9 @@ static double CalculateThresholdSequential(std::vector<score_t>* gradients, data
   data_size_t big_grad_size = 0;
 
   while (begin != end) {
-    data_size_t middle_begin=0, middle_end=0;
+    data_size_t middle_begin = 0, middle_end = 0;
     ArrayArgs<score_t>::Partition(gradients, begin, end, &middle_begin, &middle_end);
-    ++middle_begin; // for half intervals
+    ++middle_begin;  // for half intervals
     const data_size_t n_middle = middle_end - middle_begin;
     const data_size_t large_size = middle_begin - begin;
 
@@ -198,4 +199,4 @@ void MVS::ResetMVS() {
   Log::Info("Using MVS");
 }
 
-}  // namspace LightGBM
\ No newline at end of file
+}  // namespace LightGBM
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index b08099547d25..2cfee917ac53 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -82,11 +82,10 @@ class MVS : public GBDT {
 
 
   void Bagging(int iter) override;
-  // TODO move this constant to some constants
+
   static constexpr double kMVSEps = 1e-20;
 
  protected:
-
   bool GetIsConstHessian(const ObjectiveFunction *) override {
     return false;
   }
@@ -103,4 +102,4 @@ class MVS : public GBDT {
   bool mvs_adaptive_;
 };
 }  // namespace LightGBM
-#endif   // LIGHTGBM_BOOSTING_MVS_H_
\ No newline at end of file
+#endif   // LIGHTGBM_BOOSTING_MVS_H_
diff --git a/src/io/config.cpp b/src/io/config.cpp
index 65eaa161cfb0..2a47a1226601 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -60,7 +60,7 @@ void GetBoostingType(const std::unordered_map<std::string, std::string>& params,
       *boosting = "rf";
     } else if (value == std::string("mvs")) {
       *boosting = "mvs";
-    }else {
+    } else {
       Log::Fatal("Unknown boosting type %s", value.c_str());
     }
   }
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 9293c5220877..41a8d56e55b5 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -193,11 +193,11 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "max_depth",
   "min_data_in_leaf",
   "min_sum_hessian_in_leaf",
-  "mvs_lambda",
-  "mvs_adaptive",
   "bagging_fraction",
   "pos_bagging_fraction",
   "neg_bagging_fraction",
+  "mvs_lambda",
+  "mvs_adaptive",
   "bagging_freq",
   "bagging_seed",
   "feature_fraction",
@@ -350,9 +350,6 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
   GetDouble(params, "bagging_fraction", &bagging_fraction);
   CHECK_GT(bagging_fraction, 0.0);
   CHECK_LE(bagging_fraction, 1.0);
-  GetDouble(params, "mvs_lambda", &mvs_lambda);
-  CHECK_GT(mvs_lambda, 0.0);
-  GetBool(params, "mvs_adaptive", &mvs_adaptive);
 
   GetDouble(params, "pos_bagging_fraction", &pos_bagging_fraction);
   CHECK_GT(pos_bagging_fraction, 0.0);
@@ -362,6 +359,12 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
   CHECK_GT(neg_bagging_fraction, 0.0);
   CHECK_LE(neg_bagging_fraction, 1.0);
 
+  GetDouble(params, "mvs_lambda", &mvs_lambda);
+  CHECK_GT(mvs_lambda, 0.0);
+  CHECK_LE(mvs_lambda, 1.0);
+
+  GetBool(params, "mvs_adaptive", &mvs_adaptive);
+
   GetInt(params, "bagging_freq", &bagging_freq);
 
   GetInt(params, "bagging_seed", &bagging_seed);
@@ -654,10 +657,10 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[min_data_in_leaf: " << min_data_in_leaf << "]\n";
   str_buf << "[min_sum_hessian_in_leaf: " << min_sum_hessian_in_leaf << "]\n";
   str_buf << "[bagging_fraction: " << bagging_fraction << "]\n";
-  str_buf << "[mvs_lambda: " << mvs_lambda << "]\n";
-  str_buf << "[mvs_adaptive" << mvs_adaptive << "]\n";
   str_buf << "[pos_bagging_fraction: " << pos_bagging_fraction << "]\n";
   str_buf << "[neg_bagging_fraction: " << neg_bagging_fraction << "]\n";
+  str_buf << "[mvs_lambda: " << mvs_lambda << "]\n";
+  str_buf << "[mvs_adaptive: " << mvs_adaptive << "]\n";
   str_buf << "[bagging_freq: " << bagging_freq << "]\n";
   str_buf << "[bagging_seed: " << bagging_seed << "]\n";
   str_buf << "[feature_fraction: " << feature_fraction << "]\n";

From 8229008a2ae9c9f7a174a908bb9bb50f5b2a0916 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 08:44:25 +0300
Subject: [PATCH 04/26] fixed python sklearn documentation, tried to fix R CRAN
 CI

---
 python-package/lightgbm/sklearn.py |  1 +
 src/boosting/mvs.cpp               |  2 ++
 src/boosting/mvs.hpp               | 10 +++++-----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 2b2261736067..6cf7b5cacab2 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -364,6 +364,7 @@ def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
             'dart', Dropouts meet Multiple Additive Regression Trees.
             'goss', Gradient-based One-Side Sampling.
             'rf', Random Forest.
+            'mvs', Minimal Variance Sampling.
         num_leaves : int, optional (default=31)
             Maximum tree leaves for base learners.
         max_depth : int, optional (default=-1)
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index ebd464b78d9c..e7dc108feab3 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -11,6 +11,8 @@ namespace LightGBM {
 
 using ConstTreeIterator = std::vector<std::unique_ptr<Tree>>::const_iterator;
 
+MVS::MVS() : GBDT() {}
+
 static double CalculateThresholdSequential(std::vector<score_t>* gradients, data_size_t begin, data_size_t end,
                                     const double sample_size) {
   double current_sum_small = 0.0;
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index 2cfee917ac53..9c584bdd6e2c 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -2,8 +2,8 @@
  * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License. See LICENSE file in the project root for license information.
  */
-#ifndef LIGHTGBM_BOOSTING_MVSB_H_
-#define LIGHTGBM_BOOSTING_MVSB_H_
+#ifndef LIGHTGBM_BOOSTING_MVS_H_
+#define LIGHTGBM_BOOSTING_MVS_H_
 
 #include <LightGBM/boosting.h>
 #include <LightGBM/utils/array_args.h>
@@ -27,10 +27,10 @@ class MVS : public GBDT {
   /*!
    * \brief Constructor
    */
-  MVS() : GBDT() {
-  }
+  MVS();
 
-  ~MVS() override = default;
+  ~MVS() {
+  }
 
   void Init(const Config *config, const Dataset *train_data, const ObjectiveFunction *objective_function,
             const std::vector<const Metric *> &training_metrics) override {

From 0f2620eea2588c1bc799dbf5db2cc06ab6c3730d Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 09:05:32 +0300
Subject: [PATCH 05/26] Second attempt to fix R pipeline

---
 R-package/R/lgb.train.R   | 2 +-
 R-package/src/Makevars.in | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index c47d6ce6901e..c9724953ce7d 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -15,7 +15,7 @@
 #' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 #'            the "Parameters" section of the documentation} for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"mvs"} or \code{"goss"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfitting. Tree still grow by leaf-wise.}
diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 2490ba0757df..7b0ef5852a2e 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -26,6 +26,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
+    boosting/mvs.o\
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From d50769e0201aa3a28a8b10d35b8f2d4792f05a50 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 09:34:53 +0300
Subject: [PATCH 06/26] Fixed R package build for windows and linting error

---
 R-package/R/lgb.train.R       | 3 ++-
 R-package/src/Makevars.win.in | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index c9724953ce7d..cba50e38939d 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -15,7 +15,8 @@
 #' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 #'            the "Parameters" section of the documentation} for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"mvs"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
+#'                                                      \code{"mvs"} or \code{"goss"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfitting. Tree still grow by leaf-wise.}
diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in
index 0fb2de926905..3ec46956b7c4 100644
--- a/R-package/src/Makevars.win.in
+++ b/R-package/src/Makevars.win.in
@@ -27,6 +27,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
+    boosting/mvs.o\
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From f531f3a37673f1eacdaf8695ca6816cf03811709 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 10:20:42 +0300
Subject: [PATCH 07/26] Revert "Fixed R package build for windows and linting
 error"

This reverts commit d50769e0
---
 R-package/R/lgb.train.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index cba50e38939d..c9724953ce7d 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -15,8 +15,7 @@
 #' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 #'            the "Parameters" section of the documentation} for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
-#'                                                      \code{"mvs"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"mvs"} or \code{"goss"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfitting. Tree still grow by leaf-wise.}

From ef1a28c1f43897cd8016cfc129fe4e7bca874db6 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 10:25:42 +0300
Subject: [PATCH 08/26] Revert "Revert "Fixed R package build for windows and
 linting error""

This reverts commit f531f3a37673f1eacdaf8695ca6816cf03811709.
---
 R-package/R/lgb.train.R       | 2 +-
 R-package/src/Makevars.win.in | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index c9724953ce7d..c47d6ce6901e 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -15,7 +15,7 @@
 #' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 #'            the "Parameters" section of the documentation} for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"mvs"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfitting. Tree still grow by leaf-wise.}
diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in
index 3ec46956b7c4..0fb2de926905 100644
--- a/R-package/src/Makevars.win.in
+++ b/R-package/src/Makevars.win.in
@@ -27,7 +27,6 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
-    boosting/mvs.o\
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From c6100356140a74e2fc47f602e3eeccc801c3a829 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 10:28:14 +0300
Subject: [PATCH 09/26] Fixed some documentation

---
 docs/Parameters.rst       | 2 +-
 include/LightGBM/config.h | 3 ++-
 src/boosting/mvs.hpp      | 4 +---
 src/io/config_auto.cpp    | 1 -
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 3b5af283feaf..43ecdd83d97b 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -338,7 +338,7 @@ Learning Control Parameters
 
    -  **Note**: if balanced bagging is enabled, ``bagging_fraction`` will be ignored
 
--  ``mvs_lambda`` :raw-html:`<a id="mvs_lambda" title="Permalink to this parameter" href="#mvs_lambda">&#x1F517;&#xFE0E;</a>`, default = ``1e-4``, type = double, constraints: ``0.0 < mvs_lambda <= 1.0``
+-  ``mvs_lambda`` :raw-html:`<a id="mvs_lambda" title="Permalink to this parameter" href="#mvs_lambda">&#x1F517;&#xFE0E;</a>`, default = ``1e-4``, type = double, constraints: ``mvs_lambda > 0.0``
 
    -  used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
 
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index be63d8ae5254..19479c92fe88 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -319,12 +319,13 @@ struct Config {
   // desc = **Note**: if balanced bagging is enabled, ``bagging_fraction`` will be ignored
   double neg_bagging_fraction = 1.0;
 
+  // default = 1e-4
   // check = >0.0
-  // check = <=1.0
   // desc = used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
   // desc = used only in ``mvs``
   double mvs_lambda = 1e-4;
 
+  // default = false
   // desc = use adaptive variant of mvs boosting
   // desc = used only in ``mvs``
   bool mvs_adaptive = false;
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index 9c584bdd6e2c..3e133034f5ae 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -24,9 +24,7 @@ namespace LightGBM {
 
 class MVS : public GBDT {
  public:
-  /*!
-   * \brief Constructor
-   */
+
   MVS();
 
   ~MVS() {
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 41a8d56e55b5..f4fcabdf522d 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -361,7 +361,6 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
 
   GetDouble(params, "mvs_lambda", &mvs_lambda);
   CHECK_GT(mvs_lambda, 0.0);
-  CHECK_LE(mvs_lambda, 1.0);
 
   GetBool(params, "mvs_adaptive", &mvs_adaptive);
 

From 4425874471eddfbf9c50266602d78606b596c6d7 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 15:14:22 +0300
Subject: [PATCH 10/26] Fixed indentation error in mvs.hpp, fixed some Windows
 build issues, added Sphinx version upper bound

---
 docs/requirements_base.txt       | 2 +-
 src/boosting/mvs.cpp             | 2 ++
 src/boosting/mvs.hpp             | 1 -
 windows/LightGBM.vcxproj         | 2 ++
 windows/LightGBM.vcxproj.filters | 6 ++++++
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/requirements_base.txt b/docs/requirements_base.txt
index baebc41b5e1c..7fa8e2b3fb9f 100644
--- a/docs/requirements_base.txt
+++ b/docs/requirements_base.txt
@@ -1,2 +1,2 @@
-sphinx
+sphinx <= 3.5.4
 sphinx_rtd_theme >= 0.5
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index e7dc108feab3..fbbf9785fd7f 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -6,6 +6,8 @@
 #include "mvs.hpp"
 
 #include <memory>
+#include <numeric>
+
 
 namespace LightGBM {
 
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index 3e133034f5ae..d3e60483fe45 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -24,7 +24,6 @@ namespace LightGBM {
 
 class MVS : public GBDT {
  public:
-
   MVS();
 
   ~MVS() {
diff --git a/windows/LightGBM.vcxproj b/windows/LightGBM.vcxproj
index 59b589a40d51..71a521cc593f 100644
--- a/windows/LightGBM.vcxproj
+++ b/windows/LightGBM.vcxproj
@@ -274,6 +274,7 @@
     <ClInclude Include="..\src\boosting\dart.hpp" />
     <ClInclude Include="..\src\boosting\goss.hpp" />
     <ClInclude Include="..\src\boosting\rf.hpp" />
+    <ClInclude Include="..\src\boosting\mvs.hpp" />
     <ClInclude Include="..\src\boosting\score_updater.hpp" />
     <ClInclude Include="..\src\io\dense_bin.hpp" />
     <ClInclude Include="..\src\io\multi_val_dense_bin.hpp" />
@@ -310,6 +311,7 @@
     <ClCompile Include="..\src\boosting\gbdt.cpp" />
     <ClCompile Include="..\src\boosting\gbdt_model_text.cpp" />
     <ClCompile Include="..\src\boosting\gbdt_prediction.cpp" />
+    <ClCompile Include="..\src\boosting\mvs.cpp" />
     <ClCompile Include="..\src\boosting\prediction_early_stop.cpp" />
     <ClCompile Include="..\src\c_api.cpp" />
     <ClCompile Include="..\src\io\bin.cpp" />
diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters
index 0f48c7564580..9df3e1ebb628 100644
--- a/windows/LightGBM.vcxproj.filters
+++ b/windows/LightGBM.vcxproj.filters
@@ -42,6 +42,9 @@
     <ClInclude Include="..\src\boosting\gbdt.h">
       <Filter>src\boosting</Filter>
     </ClInclude>
+    <ClInclude Include="..\src\boosting\mvs.hpp">
+          <Filter>src\boosting</Filter>
+    </ClInclude>
     <ClInclude Include="..\src\network\linkers.h">
       <Filter>src\network</Filter>
     </ClInclude>
@@ -248,6 +251,9 @@
     <ClCompile Include="..\src\Boosting\gbdt.cpp">
       <Filter>src\boosting</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\Boosting\mvs.cpp">
+          <Filter>src\boosting</Filter>
+    </ClCompile>
     <ClCompile Include="..\src\io\dataset.cpp">
       <Filter>src\io</Filter>
     </ClCompile>

From a5b72f8d604199cf1bb985d3de38f1198c8847de Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 15:14:30 +0300
Subject: [PATCH 11/26] Fixed indentation error in mvs.hpp, fixed some Windows
 build issues, added Sphinx version upper bound

---
 R-package/src/Makevars.win.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in
index 0fb2de926905..3ec46956b7c4 100644
--- a/R-package/src/Makevars.win.in
+++ b/R-package/src/Makevars.win.in
@@ -27,6 +27,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
+    boosting/mvs.o\
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From fb8ff6e4341e1e49b75a88447b73bca6891c05af Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Sun, 9 May 2021 17:52:23 +0300
Subject: [PATCH 12/26] Update requirements_base.txt

---
 docs/requirements_base.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements_base.txt b/docs/requirements_base.txt
index bb6dbc8e61e1..9314ee0deb35 100644
--- a/docs/requirements_base.txt
+++ b/docs/requirements_base.txt
@@ -1,2 +1,2 @@
-sphinx <= 4
+sphinx < 4
 sphinx_rtd_theme >= 0.5

From d499d158f8267cebf7b2557380d8bb3bc0b19b20 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 16:28:32 +0300
Subject: [PATCH 13/26] Update R-package/src/Makevars.in

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 R-package/src/Makevars.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 7b0ef5852a2e..4cde0fff7c8e 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -26,7 +26,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
-    boosting/mvs.o\
+    boosting/mvs.o \
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From 8a01fb806c70ec018788133d6042c5e2b53309b0 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 16:28:37 +0300
Subject: [PATCH 14/26] Update R-package/src/Makevars.win.in

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 R-package/src/Makevars.win.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in
index 3ec46956b7c4..8e75bd58a309 100644
--- a/R-package/src/Makevars.win.in
+++ b/R-package/src/Makevars.win.in
@@ -27,7 +27,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
-    boosting/mvs.o\
+    boosting/mvs.o \
     io/bin.o \
     io/config.o \
     io/config_auto.o \

From 4b630a1f259967392522ff900d576cade4984942 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 16:29:25 +0300
Subject: [PATCH 15/26] Added MVS booster support for dask tests

---
 tests/python_package_test/test_dask.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 2d3ac7c606a2..494d3a7930e1 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -41,7 +41,7 @@
 tasks = ['binary-classification', 'multiclass-classification', 'regression', 'ranking']
 distributed_training_algorithms = ['data', 'voting']
 data_output = ['array', 'scipy_csr_matrix', 'dataframe', 'dataframe-with-categorical']
-boosting_types = ['gbdt', 'dart', 'goss', 'rf']
+boosting_types = ['gbdt', 'dart', 'goss', 'rf', 'mvs']
 group_sizes = [5, 5, 5, 10, 10, 10, 20, 20, 20, 50, 50]
 task_to_dask_factory = {
     'regression': lgb.DaskLGBMRegressor,
@@ -266,6 +266,12 @@ def test_classifier(output, task, boosting_type, tree_learner, cluster):
             })
         elif boosting_type == 'goss':
             params['top_rate'] = 0.5
+        elif boosting_type == 'mvs':
+            params.update({
+                'bagging_freq' : 1,
+                'mvs_adaptive' : True,
+                'bagging_fraction': 0.9
+            })
 
         dask_classifier = lgb.DaskLGBMClassifier(
             client=client,
@@ -476,6 +482,12 @@ def test_regressor(output, boosting_type, tree_learner, cluster):
                 'bagging_freq': 1,
                 'bagging_fraction': 0.9,
             })
+        elif boosting_type == 'mvs':
+            params.update({
+                'bagging_freq' : 1,
+                'mvs_adaptive' : True,
+                'bagging_fraction': 0.9
+            })
 
         dask_regressor = lgb.DaskLGBMRegressor(
             client=client,
@@ -671,6 +683,12 @@ def test_ranker(output, group, boosting_type, tree_learner, cluster):
                 'bagging_freq': 1,
                 'bagging_fraction': 0.9,
             })
+        elif boosting_type == 'mvs':
+            params.update({
+                'bagging_freq' : 1,
+                'mvs_adaptive' : True,
+                'bagging_fraction': 0.9
+            })
 
         dask_ranker = lgb.DaskLGBMRanker(
             client=client,

From 49ed4ebd9a3951ec34a36b7e6e17b5c6230e7c88 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 17:14:34 +0300
Subject: [PATCH 16/26] Moved CalculateThresholdSequential to array_args.h and
 renamed it to CalculateThresholdMVS

---
 include/LightGBM/utils/array_args.h | 31 +++++++++++++++++++++++++
 src/boosting/mvs.cpp                | 36 +----------------------------
 2 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h
index 0183ecc22ddb..cd5ebe43a609 100644
--- a/include/LightGBM/utils/array_args.h
+++ b/include/LightGBM/utils/array_args.h
@@ -9,6 +9,7 @@
 #include <LightGBM/utils/threading.h>
 
 #include <algorithm>
+#include <numeric>
 #include <utility>
 #include <vector>
 
@@ -183,6 +184,36 @@ class ArrayArgs {
     }
     return true;
   }
+
+  static double CalculateThresholdMVS(std::vector<VAL_T>* gradients, data_size_t begin, data_size_t end,
+                                             const double sample_size) {
+    double current_sum_small = 0.0;
+    data_size_t big_grad_size = 0;
+
+    while (begin != end) {
+      data_size_t middle_begin = 0, middle_end = 0;
+      ArrayArgs<score_t>::Partition(gradients, begin, end, &middle_begin, &middle_end);
+      ++middle_begin;  // for half intervals
+      const data_size_t n_middle = middle_end - middle_begin;
+      const data_size_t large_size = middle_begin - begin;
+
+      const double sum_small = std::accumulate(gradients->begin() + middle_end, gradients->begin() + end, 0.0);
+      const double sum_middle = (*gradients)[middle_begin] * n_middle;
+
+      const double
+          current_sampling_rate = (current_sum_small + sum_small) / (*gradients)[middle_begin] + big_grad_size + n_middle + large_size;
+
+      if (current_sampling_rate > sample_size) {
+        current_sum_small += sum_small + sum_middle;
+        end = middle_begin;
+      } else {
+        big_grad_size += n_middle + large_size;
+        begin = middle_end;
+      }
+    }
+
+    return current_sum_small / (sample_size - big_grad_size + kEpsilon);
+  }
 };
 
 }  // namespace LightGBM
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index fbbf9785fd7f..5ecdacca1170 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -5,46 +5,12 @@
 
 #include "mvs.hpp"
 
-#include <memory>
-#include <numeric>
-
-
 namespace LightGBM {
 
 using ConstTreeIterator = std::vector<std::unique_ptr<Tree>>::const_iterator;
 
 MVS::MVS() : GBDT() {}
 
-static double CalculateThresholdSequential(std::vector<score_t>* gradients, data_size_t begin, data_size_t end,
-                                    const double sample_size) {
-  double current_sum_small = 0.0;
-  data_size_t big_grad_size = 0;
-
-  while (begin != end) {
-    data_size_t middle_begin = 0, middle_end = 0;
-    ArrayArgs<score_t>::Partition(gradients, begin, end, &middle_begin, &middle_end);
-    ++middle_begin;  // for half intervals
-    const data_size_t n_middle = middle_end - middle_begin;
-    const data_size_t large_size = middle_begin - begin;
-
-    const double sum_small = std::accumulate(gradients->begin() + middle_end, gradients->begin() + end, 0.0);
-    const double sum_middle = (*gradients)[middle_begin] * n_middle;
-
-    const double
-        current_sampling_rate = (current_sum_small + sum_small) / (*gradients)[middle_begin] + big_grad_size + n_middle + large_size;
-
-    if (current_sampling_rate > sample_size) {
-      current_sum_small += sum_small + sum_middle;
-      end = middle_begin;
-    } else {
-      big_grad_size += n_middle + large_size;
-      begin = middle_end;
-    }
-  }
-
-  return current_sum_small / (sample_size - big_grad_size + kEpsilon);
-}
-
 static double ComputeLeavesMeanSquaredValue(ConstTreeIterator begin, ConstTreeIterator end) {
   double sum_values = 0.0;
   data_size_t num_leaves = (*begin)->num_leaves();
@@ -188,7 +154,7 @@ double MVS::GetThreshold(data_size_t begin, data_size_t cnt) {
     tmp_derivatives_[i] = std::sqrt(tmp_derivatives_[i]);
   }
 
-  double threshold = CalculateThresholdSequential(&tmp_derivatives_, begin, begin + cnt,
+  double threshold = ArrayArgs<score_t>::CalculateThresholdMVS(&tmp_derivatives_, begin, begin + cnt,
                                                   cnt * config_->bagging_fraction);
   return threshold;
 }

From d018ed0bb509cf95e9f9cbddf7ebc67cb5ce9d42 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 18:58:20 +0300
Subject: [PATCH 17/26] Added cpp tests for ArrayArgs::CalculateThresholdMVS
 and ArrayArgs::Partition.

---
 include/LightGBM/utils/array_args.h           |  2 +-
 tests/cpp_tests/test_mvs_threshold_search.cpp | 57 +++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 tests/cpp_tests/test_mvs_threshold_search.cpp

diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h
index cd5ebe43a609..51e13969900d 100644
--- a/include/LightGBM/utils/array_args.h
+++ b/include/LightGBM/utils/array_args.h
@@ -191,7 +191,7 @@ class ArrayArgs {
     data_size_t big_grad_size = 0;
 
     while (begin != end) {
-      data_size_t middle_begin = 0, middle_end = 0;
+      data_size_t middle_begin = begin - 1, middle_end = end;
       ArrayArgs<score_t>::Partition(gradients, begin, end, &middle_begin, &middle_end);
       ++middle_begin;  // for half intervals
       const data_size_t n_middle = middle_end - middle_begin;
diff --git a/tests/cpp_tests/test_mvs_threshold_search.cpp b/tests/cpp_tests/test_mvs_threshold_search.cpp
new file mode 100644
index 000000000000..088c71450a11
--- /dev/null
+++ b/tests/cpp_tests/test_mvs_threshold_search.cpp
@@ -0,0 +1,57 @@
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+
+#include <gtest/gtest.h>
+#include <LightGBM/meta.h>
+#include <LightGBM/utils/array_args.h>
+
+using namespace LightGBM;
+
+template<class Value>
+double ComputeExpectationOfMVS(const std::vector<Value> &grads, double threshold) {
+  double expectation = 0.0;
+  for (const auto &value: grads) {
+    if (value >= threshold) {
+      expectation += 1.;
+    } else {
+      expectation += value / threshold;
+    }
+  }
+  return expectation;
+}
+
+void ComputeSamplingRate(std::vector<score_t> gradients,
+                       const double sampling_fraction,
+                       double *expected_sample_size,
+                       double *resulting_sample_size) {
+  CHECK(expected_sample_size != nullptr);
+  CHECK(resulting_sample_size != nullptr);
+  *expected_sample_size = sampling_fraction * static_cast<double>(gradients.size());
+  double threshold = ArrayArgs<score_t>::CalculateThresholdMVS(&gradients, 0, gradients.size(), *expected_sample_size);
+  *resulting_sample_size = ComputeExpectationOfMVS(gradients, threshold);
+}
+
+TEST(SearchThresholdMVS, Basic) {
+  std::vector<LightGBM::score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
+  double expected, resulting;
+  ComputeSamplingRate(gradients, 0.5, &expected, &resulting);
+  EXPECT_DOUBLE_EQ(expected, resulting);
+}
+
+TEST(ArrayArgs, Partition) {
+  std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
+  data_size_t middle_begin = -1, middle_end = gradients.size();
+  ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);
+  EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
+  EXPECT_GT(gradients[0], gradients[middle_begin + 1]);
+  EXPECT_GT(gradients[middle_begin + 1], gradients.back());
+}
+
+TEST(SearchThresholdMVS, PartitionOneElement) {
+  std::vector<score_t> gradients({0.5f});
+  data_size_t middle_begin = -1, middle_end = gradients.size();
+  ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);
+  EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
+}
\ No newline at end of file

From d62c98c7d7945161ab09b6a2d2fb1348f465fcbc Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 19:00:07 +0300
Subject: [PATCH 18/26] Fix linter errors in test_dask.py

---
 tests/python_package_test/test_dask.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 494d3a7930e1..4a301a70009a 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -268,8 +268,8 @@ def test_classifier(output, task, boosting_type, tree_learner, cluster):
             params['top_rate'] = 0.5
         elif boosting_type == 'mvs':
             params.update({
-                'bagging_freq' : 1,
-                'mvs_adaptive' : True,
+                'bagging_freq': 1,
+                'mvs_adaptive': True,
                 'bagging_fraction': 0.9
             })
 
@@ -484,8 +484,8 @@ def test_regressor(output, boosting_type, tree_learner, cluster):
             })
         elif boosting_type == 'mvs':
             params.update({
-                'bagging_freq' : 1,
-                'mvs_adaptive' : True,
+                'bagging_freq': 1,
+                'mvs_adaptive': True,
                 'bagging_fraction': 0.9
             })
 
@@ -685,8 +685,8 @@ def test_ranker(output, group, boosting_type, tree_learner, cluster):
             })
         elif boosting_type == 'mvs':
             params.update({
-                'bagging_freq' : 1,
-                'mvs_adaptive' : True,
+                'bagging_freq': 1,
+                'mvs_adaptive': True,
                 'bagging_fraction': 0.9
             })
 

From 8cee27e8f23eb142355987bb8e29238e1a1e88b1 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 19:01:20 +0300
Subject: [PATCH 19/26] Fixed UB in ArrayArgs::Partition, when it is called
 with one element.

---
 include/LightGBM/utils/array_args.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h
index 51e13969900d..c6362b84b41d 100644
--- a/include/LightGBM/utils/array_args.h
+++ b/include/LightGBM/utils/array_args.h
@@ -104,7 +104,7 @@ class ArrayArgs {
     int j = end - 1;
     int p = i;
     int q = j;
-    if (start >= end) {
+    if (start >= end - 1) {
       return;
     }
     std::vector<VAL_T>& ref = *arr;

From 224ac053e25ed498435cffcfc119570901046ff4 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 20:04:32 +0300
Subject: [PATCH 20/26] Fixed linter errors

---
 tests/cpp_tests/test_mvs_threshold_search.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/cpp_tests/test_mvs_threshold_search.cpp b/tests/cpp_tests/test_mvs_threshold_search.cpp
index 088c71450a11..ef6f97bc2df5 100644
--- a/tests/cpp_tests/test_mvs_threshold_search.cpp
+++ b/tests/cpp_tests/test_mvs_threshold_search.cpp
@@ -7,12 +7,12 @@
 #include <LightGBM/meta.h>
 #include <LightGBM/utils/array_args.h>
 
-using namespace LightGBM;
 
 template<class Value>
 double ComputeExpectationOfMVS(const std::vector<Value> &grads, double threshold) {
+  using namespace LightGBM;
   double expectation = 0.0;
-  for (const auto &value: grads) {
+  for (const auto &value : grads) {
     if (value >= threshold) {
       expectation += 1.;
     } else {
@@ -22,10 +22,11 @@ double ComputeExpectationOfMVS(const std::vector<Value> &grads, double threshold
   return expectation;
 }
 
-void ComputeSamplingRate(std::vector<score_t> gradients,
+void ComputeSamplingRate(std::vector<LightGBM::score_t> gradients,
                        const double sampling_fraction,
                        double *expected_sample_size,
                        double *resulting_sample_size) {
+  using namespace LightGBM;
   CHECK(expected_sample_size != nullptr);
   CHECK(resulting_sample_size != nullptr);
   *expected_sample_size = sampling_fraction * static_cast<double>(gradients.size());
@@ -34,13 +35,15 @@ void ComputeSamplingRate(std::vector<score_t> gradients,
 }
 
 TEST(SearchThresholdMVS, Basic) {
-  std::vector<LightGBM::score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
+  using namespace LightGBM;
+  std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
   double expected, resulting;
   ComputeSamplingRate(gradients, 0.5, &expected, &resulting);
   EXPECT_DOUBLE_EQ(expected, resulting);
 }
 
 TEST(ArrayArgs, Partition) {
+  using namespace LightGBM;
   std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
   data_size_t middle_begin = -1, middle_end = gradients.size();
   ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);
@@ -50,8 +53,9 @@ TEST(ArrayArgs, Partition) {
 }
 
 TEST(SearchThresholdMVS, PartitionOneElement) {
+  using namespace LightGBM;
   std::vector<score_t> gradients({0.5f});
   data_size_t middle_begin = -1, middle_end = gradients.size();
   ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);
   EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
-}
\ No newline at end of file
+}

From 5cd44229c07f86ecef6cc5c145e39cb04a88ee1c Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Mon, 10 May 2021 23:14:39 +0300
Subject: [PATCH 21/26] Added more cpp tests and fixed linting errors

---
 tests/cpp_tests/test_mvs_threshold_search.cpp | 62 ++++++++++++++++---
 1 file changed, 52 insertions(+), 10 deletions(-)

diff --git a/tests/cpp_tests/test_mvs_threshold_search.cpp b/tests/cpp_tests/test_mvs_threshold_search.cpp
index ef6f97bc2df5..3d25e4c9d023 100644
--- a/tests/cpp_tests/test_mvs_threshold_search.cpp
+++ b/tests/cpp_tests/test_mvs_threshold_search.cpp
@@ -7,10 +7,14 @@
 #include <LightGBM/meta.h>
 #include <LightGBM/utils/array_args.h>
 
+#include <random>
+
+using LightGBM::data_size_t;
+using LightGBM::score_t;
+using LightGBM::ArrayArgs;
 
 template<class Value>
 double ComputeExpectationOfMVS(const std::vector<Value> &grads, double threshold) {
-  using namespace LightGBM;
   double expectation = 0.0;
   for (const auto &value : grads) {
     if (value >= threshold) {
@@ -22,18 +26,30 @@ double ComputeExpectationOfMVS(const std::vector<Value> &grads, double threshold
   return expectation;
 }
 
-void ComputeSamplingRate(std::vector<LightGBM::score_t> gradients,
-                       const double sampling_fraction,
-                       double *expected_sample_size,
-                       double *resulting_sample_size) {
-  using namespace LightGBM;
-  CHECK(expected_sample_size != nullptr);
-  CHECK(resulting_sample_size != nullptr);
+void ComputeSamplingRate(std::vector<score_t> gradients,
+                         const double sampling_fraction,
+                         double *expected_sample_size,
+                         double *resulting_sample_size) {
+  EXPECT_TRUE(expected_sample_size);
+  EXPECT_TRUE(resulting_sample_size);
+
   *expected_sample_size = sampling_fraction * static_cast<double>(gradients.size());
+
   double threshold = ArrayArgs<score_t>::CalculateThresholdMVS(&gradients, 0, gradients.size(), *expected_sample_size);
+
   *resulting_sample_size = ComputeExpectationOfMVS(gradients, threshold);
 }
 
+template<class VAL_T>
+std::vector<VAL_T> GenerateRandomVector(std::mt19937_64 &rng, size_t size) {
+  std::uniform_real_distribution<VAL_T> distribution(1., 2.0f);
+  std::vector<VAL_T> result;
+  for (size_t i = 0; i < size; ++i) {
+    result.emplace_back(distribution(rng));
+  }
+  return result;
+}
+
 TEST(SearchThresholdMVS, Basic) {
   using namespace LightGBM;
   std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
@@ -42,18 +58,44 @@ TEST(SearchThresholdMVS, Basic) {
   EXPECT_DOUBLE_EQ(expected, resulting);
 }
 
-TEST(ArrayArgs, Partition) {
+TEST(SearchThresholdMVS, SameGradientValue) {
   using namespace LightGBM;
+  std::vector<score_t> gradients;
+
+  for (size_t i = 0; i < 10; ++i) {
+    gradients.emplace_back(1.);
+  }
+
+  double expected, resulting;
+  ComputeSamplingRate(gradients, 0.5, &expected, &resulting);
+  EXPECT_DOUBLE_EQ(expected, resulting);
+  EXPECT_DOUBLE_EQ(resulting, 5.);
+}
+
+TEST(SearchThresholdMVS, LargeTest) {
+  std::mt19937_64 rng(42);
+  const size_t number_of_iterations = 100;
+  for (size_t i = 0; i < number_of_iterations; ++i) {
+    std::vector<score_t> grad = GenerateRandomVector<score_t>(rng, 10000);
+
+    double expected, resulting;
+    ComputeSamplingRate(std::move(grad), 0.01 + (0.98 * i) / number_of_iterations, &expected, &resulting);
+    EXPECT_NEAR(expected, resulting, 1e-3);
+  }
+}
+
+TEST(ArrayArgs, Partition) {
   std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
   data_size_t middle_begin = -1, middle_end = gradients.size();
+
   ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);
+
   EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
   EXPECT_GT(gradients[0], gradients[middle_begin + 1]);
   EXPECT_GT(gradients[middle_begin + 1], gradients.back());
 }
 
 TEST(SearchThresholdMVS, PartitionOneElement) {
-  using namespace LightGBM;
   std::vector<score_t> gradients({0.5f});
   data_size_t middle_begin = -1, middle_end = gradients.size();
   ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);

From 468102b04b8b8bafc6c80c1d4e33bf6db7fcc311 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Tue, 11 May 2021 00:00:28 +0300
Subject: [PATCH 22/26] Fixed linting errors

---
 tests/cpp_tests/test_mvs_threshold_search.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/cpp_tests/test_mvs_threshold_search.cpp b/tests/cpp_tests/test_mvs_threshold_search.cpp
index 3d25e4c9d023..7a6463d303fe 100644
--- a/tests/cpp_tests/test_mvs_threshold_search.cpp
+++ b/tests/cpp_tests/test_mvs_threshold_search.cpp
@@ -51,7 +51,6 @@ std::vector<VAL_T> GenerateRandomVector(std::mt19937_64 &rng, size_t size) {
 }
 
 TEST(SearchThresholdMVS, Basic) {
-  using namespace LightGBM;
   std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
   double expected, resulting;
   ComputeSamplingRate(gradients, 0.5, &expected, &resulting);
@@ -59,7 +58,6 @@ TEST(SearchThresholdMVS, Basic) {
 }
 
 TEST(SearchThresholdMVS, SameGradientValue) {
-  using namespace LightGBM;
   std::vector<score_t> gradients;
 
   for (size_t i = 0; i < 10; ++i) {

From fd3f64a174cefb758e78f03a00fcd02d5164b446 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Tue, 11 May 2021 18:50:48 +0300
Subject: [PATCH 23/26] Updated R-package documentation; updated documentation;
 updated test_mvs_threshold_search.cpp; added parallel computation of
 regularized absolute value term; added new mvs parameter in place of a constant.

---
 R-package/R/lgb.cv.R                          |  3 ++-
 R-package/R/lgb.train.R                       |  3 ++-
 R-package/R/lightgbm.R                        |  3 ++-
 R-package/man/lgb.cv.Rd                       |  3 ++-
 R-package/man/lgb.train.Rd                    |  3 ++-
 R-package/man/lightgbm.Rd                     |  3 ++-
 docs/Parameters.rst                           | 10 ++++++++
 include/LightGBM/config.h                     |  8 ++++++
 src/boosting/mvs.cpp                          | 25 +++++++++++--------
 src/boosting/mvs.hpp                          |  2 --
 src/io/config_auto.cpp                        |  5 ++++
 tests/cpp_tests/test_mvs_threshold_search.cpp |  8 +++---
 12 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 74a4e4d64728..5d6b52f8b5bc 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -41,7 +41,8 @@ CVBooster <- R6::R6Class(
 #'                   into a predictor model which frees up memory and the original datasets
 #' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"goss"}
+#'                                                   or \code{"mvs"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfit when #data is small. Tree still grow by leaf-wise.}
diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index c47d6ce6901e..18bfffa53611 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -15,7 +15,8 @@
 #' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 #'            the "Parameters" section of the documentation} for more information. A few key parameters:
 #'            \itemize{
-#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+#'                \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
+#'                                                      \code{"goss"} or \code{"mvs"}.}
 #'                \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'                \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                                 overfitting. Tree still grow by leaf-wise.}
diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R
index e2df9063ed26..17169ec28272 100644
--- a/R-package/R/lightgbm.R
+++ b/R-package/R/lightgbm.R
@@ -90,7 +90,8 @@ NULL
 #'                            say "the first and tenth columns").}
 #'        \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'                          into a predictor model which frees up memory and the original datasets}
-#'         \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+#'         \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
+#'                                               \code{"goss"} or \code{"mvs"}.}
 #'         \item{\code{num_leaves}: Maximum number of leaves in one tree.}
 #'         \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
 #'                          overfit when #data is small. Tree still grow by leaf-wise.}
diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
index ec606d880ac6..9e5238eb7825 100644
--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -118,7 +118,8 @@ into a predictor model which frees up memory and the original datasets}
 
 \item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
 \itemize{
-    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"}, \code{"goss"}
+                                       or \code{"mvs"}.}
     \item{\code{num_leaves}: Maximum number of leaves in one tree.}
     \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
                      overfit when #data is small. Tree still grow by leaf-wise.}
diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd
index 40c7135d3b26..1239872d6099 100644
--- a/R-package/man/lgb.train.Rd
+++ b/R-package/man/lgb.train.Rd
@@ -102,7 +102,8 @@ original datasets}
 \item{...}{other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
 the "Parameters" section of the documentation} for more information. A few key parameters:
 \itemize{
-    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
+                                          \code{"goss"} or \code{"mvs"}.}
     \item{\code{num_leaves}: Maximum number of leaves in one tree.}
     \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
                      overfitting. Tree still grow by leaf-wise.}
diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd
index 6512dbc6b23a..e175a453cc7c 100644
--- a/R-package/man/lightgbm.Rd
+++ b/R-package/man/lightgbm.Rd
@@ -63,7 +63,8 @@ set to the iteration number of the best iteration.}
                        say "the first and tenth columns").}
    \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
                      into a predictor model which frees up memory and the original datasets}
-    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
+    \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"},
+                                          \code{"goss"} or \code{"mvs"}.}
     \item{\code{num_leaves}: Maximum number of leaves in one tree.}
     \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
                      overfit when #data is small. Tree still grow by leaf-wise.}
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 43ecdd83d97b..e837a92ac51a 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -350,6 +350,16 @@ Learning Control Parameters
 
    -  used only in ``mvs``
 
+-  ``mvs_max_sequential_size`` :raw-html:`<a id="mvs_max_sequential_size" title="Permalink to this parameter" href="#mvs_max_sequential_size">&#x1F517;&#xFE0E;</a>`, default = ``256000``, type = int, constraints: ``mvs_max_sequential_size > 0``
+
+   -  used in MVS boosting training dataset size is greater than ``mvs_max_sequential_size``, than threshold
+
+   -  for MVS is chosen for each thread independently.
+
+   -  used only in ``mvs``
+
+   -  **Note**: on small dataset setting this parameter less than size of dataset may produce results depending on number of threads
+
 -  ``bagging_freq`` :raw-html:`<a id="bagging_freq" title="Permalink to this parameter" href="#bagging_freq">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``subsample_freq``
 
    -  frequency for bagging
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 19479c92fe88..d1a0db9a7b9b 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -330,6 +330,14 @@ struct Config {
   // desc = used only in ``mvs``
   bool mvs_adaptive = false;
 
+  // default = 256000
+  // check = >0
+  // desc = used in MVS boosting training dataset size is greater than ``mvs_max_sequential_size``, than threshold
+  // desc = for MVS is chosen for each thread independently.
+  // desc = used only in ``mvs``
+  // desc = **Note**: on small dataset setting this parameter less than size of dataset may produce results depending on number of threads
+  int mvs_max_sequential_size = 256000;
+
   // alias = subsample_freq
   // desc = frequency for bagging
   // desc = ``0`` means disable bagging; ``k`` means perform bagging at every ``k`` iteration. Every ``k``-th iteration, LightGBM will randomly select ``bagging_fraction * 100 %`` of the data to use for the next ``k`` iterations
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index 5ecdacca1170..b5dbad319195 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -5,6 +5,8 @@
 
 #include "mvs.hpp"
 
+#include <memory>
+
 namespace LightGBM {
 
 using ConstTreeIterator = std::vector<std::unique_ptr<Tree>>::const_iterator;
@@ -69,7 +71,17 @@ void MVS::Bagging(int iter) {
   bag_data_cnt_ = num_data_;
   mvs_lambda_ = GetLambda();
 
-  if (num_data_ <= kMaxSequentialSize) {
+  #pragma omp parallel for schedule(static, 1024)
+  for (data_size_t i = 0; i < num_data_; ++i) {
+    tmp_derivatives_[i] = 0.0f;
+    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+      size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + i;
+      tmp_derivatives_[i] += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
+    }
+    tmp_derivatives_[i] = std::sqrt(tmp_derivatives_[i]);
+  }
+
+  if (num_data_ <= config_->mvs_max_sequential_size) {
     threshold_ = GetThreshold(0, num_data_);
   }
 
@@ -141,19 +153,10 @@ data_size_t MVS::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *
 double MVS::GetThreshold(data_size_t begin, data_size_t cnt) {
   data_size_t n_blocks, block_size;
   Threading::BlockInfoForceSize<data_size_t>(num_data_, bagging_rand_block_, &n_blocks, &block_size);
-  if (num_data_ < kMaxSequentialSize && block_size > 1 && threshold_ != 0.0) {
+  if (num_data_ <= config_->mvs_max_sequential_size && block_size > 1 && threshold_ != 0.0) {
     return threshold_;
   }
 
-  for (data_size_t i = begin; i < begin + cnt; ++i) {
-    tmp_derivatives_[i] = 0.0f;
-    for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-      size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + i;
-      tmp_derivatives_[i] += gradients_[idx] * gradients_[idx] + mvs_lambda_ * hessians_[idx] * hessians_[idx];
-    }
-    tmp_derivatives_[i] = std::sqrt(tmp_derivatives_[i]);
-  }
-
   double threshold = ArrayArgs<score_t>::CalculateThresholdMVS(&tmp_derivatives_, begin, begin + cnt,
                                                   cnt * config_->bagging_fraction);
   return threshold;
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index d3e60483fe45..02eeb34649a3 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -91,8 +91,6 @@ class MVS : public GBDT {
 
   double GetLambda();
 
-  static const data_size_t kMaxSequentialSize = 256000;
-
   double mvs_lambda_;
   double threshold_{0.0};
   std::vector<score_t> tmp_derivatives_;
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index f4fcabdf522d..f581f1f32a0c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -198,6 +198,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "neg_bagging_fraction",
   "mvs_lambda",
   "mvs_adaptive",
+  "mvs_max_sequential_size",
   "bagging_freq",
   "bagging_seed",
   "feature_fraction",
@@ -364,6 +365,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
 
   GetBool(params, "mvs_adaptive", &mvs_adaptive);
 
+  GetInt(params, "mvs_max_sequential_size", &mvs_max_sequential_size);
+  CHECK_GT(mvs_max_sequential_size, 0);
+
   GetInt(params, "bagging_freq", &bagging_freq);
 
   GetInt(params, "bagging_seed", &bagging_seed);
@@ -660,6 +664,7 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[neg_bagging_fraction: " << neg_bagging_fraction << "]\n";
   str_buf << "[mvs_lambda: " << mvs_lambda << "]\n";
   str_buf << "[mvs_adaptive: " << mvs_adaptive << "]\n";
+  str_buf << "[mvs_max_sequential_size: " << mvs_max_sequential_size << "]\n";
   str_buf << "[bagging_freq: " << bagging_freq << "]\n";
   str_buf << "[bagging_seed: " << bagging_seed << "]\n";
   str_buf << "[feature_fraction: " << feature_fraction << "]\n";
diff --git a/tests/cpp_tests/test_mvs_threshold_search.cpp b/tests/cpp_tests/test_mvs_threshold_search.cpp
index 7a6463d303fe..85974deb6b01 100644
--- a/tests/cpp_tests/test_mvs_threshold_search.cpp
+++ b/tests/cpp_tests/test_mvs_threshold_search.cpp
@@ -41,11 +41,11 @@ void ComputeSamplingRate(std::vector<score_t> gradients,
 }
 
 template<class VAL_T>
-std::vector<VAL_T> GenerateRandomVector(std::mt19937_64 &rng, size_t size) {
+std::vector<VAL_T> GenerateRandomVector(std::mt19937_64 *rng, size_t size) {
   std::uniform_real_distribution<VAL_T> distribution(1., 2.0f);
   std::vector<VAL_T> result;
   for (size_t i = 0; i < size; ++i) {
-    result.emplace_back(distribution(rng));
+    result.emplace_back(distribution(*rng));
   }
   return result;
 }
@@ -74,7 +74,7 @@ TEST(SearchThresholdMVS, LargeTest) {
   std::mt19937_64 rng(42);
   const size_t number_of_iterations = 100;
   for (size_t i = 0; i < number_of_iterations; ++i) {
-    std::vector<score_t> grad = GenerateRandomVector<score_t>(rng, 10000);
+    std::vector<score_t> grad = GenerateRandomVector<score_t>(&rng, 10000);
 
     double expected, resulting;
     ComputeSamplingRate(std::move(grad), 0.01 + (0.98 * i) / number_of_iterations, &expected, &resulting);
@@ -93,7 +93,7 @@ TEST(ArrayArgs, Partition) {
   EXPECT_GT(gradients[middle_begin + 1], gradients.back());
 }
 
-TEST(SearchThresholdMVS, PartitionOneElement) {
+TEST(ArrayArgs, PartitionOneElement) {
   std::vector<score_t> gradients({0.5f});
   data_size_t middle_begin = -1, middle_end = gradients.size();
   ArrayArgs<score_t>::Partition(&gradients, 0, gradients.size(), &middle_begin, &middle_end);

From 11df7890724868ce0609758fb81062829e3f48a9 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Fri, 14 May 2021 19:52:42 +0300
Subject: [PATCH 24/26] Updated MVS Lambda algorithm

---
 src/boosting/mvs.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index b5dbad319195..02dc5c7f9015 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -54,7 +54,6 @@ double MVS::GetLambda() {
   }
   double lambda =
       (this->iter_ > 0) ? ComputeLeavesMeanSquaredValue(models_.cend() - num_tree_per_iteration_, models_.cend())
-          / config_->learning_rate
                         : ComputeMeanGradValues(gradients_.data(),
                                                 hessians_.data(),
                                                 num_data_,

From ddcab83422a9d21ce6393a968449a466843d52f4 Mon Sep 17 00:00:00 2001
From: kruda <karudakov@gmail.com>
Date: Tue, 6 Jul 2021 15:31:50 +0300
Subject: [PATCH 25/26] Updated documentation, MVS::GetLambda,
 MVS::GetThreshold, updated MVS::ResetConfig

---
 docs/Parameters.rst       | 24 ++++++++++++++++++++++++
 include/LightGBM/config.h |  4 ++--
 src/boosting/mvs.cpp      | 35 +++++++++++++++--------------------
 src/boosting/mvs.hpp      |  4 +++-
 4 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index c54bf96a6f8e..8050f193e9b7 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -139,6 +139,8 @@ Core Parameters
 
       -  **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
 
+   -  ``mvs``, `Minimal variance sampling <https://arxiv.org/abs/1910.13204>`__
+
 -  ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 
    -  path of training data, LightGBM will train from this data
@@ -334,6 +336,28 @@ Learning Control Parameters
 
    -  **Note**: if balanced bagging is enabled, ``bagging_fraction`` will be ignored
 
+-  ``mvs_lambda`` :raw-html:`<a id="mvs_lambda" title="Permalink to this parameter" href="#mvs_lambda">&#x1F517;&#xFE0E;</a>`, default = ``1e-4``, type = double, constraints: ``mvs_lambda > 0.0``
+
+   -  used in MVS boosting. If ``mvs_adaptive == true`` then this value is ignored.
+
+   -  used only in ``mvs``
+
+-  ``mvs_adaptive`` :raw-html:`<a id="mvs_adaptive" title="Permalink to this parameter" href="#mvs_adaptive">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
+
+   -  use adaptive variant of mvs boosting
+
+   -  used only in ``mvs``
+
+-  ``mvs_max_sequential_size`` :raw-html:`<a id="mvs_max_sequential_size" title="Permalink to this parameter" href="#mvs_max_sequential_size">&#x1F517;&#xFE0E;</a>`, default = ``256000``, type = int, constraints: ``mvs_max_sequential_size > 0``
+
+   -  used in MVS boosting training. If dataset size is greater than ``mvs_max_sequential_size``, then threshold
+
+   -  for MVS is chosen for each thread independently.
+
+   -  used only in ``mvs``
+
+   -  **Note**: on small dataset setting this parameter less than size of dataset may produce results depending on number of threads
+
 -  ``bagging_freq`` :raw-html:`<a id="bagging_freq" title="Permalink to this parameter" href="#bagging_freq">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``subsample_freq``
 
    -  frequency for bagging
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 286250ddd691..bb9a2c251591 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -308,7 +308,7 @@ struct Config {
 
   // default = 1e-4
   // check = >0.0
-  // desc = used in MVS boosting if ``mvs_adaptive == true`` than this value is ignored
+  // desc = used in MVS boosting. If ``mvs_adaptive == true`` then this value is ignored.
   // desc = used only in ``mvs``
   double mvs_lambda = 1e-4;
 
@@ -319,7 +319,7 @@ struct Config {
 
   // default = 256000
   // check = >0
-  // desc = used in MVS boosting training dataset size is greater than ``mvs_max_sequential_size``, than threshold
+  // desc = used in MVS boosting training. If dataset size is greater than ``mvs_max_sequential_size``, then threshold
   // desc = for MVS is chosen for each thread independently.
   // desc = used only in ``mvs``
   // desc = **Note**: on small dataset setting this parameter less than size of dataset may produce results depending on number of threads
diff --git a/src/boosting/mvs.cpp b/src/boosting/mvs.cpp
index 02dc5c7f9015..c3ba89f215ba 100644
--- a/src/boosting/mvs.cpp
+++ b/src/boosting/mvs.cpp
@@ -13,10 +13,11 @@ using ConstTreeIterator = std::vector<std::unique_ptr<Tree>>::const_iterator;
 
 MVS::MVS() : GBDT() {}
 
-static double ComputeLeavesMeanSquaredValue(ConstTreeIterator begin, ConstTreeIterator end) {
+static double ComputeLeavesMeanSquaredValue(ConstTreeIterator begin,
+                                            ConstTreeIterator end,
+                                            const data_size_t num_leaves) {
   double sum_values = 0.0;
-  data_size_t num_leaves = (*begin)->num_leaves();
-#pragma omp parallel for schedule(static, 2048) reduction(+:sum_values)
+#pragma omp parallel for schedule(static, 2048) reduction(+ : sum_values)
   for (data_size_t leaf_idx = 0; leaf_idx < num_leaves; ++leaf_idx) {
     double leave_value = 0.0;
     for (ConstTreeIterator it = begin; it != end; ++it) {
@@ -30,12 +31,11 @@ static double ComputeLeavesMeanSquaredValue(ConstTreeIterator begin, ConstTreeIt
   return sum_values / num_leaves;
 }
 
-static double ComputeMeanGradValues(score_t *gradients,
-                                    score_t *hessians,
+static double ComputeMeanGradValues(score_t *gradients, score_t *hessians,
                                     data_size_t size,
                                     data_size_t num_tree_per_iteration) {
   double sum = 0.0;
-#pragma omp parallel for schedule(static, 1024) reduction(+:sum)
+#pragma omp parallel for schedule(static, 1024) reduction(+ : sum)
   for (data_size_t i = 0; i < size; ++i) {
     double local_hessians = 0.0, local_gradients = 0.0;
     for (data_size_t j = 0; j < num_tree_per_iteration; ++j) {
@@ -52,25 +52,23 @@ double MVS::GetLambda() {
   if (!mvs_adaptive_) {
     return mvs_lambda_;
   }
-  double lambda =
-      (this->iter_ > 0) ? ComputeLeavesMeanSquaredValue(models_.cend() - num_tree_per_iteration_, models_.cend())
-                        : ComputeMeanGradValues(gradients_.data(),
-                                                hessians_.data(),
-                                                num_data_,
-                                                num_tree_per_iteration_);
-
-  return lambda;
+  if (this->iter_ > 0) {
+    return ComputeLeavesMeanSquaredValue(models_.cend() - num_tree_per_iteration_,
+                                         models_.cend(), config_->num_leaves);
+  }
+  return ComputeMeanGradValues(gradients_.data(), hessians_.data(), num_data_,
+                               num_tree_per_iteration_);
 }
 
 void MVS::Bagging(int iter) {
   if (iter % config_->bagging_freq != 0 && !need_re_bagging_) {
     return;
   }
-
+  need_re_bagging_ = false;
   bag_data_cnt_ = num_data_;
   mvs_lambda_ = GetLambda();
 
-  #pragma omp parallel for schedule(static, 1024)
+  //#pragma omp parallel for schedule(static, 1024)
   for (data_size_t i = 0; i < num_data_; ++i) {
     tmp_derivatives_[i] = 0.0f;
     for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
@@ -150,9 +148,7 @@ data_size_t MVS::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t *
 }
 
 double MVS::GetThreshold(data_size_t begin, data_size_t cnt) {
-  data_size_t n_blocks, block_size;
-  Threading::BlockInfoForceSize<data_size_t>(num_data_, bagging_rand_block_, &n_blocks, &block_size);
-  if (num_data_ <= config_->mvs_max_sequential_size && block_size > 1 && threshold_ != 0.0) {
+  if (num_data_ <= config_->mvs_max_sequential_size && threshold_ != 0.0) {
     return threshold_;
   }
 
@@ -165,7 +161,6 @@ void MVS::ResetMVS() {
   CHECK(config_->bagging_fraction > 0.0f && config_->bagging_fraction < 1.0f && config_->bagging_freq > 0);
   CHECK(config_->mvs_lambda >= 0.0f);
   CHECK(!balanced_bagging_);
-
   bag_data_indices_.resize(num_data_);
   tmp_derivatives_.resize(num_data_);
   Log::Info("Using MVS");
diff --git a/src/boosting/mvs.hpp b/src/boosting/mvs.hpp
index 02eeb34649a3..13f081fbf016 100644
--- a/src/boosting/mvs.hpp
+++ b/src/boosting/mvs.hpp
@@ -51,6 +51,8 @@ class MVS : public GBDT {
 
   void ResetConfig(const Config *config) override {
     GBDT::ResetConfig(config);
+    need_re_bagging_ = mvs_adaptive_ != config->mvs_adaptive
+        || (mvs_lambda_ != config->mvs_lambda && !mvs_adaptive_ && !config->mvs_adaptive);
     mvs_lambda_ = config_->mvs_lambda;
     mvs_adaptive_ = config_->mvs_adaptive;
     ResetMVS();
@@ -63,7 +65,7 @@ class MVS : public GBDT {
       // use customized objective function
       CHECK(hessians != nullptr && objective_function_ == nullptr);
       int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
-      #pragma omp parallel for schedule(static, 1)
+      #pragma omp parallel for schedule(static, 1024)
       for (int64_t i = 0; i < total_size; ++i) {
         gradients_[i] = gradients[i];
         hessians_[i] = hessians[i];

From 31ab4d42312a6973b4b8620317ef17b050d7eb50 Mon Sep 17 00:00:00 2001
From: Nikita Titov <nekit94-08@mail.ru>
Date: Fri, 11 Mar 2022 16:46:09 +0300
Subject: [PATCH 26/26] [ci] fix current `master` fails with graphviz-related
 error (#5068)

* Update test_windows.ps1

* Update .appveyor.yml

* Update test_windows.ps1

* Update .appveyor.yml
---
 .ci/test_windows.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1
index 1273d3713350..d4c5012a1b87 100644
--- a/.ci/test_windows.ps1
+++ b/.ci/test_windows.ps1
@@ -52,7 +52,7 @@ if ($env:TASK -eq "swig") {
 
 conda install -q -y -n $env:CONDA_ENV joblib matplotlib numpy pandas psutil pytest scikit-learn scipy ; Check-Output $?
 # python-graphviz has to be installed separately to prevent conda from downgrading to pypy
-conda install -q -y -n $env:CONDA_ENV python-graphviz ; Check-Output $?
+conda install -q -y -n $env:CONDA_ENV libxml2 python-graphviz ; Check-Output $?
 
 if ($env:TASK -eq "regular") {
   mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build