From 04e28294d29fd7e43e622770091ab6961f3b0f10 Mon Sep 17 00:00:00 2001 From: dhb <1084714805@qq.com> Date: Mon, 23 Sep 2024 23:53:17 +0800 Subject: [PATCH] merge parameters of spsa tune --- Rapfi/search/mcts/parameter.h | 31 +++++++++++++++---------------- Rapfi/search/mcts/search.cpp | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/Rapfi/search/mcts/parameter.h b/Rapfi/search/mcts/parameter.h index 0e29d01..4296f67 100644 --- a/Rapfi/search/mcts/parameter.h +++ b/Rapfi/search/mcts/parameter.h @@ -22,29 +22,28 @@ namespace Search::MCTS { -constexpr float MaxNewVisitsProp = 0.2f; +constexpr float MaxNewVisitsProp = 0.275f; -constexpr float CpuctExploration = 1.0f; -constexpr float CpuctExplorationLog = 0.4f; -constexpr float CpuctExplorationBase = 500; +constexpr float CpuctExploration = 0.40f; +constexpr float CpuctExplorationLog = 0.75f; +constexpr float CpuctExplorationBase = 336; constexpr float CpuctParentVisitBias = 0.1f; -constexpr float CpuctUtilityStdevScale = 0.0f; -constexpr float CpuctUtilityVarPrior = 0.16f; -constexpr float CpuctUtilityVarPriorWeight = 2.0f; +constexpr float CpuctUtilityStdevScale = 0.035f; +constexpr float CpuctUtilityVarPrior = 0.18f; +constexpr float CpuctUtilityVarPriorWeight = 2.14f; -constexpr float FpuReductionMax = 0.1f; -constexpr float FpuLossProp = 0.0f; -constexpr float RootFpuReductionMax = 0.05f; -constexpr float RootFpuLossProp = 0.0f; -constexpr float FpuUtilityBlendPow = 2.0f; +constexpr float FpuReductionMax = 0.075f; +constexpr float FpuLossProp = 0.001f; +constexpr float RootFpuReductionMax = 0.075f; +constexpr float RootFpuLossProp = 0.0036f; +constexpr float FpuUtilityBlendPow = 1.73f; -constexpr uint32_t MinTranspositionSkipVisits = 10; +constexpr uint32_t MinTranspositionSkipVisits = 12; constexpr bool UseLCBForBestmoveSelection = true; -constexpr float LCBStdevs = - 4.265f; // quantile of student's t dist(n) at confidence level 1-1e-5 as n -> inf -constexpr float LCBMinVisitProp = 0.15f; +constexpr float LCBStdevs = 5.0f; +constexpr float LCBMinVisitProp = 0.12f; constexpr float PolicyTemperature = 1.0f; diff --git a/Rapfi/search/mcts/search.cpp b/Rapfi/search/mcts/search.cpp index 77301f6..dac1e6a 100644 --- a/Rapfi/search/mcts/search.cpp +++ b/Rapfi/search/mcts/search.cpp @@ -38,7 +38,7 @@ inline float cpuctExplorationFactor(uint32_t parentVisits) float cpuct = CpuctExploration; if (CpuctExplorationLog != 0) cpuct += CpuctExplorationLog * std::log(1.0f + parentVisits / CpuctExplorationBase); - return cpuct * std::sqrt(parentVisits + CpuctParentVisitBias); + return cpuct * std::sqrt(parentVisits + 1e-2f); } /// Compute the initial utility value for unexplored children, considering first play urgency.