Add a config switch (`if_using_feature_selection`) to control feature selection

microsoft · Sep 25, 2024 · 28ea3c3 · 28ea3c3
1 parent 5b5dfee
commit 28ea3c3
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 15 deletions.
diff --git a/rdagent/app/kaggle/conf.py b/rdagent/app/kaggle/conf.py
@@ -55,5 +55,7 @@ class Config:
 
     if_action_choosing_based_on_UCB: bool = False
 
+    if_using_feature_selection: bool = False
+
 
 KAGGLE_IMPLEMENT_SETTING = KaggleBasePropSetting()
diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -35,6 +35,7 @@ def __init__(self, competition: str) -> None:
         self.model_output_channel = None
         self._analysis_competition_description()
         self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
+        self.if_using_feature_selection = KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection
 
         self._output_format = self.output_format
         self._interface = self.interface

diff --git a/rdagent/scenarios/kaggle/prompts.yaml b/rdagent/scenarios/kaggle/prompts.yaml
@@ -39,7 +39,7 @@ hypothesis_and_feedback: |-
 hypothesis_output_format: |-
   The output should follow JSON format. The schema is as follows:
   {
-    "action": "If "hypothesis_specification" provides the action you need to take, please follow "hypothesis_specification" to choose the action. Otherwise, based on previous experimental results, suggest the action you believe is most appropriate at the moment. It should be one of ["Feature engineering", "Feature processing", "Model feature selection", "Model tuning"]"
+    "action": "If "hypothesis_specification" provides the action you need to take, please follow "hypothesis_specification" to choose the action. Otherwise, based on previous experimental results, suggest the action you believe is most appropriate at the moment. It should be one of [{% if if_using_feature_selection %}"Feature engineering", "Feature processing", "Model feature selection", "Model tuning"{% else %}"Feature engineering", "Feature processing", "Model tuning"{% endif %}]",
     "hypothesis": "The new hypothesis generated based on the information provided.",
     "reason": "The reason why you generate this hypothesis. It should be comprehensive and logical. It should cover the other keys below and extend them.",
     "concise_reason": "Two-line summary. First line focuses on a concise justification for the change. Second line generalizes a knowledge statement.",

diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py
@@ -36,7 +36,7 @@
 KG_ACTION_LIST = [
     KG_ACTION_FEATURE_ENGINEERING,
     KG_ACTION_FEATURE_PROCESSING,
-    KG_ACTION_MODEL_FEATURE_SELECTION,
+    *([KG_ACTION_MODEL_FEATURE_SELECTION] if KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection else []),
     KG_ACTION_MODEL_TUNING,
 ]
 
@@ -82,18 +82,14 @@ class KGHypothesisGen(ModelHypothesisGen):
 
     def __init__(self, scen: Scenario) -> Tuple[dict, bool]:
         super().__init__(scen)
-        self.action_counts = {
-            "Feature engineering": 0,
-            "Feature processing": 0,
-            "Model feature selection": 0,
-            "Model tuning": 0,
-        }
-        self.reward_estimates = {
-            "Feature engineering": 0.0,
-            "Feature processing": 0.0,
-            "Model feature selection": 0.2,
-            "Model tuning": 1.0,
-        }
+        actions = ["Feature engineering", "Feature processing", "Model tuning"]
+        if KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection:
+            actions.insert(2, "Model feature selection")
+        self.action_counts = dict.fromkeys(actions, 0)
+        self.reward_estimates = {action: 0.0 for action in actions}
+        if KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection:
+            self.reward_estimates["Model feature selection"] = 0.2
+        self.reward_estimates["Model tuning"] = 1.0
         self.confidence_parameter = 1.0
         self.initial_performance = 0.0
 
@@ -240,7 +236,9 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
         context_dict = {
             "hypothesis_and_feedback": hypothesis_and_feedback,
             "RAG": self.generate_RAG_content(trace),
-            "hypothesis_output_format": prompt_dict["hypothesis_output_format"],
+            "hypothesis_output_format": Environment(undefined=StrictUndefined)
+            .from_string(prompt_dict["hypothesis_output_format"])
+            .render(if_using_feature_selection=KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection),
             "hypothesis_specification": f"next experiment action is {action}"
             if self.scen.if_action_choosing_based_on_UCB
             else None,
Original file line number	Diff line number	Diff line change
Expand Up		@@ -55,5 +55,7 @@ class Config:

		if_action_choosing_based_on_UCB: bool = False

		if_using_feature_selection: bool = False


		KAGGLE_IMPLEMENT_SETTING = KaggleBasePropSetting()