
Commit

feat: Feature selection v3 to support all actions (#280)

* Update feedback.py to support all actions

Feedback.py is updated to support all actions.

* Update prompts.yaml to support all actions

* Revised for CI

* CI

* fix a ci bug

* fix a ci bug

---------

Co-authored-by: WinstonLiye <[email protected]>
xisen-w and WinstonLiyt authored Sep 20, 2024
1 parent 83058c8 commit 0047641
Showing 2 changed files with 120 additions and 15 deletions.
69 changes: 55 additions & 14 deletions rdagent/scenarios/kaggle/developer/feedback.py
@@ -46,12 +46,37 @@ def process_results(current_result, sota_result):


class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    def get_available_features(self, exp: Experiment):
        features = []

        for feature_info in exp.experiment_workspace.data_description:
            task_info, feature_shape = feature_info

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
It should not be .factor_name as task_info is a string.

            features.append(
                {"name": task_info.factor_name, "description": task_info.factor_description, "shape": feature_shape}

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
The term shape here refers to how many factors this task covers.

            )

        return features
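
An editorial sketch (not part of the commit) of how the review comments above might be addressed: it assumes `data_description` yields `(task_info, feature_shape)` pairs where `task_info` may be a plain string rather than a task object, and `collect_available_features` is a hypothetical stand-alone helper mirroring `get_available_features`.

```python
def collect_available_features(data_description):
    """Build the feature list passed to the feedback prompt (hypothetical helper)."""
    features = []
    for task_info, feature_shape in data_description:
        # task_info may be a plain string (per the review note) rather than a task object
        name = task_info if isinstance(task_info, str) else task_info.factor_name
        description = getattr(task_info, "factor_description", str(task_info))
        # feature_shape counts how many factors the task covers (per the second review note)
        features.append({"name": name, "description": description, "shape": feature_shape})
    return features

# Example: a string entry produces a prompt-ready dict without touching .factor_name.
print(collect_available_features([("rolling_mean_20d", 3)]))
```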

    def get_model_code(self, exp: Experiment):
        model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None
        if model_type == "XGBoost":
            return exp.sub_workspace_list[0].code_dict.get(
                "model_xgb.py"
            )  # TODO Check if we need to replace this by using RepoAnalyzer
        elif model_type == "RandomForest":
            return exp.sub_workspace_list[0].code_dict.get("model_rf.py")
        elif model_type == "LightGBM":
            return exp.sub_workspace_list[0].code_dict.get("model_lgb.py")
        elif model_type == "NN":
            return exp.sub_workspace_list[0].code_dict.get("model_nn.py")
        else:
            return None
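
For illustration only (not in the commit), the same dispatch can be written as a lookup table; `MODEL_CODE_FILES` is a hypothetical name, and the file names simply mirror the branches above.

```python
MODEL_CODE_FILES = {
    "XGBoost": "model_xgb.py",
    "RandomForest": "model_rf.py",
    "LightGBM": "model_lgb.py",
    "NN": "model_nn.py",
}

def get_model_code(exp):
    # Same behaviour as the if/elif chain: unknown or missing model types yield None.
    model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None
    filename = MODEL_CODE_FILES.get(model_type)
    return exp.sub_workspace_list[0].code_dict.get(filename) if filename is not None else None
```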

    def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
        """
        The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM).
        For example: `mlflow` of Qlib will be included.
        """

        """
        Generate feedback for the given experiment and hypothesis.
        Args:

@@ -84,28 +109,44 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
            combined_result = process_results(current_result, current_result)  # Compare with itself
            print("Warning: No previous experiments to compare against. Using current result as baseline.")

        available_features = self.get_available_features(exp)
        # Get the appropriate model code
        model_code = self.get_model_code(exp)

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
There is no task for model in feature engineering; it should also be written in the form of model_description.

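A minimal sketch of what the reviewer seems to suggest (hypothetical, not the committed code): feature-engineering experiments have no model task, so a textual description could be used instead of leaving the model code empty; `resolve_model_code` is an illustrative helper name.

```python
def resolve_model_code(model_code):
    # Hypothetical fallback: when there is no model task (e.g. feature engineering),
    # pass a description string to the prompt instead of None.
    return model_code if model_code is not None else "No model task for this action; see model_description."
```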

        # Generate the user prompt based on the action type
        if hypothesis.action == "Model tuning":
            prompt_key = "model_tuning_feedback_generation"
        elif hypothesis.action == "Model feature selection":
            prompt_key = "feature_selection_feedback_generation"
        else:
            prompt_key = "factor_feedback_generation"

        # Generate the system prompt
        sys_prompt = (
            Environment(undefined=StrictUndefined)
-            .from_string(prompt_dict["factor_feedback_generation"]["system"])
+            .from_string(prompt_dict[prompt_key]["system"])
            .render(scenario=self.scen.get_scenario_all_desc())
        )

-        # Generate the user prompt based on the action type
-        if hypothesis.action == "Model Tuning":  # TODO Add other prompts here
-            prompt_key = "model_feedback_generation"
-        else:
-            prompt_key = "factor_feedback_generation"
        # Prepare render dictionary
        render_dict = {
            "context": self.scen.get_scenario_all_desc(),
            "last_hypothesis": trace.hist[-1][0] if trace.hist else None,
            "last_task": trace.hist[-1][1] if trace.hist else None,
            "last_code": self.get_model_code(trace.hist[-1][1]) if trace.hist else None,
            "last_result": trace.hist[-1][1].result if trace.hist else None,
            "hypothesis": hypothesis,
            "exp": exp,
            "model_code": model_code,
            "available_features": available_features,
            "combined_result": combined_result,
            "hypothesis_text": hypothesis_text,
            "task_details": tasks_factors,
        }

        # Generate the user prompt
        usr_prompt = (
-            Environment(undefined=StrictUndefined)
-            .from_string(prompt_dict[prompt_key]["user"])
-            .render(
-                hypothesis_text=hypothesis_text,
-                task_details=tasks_factors,
-                combined_result=combined_result,
-            )
+            Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict)
        )

        # Call the APIBackend to generate the response for hypothesis feedback
[remaining lines of this file's diff collapsed in the diff view]
66 changes: 65 additions & 1 deletion rdagent/scenarios/kaggle/prompts.yaml
@@ -198,4 +198,68 @@ factor_feedback_generation:
    Consider Changing Direction for Significant Gaps with the Best Result:
      - If the new results significantly differ from the best, consider exploring a new direction.
      - Avoid re-implementing previous features as those that surpassed the best are already included in the feature library and will be used in each run.
    Note: Only features with 'Feature Implementation' as True are implemented and tested in this experiment. If 'Feature Implementation' is False, the hypothesis for that feature cannot be verified in this run.
feature_selection_feedback_generation:
  system: |-
    You are a professional feature selection assistant for machine learning models. Your task is to analyze the current feature selection strategy, evaluate its effectiveness, and suggest improvements.
    Consider the following when analyzing:
    1. How well does the current feature selection support the hypothesis?
    2. Which features seem to contribute most to the model's performance?
    3. Are there any features that might be redundant or noisy?
    4. What new feature selection strategies might improve the model?
    Provide detailed and constructive feedback, focusing on actionable insights for feature selection improvement.
    Respond in JSON format. Example JSON structure for Result Analysis:
    {
      "Observations": "Your overall observations about the feature selection effectiveness",
      "Feedback for Hypothesis": "How well the results support or refute the hypothesis",
      "New Hypothesis": "Suggested new hypothesis for feature selection in the next iteration",
      "Reasoning": "Detailed reasoning for the new hypothesis, including which features to keep, remove, or add",
      "Decision": true or false
    }
  user: |-
    We are in an experiment of finding hypotheses for feature selection and validating or rejecting them to optimize our model's performance.
    Here is the context: {{context}}.
    {% if last_hypothesis %}
    Last Round Information:
    Hypothesis: {{last_hypothesis.hypothesis}}
    Task: {{last_task}}
    Code Implemented: {{last_code}}
    Result: {{last_result}}
    {% else %}
    This is the first round. No previous information available. As long as the performance is not too negative (e.g., ICIR is greater than 0), treat it as successful. Do not set the threshold too high.
    {% endif %}
    Current Round Information:
    Hypothesis: {{hypothesis.hypothesis}}
    Experiment Setup: {{exp.sub_tasks[0]}}
    Model Code Implemented (focus on the select() method):
    ```python
    {{model_code}}
    ```
    Relevant Reasoning: {{hypothesis.reason}}
    Result: {{exp.result}}
    Available Features:
    {% for feature in available_features %}
    - {{feature.name}}: {{feature.description}}
      Shape: {{feature.shape}}
    {% endfor %}
    Compare and observe the results. Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working).
    Based on the hypotheses, relevant reasoning, and results (comparison), provide detailed and constructive feedback and suggest a new hypothesis for feature selection.
    In your feedback, consider:
    1. How effective is the current feature selection strategy?
    2. Are there any patterns in the selected or discarded features that might inform future selections?
    3. How might we refine or change the feature selection approach to improve model performance?
    4. Are there any domain-specific considerations that should inform our feature selection?
    Remember to focus on the select() method in the model code, as this is where feature selection is implemented.
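
To make the JSON contract above concrete, here is a minimal, self-contained sketch (not part of the repository) of rendering such a user template with Jinja2 and parsing the reply the system prompt requests; `FakeHypothesis` and the abbreviated prompt text are illustrative stand-ins.

```python
import json
from jinja2 import Environment, StrictUndefined

# Abbreviated stand-in for the feature_selection_feedback_generation user template.
prompts = {
    "feature_selection_feedback_generation": {
        "user": "Current Round Information:\nHypothesis: {{hypothesis.hypothesis}}\nResult: {{result}}"
    }
}

class FakeHypothesis:  # stand-in for rdagent's Hypothesis object
    def __init__(self, hypothesis):
        self.hypothesis = hypothesis

usr_prompt = (
    Environment(undefined=StrictUndefined)
    .from_string(prompts["feature_selection_feedback_generation"]["user"])
    .render(hypothesis=FakeHypothesis("Dropping low-variance features improves ICIR"), result=0.12)
)
print(usr_prompt)

# The system prompt asks the LLM to reply in JSON with these keys; parse accordingly.
reply = '{"Observations": "...", "Feedback for Hypothesis": "...", "New Hypothesis": "...", "Reasoning": "...", "Decision": true}'
feedback = json.loads(reply)
assert isinstance(feedback["Decision"], bool)
```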
