
Commit

feat: Feature selection v3 to support all actions (#280)

* Update feedback.py to support all actions

Feedback.py is updated to support all actions.

* Update prompts.yaml to support all actions

* Revised for CI

* CI

* fix a ci bug

* fix a ci bug

---------

Co-authored-by: WinstonLiye <[email protected]>
xisen-w and WinstonLiyt authored Sep 20, 2024
1 parent 83058c8 commit 0047641
Showing 2 changed files with 120 additions and 15 deletions.
69 changes: 55 additions & 14 deletions rdagent/scenarios/kaggle/developer/feedback.py
@@ -46,12 +46,37 @@ def process_results(current_result, sota_result):


class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    def get_available_features(self, exp: Experiment):
        features = []

        for feature_info in exp.experiment_workspace.data_description:
            task_info, feature_shape = feature_info

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
It should not be .factor_name as task_info is a string.

            features.append(
                {"name": task_info.factor_name, "description": task_info.factor_description, "shape": feature_shape}

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
The term shape here refers to how many factors this task covers.

            )

        return features
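
An editorial sketch (not part of the commit) of how the review comments above might be addressed: it assumes `data_description` yields `(task_info, feature_shape)` pairs where `task_info` may be a plain string rather than a task object, and `collect_available_features` is a hypothetical stand-alone helper mirroring `get_available_features`.

```python
def collect_available_features(data_description):
    """Build the feature list passed to the feedback prompt (hypothetical helper)."""
    features = []
    for task_info, feature_shape in data_description:
        # task_info may be a plain string (per the review note) rather than a task object
        name = task_info if isinstance(task_info, str) else task_info.factor_name
        description = getattr(task_info, "factor_description", str(task_info))
        # feature_shape counts how many factors the task covers (per the second review note)
        features.append({"name": name, "description": description, "shape": feature_shape})
    return features

# Example: a string entry produces a prompt-ready dict without touching .factor_name.
print(collect_available_features([("rolling_mean_20d", 3)]))
```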

    def get_model_code(self, exp: Experiment):
        model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None
        if model_type == "XGBoost":
            return exp.sub_workspace_list[0].code_dict.get(
                "model_xgb.py"
            )  # TODO Check if we need to replace this by using RepoAnalyzer
        elif model_type == "RandomForest":
            return exp.sub_workspace_list[0].code_dict.get("model_rf.py")
        elif model_type == "LightGBM":
            return exp.sub_workspace_list[0].code_dict.get("model_lgb.py")
        elif model_type == "NN":
            return exp.sub_workspace_list[0].code_dict.get("model_nn.py")
        else:
            return None
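
For illustration only (not in the commit), the same dispatch can be written as a lookup table; `MODEL_CODE_FILES` is a hypothetical name, and the file names simply mirror the branches above.

```python
MODEL_CODE_FILES = {
    "XGBoost": "model_xgb.py",
    "RandomForest": "model_rf.py",
    "LightGBM": "model_lgb.py",
    "NN": "model_nn.py",
}

def get_model_code(exp):
    # Same behaviour as the if/elif chain: unknown or missing model types yield None.
    model_type = exp.sub_tasks[0].model_type if exp.sub_tasks else None
    filename = MODEL_CODE_FILES.get(model_type)
    return exp.sub_workspace_list[0].code_dict.get(filename) if filename is not None else None
```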

    def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
        """
        The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM).
        For example: `mlflow` of Qlib will be included.
        """

        """
        Generate feedback for the given experiment and hypothesis.
        Args:

@@ -84,28 +109,44 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
            combined_result = process_results(current_result, current_result)  # Compare with itself
            print("Warning: No previous experiments to compare against. Using current result as baseline.")

        available_features = self.get_available_features(exp)
        # Get the appropriate model code
        model_code = self.get_model_code(exp)

[Review comment by WinstonLiyt (Author, Collaborator), Sep 20, 2024]
There is no task for model in feature engineering; it should also be written in the form of model_description.

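A minimal sketch of what the reviewer seems to suggest (hypothetical, not the committed code): feature-engineering experiments have no model task, so a textual description could be used instead of leaving the model code empty; `resolve_model_code` is an illustrative helper name.

```python
def resolve_model_code(model_code):
    # Hypothetical fallback: when there is no model task (e.g. feature engineering),
    # pass a description string to the prompt instead of None.
    return model_code if model_code is not None else "No model task for this action; see model_description."
```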

        # Generate the user prompt based on the action type
        if hypothesis.action == "Model tuning":
            prompt_key = "model_tuning_feedback_generation"
        elif hypothesis.action == "Model feature selection":
            prompt_key = "feature_selection_feedback_generation"
        else:
            prompt_key = "factor_feedback_generation"

        # Generate the system prompt
        sys_prompt = (
            Environment(undefined=StrictUndefined)
-            .from_string(prompt_dict["factor_feedback_generation"]["system"])
+            .from_string(prompt_dict[prompt_key]["system"])
            .render(scenario=self.scen.get_scenario_all_desc())
        )

-        # Generate the user prompt based on the action type
-        if hypothesis.action == "Model Tuning":  # TODO Add other prompts here
-            prompt_key = "model_feedback_generation"
-        else:
-            prompt_key = "factor_feedback_generation"
        # Prepare render dictionary
        render_dict = {
            "context": self.scen.get_scenario_all_desc(),
            "last_hypothesis": trace.hist[-1][0] if trace.hist else None,
            "last_task": trace.hist[-1][1] if trace.hist else None,
            "last_code": self.get_model_code(trace.hist[-1][1]) if trace.hist else None,
            "last_result": trace.hist[-1][1].result if trace.hist else None,
            "hypothesis": hypothesis,
            "exp": exp,
            "model_code": model_code,
            "available_features": available_features,
            "combined_result": combined_result,
            "hypothesis_text": hypothesis_text,
            "task_details": tasks_factors,
        }

        # Generate the user prompt
        usr_prompt = (
-            Environment(undefined=StrictUndefined)
-            .from_string(prompt_dict[prompt_key]["user"])
-            .render(
-                hypothesis_text=hypothesis_text,
-                task_details=tasks_factors,
-                combined_result=combined_result,
-            )
+            Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict)
        )

        # Call the APIBackend to generate the response for hypothesis feedback
[remaining lines of this file's diff collapsed in the diff view]
66 changes: 65 additions & 1 deletion rdagent/scenarios/kaggle/prompts.yaml
@@ -198,4 +198,68 @@ factor_feedback_generation:
    Consider Changing Direction for Significant Gaps with the Best Result:
      - If the new results significantly differ from the best, consider exploring a new direction.
      - Avoid re-implementing previous features as those that surpassed the best are already included in the feature library and will be used in each run.
    Note: Only features with 'Feature Implementation' as True are implemented and tested in this experiment. If 'Feature Implementation' is False, the hypothesis for that feature cannot be verified in this run.
feature_selection_feedback_generation:
  system: |-
    You are a professional feature selection assistant for machine learning models. Your task is to analyze the current feature selection strategy, evaluate its effectiveness, and suggest improvements.
    Consider the following when analyzing:
    1. How well does the current feature selection support the hypothesis?
    2. Which features seem to contribute most to the model's performance?
    3. Are there any features that might be redundant or noisy?
    4. What new feature selection strategies might improve the model?
    Provide detailed and constructive feedback, focusing on actionable insights for feature selection improvement.
    Respond in JSON format. Example JSON structure for Result Analysis:
    {
      "Observations": "Your overall observations about the feature selection effectiveness",
      "Feedback for Hypothesis": "How well the results support or refute the hypothesis",
      "New Hypothesis": "Suggested new hypothesis for feature selection in the next iteration",
      "Reasoning": "Detailed reasoning for the new hypothesis, including which features to keep, remove, or add",
      "Decision": true or false
    }
  user: |-
    We are in an experiment of finding hypotheses for feature selection and validating or rejecting them to optimize our model's performance.
    Here is the context: {{context}}.
    {% if last_hypothesis %}
    Last Round Information:
    Hypothesis: {{last_hypothesis.hypothesis}}
    Task: {{last_task}}
    Code Implemented: {{last_code}}
    Result: {{last_result}}
    {% else %}
    This is the first round. No previous information available. As long as the performance is not too negative (e.g., ICIR is greater than 0), treat it as successful. Do not set the threshold too high.
    {% endif %}
    Current Round Information:
    Hypothesis: {{hypothesis.hypothesis}}
    Experiment Setup: {{exp.sub_tasks[0]}}
    Model Code Implemented (focus on the select() method):
    ```python
    {{model_code}}
    ```
    Relevant Reasoning: {{hypothesis.reason}}
    Result: {{exp.result}}
    Available Features:
    {% for feature in available_features %}
    - {{feature.name}}: {{feature.description}}
      Shape: {{feature.shape}}
    {% endfor %}
    Compare and observe the results. Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working).
    Based on the hypotheses, relevant reasoning, and results (comparison), provide detailed and constructive feedback and suggest a new hypothesis for feature selection.
    In your feedback, consider:
    1. How effective is the current feature selection strategy?
    2. Are there any patterns in the selected or discarded features that might inform future selections?
    3. How might we refine or change the feature selection approach to improve model performance?
    4. Are there any domain-specific considerations that should inform our feature selection?
    Remember to focus on the select() method in the model code, as this is where feature selection is implemented.
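
To make the JSON contract above concrete, here is a minimal, self-contained sketch (not part of the repository) of rendering such a user template with Jinja2 and parsing the reply the system prompt requests; `FakeHypothesis` and the abbreviated prompt text are illustrative stand-ins.

```python
import json
from jinja2 import Environment, StrictUndefined

# Abbreviated stand-in for the feature_selection_feedback_generation user template.
prompts = {
    "feature_selection_feedback_generation": {
        "user": "Current Round Information:\nHypothesis: {{hypothesis.hypothesis}}\nResult: {{result}}"
    }
}

class FakeHypothesis:  # stand-in for rdagent's Hypothesis object
    def __init__(self, hypothesis):
        self.hypothesis = hypothesis

usr_prompt = (
    Environment(undefined=StrictUndefined)
    .from_string(prompts["feature_selection_feedback_generation"]["user"])
    .render(hypothesis=FakeHypothesis("Dropping low-variance features improves ICIR"), result=0.12)
)
print(usr_prompt)

# The system prompt asks the LLM to reply in JSON with these keys; parse accordingly.
reply = '{"Observations": "...", "Feedback for Hypothesis": "...", "New Hypothesis": "...", "Reasoning": "...", "Decision": true}'
feedback = json.loads(reply)
assert isinstance(feedback["Decision"], bool)
```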
