ci issues

TPLin22 committed Sep 22, 2024
1 parent 4bbcc4b commit fe6b4c3
Showing 7 changed files with 57 additions and 28 deletions.
File 1 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Restored three-layer model structure
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = torch.sigmoid(self.fc3(x))
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -34,10 +36,12 @@ def fit(X_train, y_train, X_valid, y_valid):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     # Convert to TensorDataset and create DataLoader
-    train_dataset = TensorDataset(torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_train.reshape(-1), dtype=torch.float32))
-    valid_dataset = TensorDataset(torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_valid.reshape(-1), dtype=torch.float32))
+    train_dataset = TensorDataset(
+        torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.reshape(-1), dtype=torch.float32)
+    )
+    valid_dataset = TensorDataset(
+        torch.tensor(X_valid.to_numpy(), dtype=torch.float32), torch.tensor(y_valid.reshape(-1), dtype=torch.float32)
+    )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
 
@@ -58,14 +62,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
     with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return boolean predictions
\ No newline at end of file
+    return np.array(predictions)  # Return boolean predictions
File 2 of 7

@@ -1,11 +1,13 @@
 import os
+
 import pandas as pd
 from sklearn.compose import ColumnTransformer
 from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder
 
+
 def prepreprocess():
     """
     This method loads the data, drops the unnecessary columns, and splits it into train and validation sets.
@@ -22,6 +24,7 @@ def prepreprocess():
 
     return X_train, X_valid, y_train, y_valid
 
+
 def preprocess_fit(X_train: pd.DataFrame):
     """
     Fits the preprocessor on the training data and returns the fitted preprocessor.
@@ -53,6 +56,7 @@ def preprocess_fit(X_train: pd.DataFrame):
 
     return preprocessor
 
+
 def preprocess_transform(X: pd.DataFrame, preprocessor):
     """
     Transforms the given DataFrame using the fitted preprocessor.
@@ -72,6 +76,7 @@ def preprocess_transform(X: pd.DataFrame, preprocessor):
 
     return X_transformed
 
+
 def preprocess_script():
     """
     This method applies the preprocessing steps to the training, validation, and test datasets.
@@ -95,9 +100,11 @@ def preprocess_script():
     X_valid = preprocess_transform(X_valid, preprocessor)
 
     # Load and preprocess the test data
-    submission_df = pd.read_csv("/data/userdata/v-haoranpan/RD-Agent/git_ignore_folder/data/playground-series-s4e9/test.csv")
+    submission_df = pd.read_csv(
+        "/data/userdata/v-haoranpan/RD-Agent/git_ignore_folder/data/playground-series-s4e9/test.csv"
+    )
     ids = submission_df["id"]
     submission_df = submission_df.drop(["id"], axis=1)
     X_test = preprocess_transform(submission_df, preprocessor)
 
-    return X_train, X_valid, y_train, y_valid, X_test, ids
\ No newline at end of file
+    return X_train, X_valid, y_train, y_valid, X_test, ids
File 3 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Modified model for regression
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = self.fc3(x)  # No activation for regression
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -36,11 +38,11 @@ def fit(X_train, y_train, X_valid, y_valid):
     # Convert to TensorDataset and create DataLoader
     train_dataset = TensorDataset(
         torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-        torch.tensor(y_train.to_numpy().reshape(-1), dtype=torch.float32)  # Convert to NumPy array
+        torch.tensor(y_train.to_numpy().reshape(-1), dtype=torch.float32),  # Convert to NumPy array
     )
     valid_dataset = TensorDataset(
         torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-        torch.tensor(y_valid.to_numpy().reshape(-1), dtype=torch.float32)  # Convert to NumPy array
+        torch.tensor(y_valid.to_numpy().reshape(-1), dtype=torch.float32),  # Convert to NumPy array
     )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
@@ -62,14 +64,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
    with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return predicted values
\ No newline at end of file
+    return np.array(predictions)  # Return predicted values
File 4 of 7

@@ -1,7 +1,8 @@
+import numpy as np
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import mean_squared_error
-import numpy as np
 
+
 def select(X: pd.DataFrame) -> pd.DataFrame:
     """
@@ -10,6 +11,7 @@ def select(X: pd.DataFrame) -> pd.DataFrame:
     # For now, we assume all features are relevant. This can be expanded to feature selection logic.
     return X
 
+
 def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series):
     """
     Define and train the Random Forest model. Merge feature selection into the pipeline.
@@ -32,6 +34,7 @@ def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series):
 
     return model
 
+
 def predict(model, X):
     """
     Keep feature selection's consistency and make predictions.
@@ -42,4 +45,4 @@ def predict(model, X):
     # Predict using the trained model
     y_pred = model.predict(X_selected)
 
-    return y_pred
\ No newline at end of file
+    return y_pred
File 5 of 7

@@ -1,10 +1,12 @@
 import pandas as pd
 import xgboost as xgb
 
+
 def select(X: pd.DataFrame) -> pd.DataFrame:
     # Ignore feature selection logic
     return X
 
+
 def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
     """Define and train the model. Merge feature_select"""
     X_train = select(X_train)
@@ -24,11 +26,12 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
 
     return bst
 
+
 def predict(model, X):
     """
     Keep feature select's consistency.
     """
     X = select(X)
     dtest = xgb.DMatrix(X)
     y_pred = model.predict(dtest)
-    return y_pred
\ No newline at end of file
+    return y_pred
File 6 of 7

@@ -14,18 +14,21 @@
 np.random.seed(SEED)
 DIRNAME = Path(__file__).absolute().resolve().parent
 
+
 def compute_rmse(y_true, y_pred):
     """Compute RMSE for regression."""
     mse = mean_squared_error(y_true, y_pred)
     rmse = np.sqrt(mse)
     return rmse
 
+
 def import_module_from_path(module_name, module_path):
     spec = importlib.util.spec_from_file_location(module_name, module_path)
     module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(module)
     return module
 
+
 # 1) Preprocess the data
 X_train, X_valid, y_train, y_valid, X_test, ids = preprocess_script()
 
@@ -97,4 +100,4 @@ def import_module_from_path(module_name, module_path):
 
 # 8) Submit predictions for the test set
 submission_result = pd.DataFrame({"id": ids, "price": y_test_pred})
-submission_result.to_csv("submission.csv", index=False)
\ No newline at end of file
+submission_result.to_csv("submission.csv", index=False)
File 7 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Restored three-layer model structure
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = torch.sigmoid(self.fc3(x))
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -34,10 +36,12 @@ def fit(X_train, y_train, X_valid, y_valid):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     # Convert to TensorDataset and create DataLoader
-    train_dataset = TensorDataset(torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_train.reshape(-1), dtype=torch.float32))
-    valid_dataset = TensorDataset(torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_valid.reshape(-1), dtype=torch.float32))
+    train_dataset = TensorDataset(
+        torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.reshape(-1), dtype=torch.float32)
+    )
+    valid_dataset = TensorDataset(
+        torch.tensor(X_valid.to_numpy(), dtype=torch.float32), torch.tensor(y_valid.reshape(-1), dtype=torch.float32)
+    )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
 
@@ -58,14 +62,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
     with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return boolean predictions
\ No newline at end of file
+    return np.array(predictions)  # Return boolean predictions
