ci issues

TPLin22 committed Sep 22, 2024
1 parent 4bbcc4b commit fe6b4c3
Showing 7 changed files with 57 additions and 28 deletions.
File 1 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Restored three-layer model structure
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = torch.sigmoid(self.fc3(x))
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -34,10 +36,12 @@ def fit(X_train, y_train, X_valid, y_valid):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     # Convert to TensorDataset and create DataLoader
-    train_dataset = TensorDataset(torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_train.reshape(-1), dtype=torch.float32))
-    valid_dataset = TensorDataset(torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_valid.reshape(-1), dtype=torch.float32))
+    train_dataset = TensorDataset(
+        torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.reshape(-1), dtype=torch.float32)
+    )
+    valid_dataset = TensorDataset(
+        torch.tensor(X_valid.to_numpy(), dtype=torch.float32), torch.tensor(y_valid.reshape(-1), dtype=torch.float32)
+    )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
 
@@ -58,14 +62,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
     with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return boolean predictions
\ No newline at end of file
+    return np.array(predictions)  # Return boolean predictions
File 2 of 7

@@ -1,11 +1,13 @@
 import os
+
 import pandas as pd
 from sklearn.compose import ColumnTransformer
 from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder
 
+
 def prepreprocess():
     """
     This method loads the data, drops the unnecessary columns, and splits it into train and validation sets.
@@ -22,6 +24,7 @@ def prepreprocess():
 
     return X_train, X_valid, y_train, y_valid
 
+
 def preprocess_fit(X_train: pd.DataFrame):
     """
     Fits the preprocessor on the training data and returns the fitted preprocessor.
@@ -53,6 +56,7 @@ def preprocess_fit(X_train: pd.DataFrame):
 
     return preprocessor
 
+
 def preprocess_transform(X: pd.DataFrame, preprocessor):
     """
     Transforms the given DataFrame using the fitted preprocessor.
@@ -72,6 +76,7 @@ def preprocess_transform(X: pd.DataFrame, preprocessor):
 
     return X_transformed
 
+
 def preprocess_script():
     """
     This method applies the preprocessing steps to the training, validation, and test datasets.
@@ -95,9 +100,11 @@ def preprocess_script():
     X_valid = preprocess_transform(X_valid, preprocessor)
 
     # Load and preprocess the test data
-    submission_df = pd.read_csv("/data/userdata/v-haoranpan/RD-Agent/git_ignore_folder/data/playground-series-s4e9/test.csv")
+    submission_df = pd.read_csv(
+        "/data/userdata/v-haoranpan/RD-Agent/git_ignore_folder/data/playground-series-s4e9/test.csv"
+    )
     ids = submission_df["id"]
     submission_df = submission_df.drop(["id"], axis=1)
     X_test = preprocess_transform(submission_df, preprocessor)
 
-    return X_train, X_valid, y_train, y_valid, X_test, ids
\ No newline at end of file
+    return X_train, X_valid, y_train, y_valid, X_test, ids
File 3 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Modified model for regression
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = self.fc3(x)  # No activation for regression
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -36,11 +38,11 @@ def fit(X_train, y_train, X_valid, y_valid):
     # Convert to TensorDataset and create DataLoader
     train_dataset = TensorDataset(
         torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-        torch.tensor(y_train.to_numpy().reshape(-1), dtype=torch.float32)  # Convert to NumPy array
+        torch.tensor(y_train.to_numpy().reshape(-1), dtype=torch.float32),  # Convert to NumPy array
     )
     valid_dataset = TensorDataset(
         torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-        torch.tensor(y_valid.to_numpy().reshape(-1), dtype=torch.float32)  # Convert to NumPy array
+        torch.tensor(y_valid.to_numpy().reshape(-1), dtype=torch.float32),  # Convert to NumPy array
     )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
@@ -62,14 +64,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
    with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return predicted values
\ No newline at end of file
+    return np.array(predictions)  # Return predicted values
File 4 of 7

@@ -1,7 +1,8 @@
+import numpy as np
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import mean_squared_error
-import numpy as np
 
+
 def select(X: pd.DataFrame) -> pd.DataFrame:
     """
@@ -10,6 +11,7 @@ def select(X: pd.DataFrame) -> pd.DataFrame:
     # For now, we assume all features are relevant. This can be expanded to feature selection logic.
     return X
 
+
 def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series):
     """
     Define and train the Random Forest model. Merge feature selection into the pipeline.
@@ -32,6 +34,7 @@ def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame, y_valid: pd.Series):
 
     return model
 
+
 def predict(model, X):
     """
     Keep feature selection's consistency and make predictions.
@@ -42,4 +45,4 @@ def predict(model, X):
     # Predict using the trained model
     y_pred = model.predict(X_selected)
 
-    return y_pred
\ No newline at end of file
+    return y_pred
File 5 of 7

@@ -1,10 +1,12 @@
 import pandas as pd
 import xgboost as xgb
 
+
 def select(X: pd.DataFrame) -> pd.DataFrame:
     # Ignore feature selection logic
     return X
 
+
 def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
     """Define and train the model. Merge feature_select"""
     X_train = select(X_train)
@@ -24,11 +26,12 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
 
     return bst
 
+
 def predict(model, X):
     """
     Keep feature select's consistency.
     """
     X = select(X)
     dtest = xgb.DMatrix(X)
     y_pred = model.predict(dtest)
-    return y_pred
\ No newline at end of file
+    return y_pred
File 6 of 7

@@ -14,18 +14,21 @@
 np.random.seed(SEED)
 DIRNAME = Path(__file__).absolute().resolve().parent
 
+
 def compute_rmse(y_true, y_pred):
     """Compute RMSE for regression."""
     mse = mean_squared_error(y_true, y_pred)
     rmse = np.sqrt(mse)
     return rmse
 
+
 def import_module_from_path(module_name, module_path):
     spec = importlib.util.spec_from_file_location(module_name, module_path)
     module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(module)
     return module
 
+
 # 1) Preprocess the data
 X_train, X_valid, y_train, y_valid, X_test, ids = preprocess_script()
 
@@ -97,4 +100,4 @@ def import_module_from_path(module_name, module_path):
 
 # 8) Submit predictions for the test set
 submission_result = pd.DataFrame({"id": ids, "price": y_test_pred})
-submission_result.to_csv("submission.csv", index=False)
\ No newline at end of file
+submission_result.to_csv("submission.csv", index=False)
File 7 of 7

@@ -1,12 +1,13 @@
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
-import numpy as np
 
 # Check if a GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 # Restored three-layer model structure
 class HybridFeatureInteractionModel(nn.Module):
@@ -26,6 +27,7 @@ def forward(self, x):
         x = torch.sigmoid(self.fc3(x))
         return x
 
+
 # Training function
 def fit(X_train, y_train, X_valid, y_valid):
     num_features = X_train.shape[1]
@@ -34,10 +36,12 @@ def fit(X_train, y_train, X_valid, y_valid):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     # Convert to TensorDataset and create DataLoader
-    train_dataset = TensorDataset(torch.tensor(X_train.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_train.reshape(-1), dtype=torch.float32))
-    valid_dataset = TensorDataset(torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
-                                  torch.tensor(y_valid.reshape(-1), dtype=torch.float32))
+    train_dataset = TensorDataset(
+        torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.reshape(-1), dtype=torch.float32)
+    )
+    valid_dataset = TensorDataset(
+        torch.tensor(X_valid.to_numpy(), dtype=torch.float32), torch.tensor(y_valid.reshape(-1), dtype=torch.float32)
+    )
     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
     valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
 
@@ -58,14 +62,15 @@ def fit(X_train, y_train, X_valid, y_valid):
 
     return model
 
+
 # Prediction function
 def predict(model, X):
     model.eval()
     predictions = []
     with torch.no_grad():
         X_tensor = torch.tensor(X.values, dtype=torch.float32).to(device)  # Move data to the device
         for i in tqdm(range(0, len(X_tensor), 32), desc="Predicting", leave=False):
-            batch = X_tensor[i:i + 32]  # Predict in batches
+            batch = X_tensor[i : i + 32]  # Predict in batches
             pred = model(batch).squeeze().cpu().numpy()  # Move results back to CPU
             predictions.extend(pred)
-    return np.array(predictions)  # Return boolean predictions
\ No newline at end of file
+    return np.array(predictions)  # Return boolean predictions
