diff --git a/recognition/LICENSE b/recognition/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/recognition/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/recognition/siamese_richard_chantra/README.MD b/recognition/siamese_richard_chantra/README.MD new file mode 100644 index 0000000000..de17c0258b --- /dev/null +++ b/recognition/siamese_richard_chantra/README.MD @@ -0,0 +1,193 @@ +# Classification of Melanoma using the ISIC 2020 Kaggle Challenge Dataset with Siamese Networks + +Author: Richard Chantra +Student Number: s43032053 + +## Problem Statement + +Melanomas are responsible for 75% of skin cancer deaths, with an estimated 7,000 annual fatalities. This project aims to assist dermatologists by developing a computer vision system to assist in classifying melanomas. 
We utilize the International Skin Imaging Collaboration (ISIC) 2020 Dataset, the largest publicly available collection of dermatologically-QC skin lesions, with the goal of achieving 0.8 accuracy on the test set. + +## Structure of Dataset + +The dataset comes from the ISIC 2020 Challenge and presents a significant class imbalance challenge: + +- Total Images: 33,126 + - Benign (non-melanoma): 98% (32,626 images) + - Malignant (melanoma): 2% (584 images) +- Image Format: 256x256 pixel JPG files +- Source: https://www.kaggle.com/datasets/nischaydnk/isic-2020-jpg-256x256-resized/data + +## Preprocessing + +Our preprocessing pipeline addresses several key challenges: + +**Adjusting Class Imbalance:** +The extreme 98:2 ratio of benign to malignant cases required careful handling. We implemented an oversampling strategy targeting a 67:33 (benign:malignant) distribution in the training set. This ratio was chosen after extensive experimentation, as it provided optimal balance between benign case accuracy and melanoma detection sensitivity. + +**Data Augmentation:** +To improve model robustness, we implemented a controlled augmentation strategy: +- Random horizontal and vertical flips +- Random rotations between 0-360 degrees +- Augmentations applied with 30% probability to maintain dataset characteristics +- Ratio of transformations: horizontal flips : vertical flips : rotations = 1:1:2 + +**Image Normalization:** +Images are normalized using ResNet50's pretrained requirements: +- Mean values: [0.485, 0.456, 0.406] +- Standard deviation: [0.229, 0.224, 0.225] +This normalization ensures optimal feature extraction from the pretrained network. + +**Dataset Handling:** +- 80:20 train-test split for model evaluation +- Siamese pair creation with 50% similar and 50% dissimilar pairs +- Careful control to prevent self-pairing of images + +## Architecture + +A Siamese network was used with ResNet50 to learn features from melanoma images. 
ResNet50 creates 2048-length vectors from each image and these embeddings help identify melanomas. + +The Siamese setup compares images in pairs using two ResNet50s that share the same weights. We used contrastive loss because we only need to separate two classes: benign and malignant. This was simpler than triplet loss which would add unnecessary complexity. + +![Siamese Network Architecture](assets/siamese_architecture.png) + +The MLP classifier takes these features and makes the final decision. It reduces the 2048 features through three layers (128→64→32→1). We added high dropout rates (0.7, 0.5) because we had few malignant samples and needed to prevent overfitting. + +## Training Observations + +Key insights from the training process: + +1. Class imbalance significantly impacts model performance: + - Initial experiments showed bias toward benign prediction + - 67:33 ratio provided best balance of sensitivity and specificity + +2. Model Stability: + - Embeddings showed overfitting beyond 8 epochs + - Addressed through learning rate adjustment and weight decay + - Upsampling outperformed downsampling in maintaining data characteristics + +3. Class Distribution Effects: + - 50:50 split showed high malignant recall but poor benign performance + - Final 67:33 ratio achieved better overall balance + +## Results + + +### Performance Metrics +- Overall Accuracy: 94% +- Malignant Detection Rate: 32% +- Benign Accuracy: 95% +- ROC-AUC Score: 0.792 + +### Detailed Classification Performance +``` + Precision Recall F1-score Support +Benign 0.99 0.95 0.97 6509 +Malignant 0.10 0.32 0.15 117 +``` + +### Confusion Matrix +``` +Predicted: Benign Malignant +Actual Benign: 6162 347 +Actual Malignant: 79 38 +``` + +### Training and Evaluation Plots + +Embedding Distribution +This plot shows the distribution of embedding distances. There is a clear separation between similar and dissimilar pairs. 
+ +t-SNE Visualization +This is a t-SNE visualization of learned embeddings and shows the ability of the Siamese Network to create useful clusters. + +MLP Loss +This is a plot of the MLP Classifier training loss and shows consistent convergence. + +Siamese Loss +This is a plot of the Siamese Network training loss and it demonstrates stable learning. + +ROC Curve +This is the ROC curve with AUC = 0.792. This indicates good discriminative ability. + +### Discussion +The model achieved mixed results across different metrics. The overall accuracy was 94% but only 32% of melanomas were detected. The benign detection rate was strong at 95% accuracy. + +Looking at the confusion matrix: +6162 Benign were correctly identified +347 Benign were mistakenly flagged as malignant +79 Malignant were missed +38 Malignant were caught + +The training graphs show steady improvement. Siamese network loss dropped from 0.23 to 0.08 over 15 epochs, while the MLP classifier stabilized at 0.16 loss after 10 epochs. +The t-SNE visualization displays clear grouping of similar cases, though some overlap exists between benign and malignant clusters. The ROC curve analysis produced an AUC score of 0.792, indicating decent separation between classes despite the severe data imbalance. + +## Conclusions + +The results highlight a key issue in melanoma detection: achieving high overall accuracy doesn't mean the system works well enough for practical use. The results suggest the model can spot general patterns separating benign from malignant cases but struggles to achieve high accuracy. More melanoma samples and targeted architectural changes could improve detection rates. + +## Instructions + +1. **Data Preparation:** + - Download the ISIC 2020 dataset from Kaggle + - Extract images to a designated folder + - Ensure CSV metadata file is present + +2. **Environment Setup:** + - Install Python 3.8 or higher + - Install required dependencies + - Set up appropriate paths in configuration + +3. 
**Model Training:** + - `dataset.py` only needs to be run to get an overview of the data otherwise all data preparation happens in `train.py` + - Run `train.py` with any additional parameters + - Monitor training progress (Using the current set up should take around 30 minutes using a NVIDIA L40s) + - Review generated metrics + +4. **Making Predictions:** + - Ensure images are in a designated directory + - Run `predict.py` with any additional parameters + - Review classification results + +## Files + +- `modules.py`: Defines Siamese network, MLP classifier, loss functions, and evaluation +- `dataset.py`: Manages data loading, augmentation, and data balancing +- `train.py`: Trains the Siamese Network and MLP classifier +- `predict.py`: Performs predictions and evaluation on new image data +- `README.md`: Project documentation + + +## Dependencies +```markdown +matplotlib==3.8.2 +numpy==2.1.2 +pandas==2.2.3 +Pillow==11.0.0 +scikit_learn==1.3.2 +seaborn==0.13.2 +torch==2.2.1+cu121 +torchvision==0.17.1+cu121 +tqdm==4.66.5 +``` + +## References + +1. Becoming Human. (n.d.). *Siamese networks: Algorithm, applications and PyTorch implementation*. Retrieved from https://becominghuman.ai/siamese-networks-algorithm-applications-and-pytorch-implementation-4ffa3304c18 + +2. Song, T. (n.d.). *PyTorch implementation of Siamese network*. Retrieved from https://tianyusong.com/projects/pytorch-implementation%E2%80%8B-siamese-network/ + +3. Challenge Enthusiast. (n.d.). *Training a Siamese model with a triplet loss function on MNIST dataset using PyTorch*. Retrieved from https://challengeenthusiast.com/training-a-siamese-model-with-a-triplet-loss-function-on-mnist-dataset-using-pytorch-225908e59bda + +4. Analytics Vidhya. (n.d.). *A friendly introduction to Siamese networks*. Retrieved from https://medium.com/analytics-vidhya/a-friendly-introduction-to-siamese-networks-283f31bf38cd + +5. Hackernoon. (n.d.). *One-shot learning with Siamese networks in PyTorch*. 
Retrieved from https://hackernoon.com/one-shot-learning-with-siamese-networks-in-pytorch-8ddaab10340e?source=post_page-----283f31bf38cd-------------------------------- + +6. PyTorch. (n.d.). *Siamese network main code example*. GitHub. Retrieved from https://github.com/pytorch/examples/blob/main/siamese_network/main.py + +7. Analytics India Magazine. (n.d.). *A beginner's guide to Scikit-learn’s MLPClassifier*. Retrieved from https://analyticsindiamag.com/ai-mysteries/a-beginners-guide-to-scikit-learns-mlpclassifier/ + +8. GeeksforGeeks. (n.d.). *How to normalize images in PyTorch*. Retrieved from https://www.geeksforgeeks.org/how-to-normalize-images-in-pytorch/ + +9. Abdallah, A. (2022). *Oversampling for better machine learning with imbalanced data*. Medium. Retrieved from https://medium.com/@abdallahashraf90x/oversampling-for-better-machine-learning-with-imbalanced-data-68f9b5ac2696 + +10. Metaor AI. (2023). *Solving the class imbalance problem*. Medium. Retrieved from https://medium.com/metaor-artificial-intelligence/solving-the-class-imbalance-problem-58cb926b5a0f \ No newline at end of file diff --git a/recognition/siamese_richard_chantra/assets/confusion_matrix.png b/recognition/siamese_richard_chantra/assets/confusion_matrix.png new file mode 100644 index 0000000000..4787fe3efc Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/confusion_matrix.png differ diff --git a/recognition/siamese_richard_chantra/assets/embeddings_distribution.png b/recognition/siamese_richard_chantra/assets/embeddings_distribution.png new file mode 100644 index 0000000000..ea86aa38e3 Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/embeddings_distribution.png differ diff --git a/recognition/siamese_richard_chantra/assets/embeddings_tsne.png b/recognition/siamese_richard_chantra/assets/embeddings_tsne.png new file mode 100644 index 0000000000..d76f38acbb Binary files /dev/null and 
b/recognition/siamese_richard_chantra/assets/embeddings_tsne.png differ diff --git a/recognition/siamese_richard_chantra/assets/mlp_loss.png b/recognition/siamese_richard_chantra/assets/mlp_loss.png new file mode 100644 index 0000000000..3c775bd3c5 Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/mlp_loss.png differ diff --git a/recognition/siamese_richard_chantra/assets/roc_curve.png b/recognition/siamese_richard_chantra/assets/roc_curve.png new file mode 100644 index 0000000000..3d891fe4ff Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/roc_curve.png differ diff --git a/recognition/siamese_richard_chantra/assets/siamese_architecture.png b/recognition/siamese_richard_chantra/assets/siamese_architecture.png new file mode 100644 index 0000000000..05c71ebbbe Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/siamese_architecture.png differ diff --git a/recognition/siamese_richard_chantra/assets/siamese_loss.png b/recognition/siamese_richard_chantra/assets/siamese_loss.png new file mode 100644 index 0000000000..e6698c092d Binary files /dev/null and b/recognition/siamese_richard_chantra/assets/siamese_loss.png differ diff --git a/recognition/siamese_richard_chantra/dataset.py b/recognition/siamese_richard_chantra/dataset.py new file mode 100644 index 0000000000..45642767eb --- /dev/null +++ b/recognition/siamese_richard_chantra/dataset.py @@ -0,0 +1,233 @@ +""" +- Manages and preprocesses melanoma dataset for model training +- Includes data loading, augmentation, and DataLoader generation + +@author: richardchantra +@student_number: 43032053 +""" + +import pandas as pd +from PIL import Image +import torch +from torch.utils.data import DataLoader, Dataset +from sklearn.model_selection import train_test_split +import torchvision.transforms as transforms +import numpy as np +import argparse + +class DataManager: + """ + Managing the data flows and processing for the ISIC-2020 dataset + """ + def 
__init__(self, csv_path, img_dir): + self.csv_path = csv_path + self.img_dir = img_dir + self.data = None + self.train_loader = None + self.test_loader = None + + def load_data(self): + """ + Loading metadata from the CSV file + """ + self.data = pd.read_csv(self.csv_path) + + def balance_dataset(self, data, data_augmentation='oversampling', target_ratio=None): + """ + Balance dataset using specified sampling strategy + target_ratio is the ratio of benign desired in the balanced dataset + """ + if data_augmentation is None: + return data + + # Separate majority and minority classes + malignant = data[data['target'] == 1] + benign = data[data['target'] == 0] + + if data_augmentation == 'oversampling': + n_samples = len(benign) + malignant_oversampled = malignant.sample(n=n_samples, replace=True, random_state=42) + balanced_data = pd.concat([benign, malignant_oversampled]) + elif data_augmentation == 'undersampling': + n_samples = len(malignant) + benign_undersampled = benign.sample(n=n_samples, random_state=42) + balanced_data = pd.concat([malignant, benign_undersampled]) + elif data_augmentation == 'ratio' and target_ratio is not None: + # Calculate target numbers for the specified ratio + total_samples = len(data) + target_benign_samples = int(total_samples * target_ratio) + target_malignant_samples = total_samples - target_benign_samples + + # Downsample benign or keep to maintain target + if len(benign) > target_benign_samples: + benign_sampled = benign.sample(n=target_benign_samples, random_state=42) + else: + benign_sampled = benign + + # Oversample malignant to reach target + malignant_oversampled = malignant.sample(n=target_malignant_samples, replace=True, random_state=42) + balanced_data = pd.concat([benign_sampled, malignant_oversampled]) + else: + raise ValueError(f"Invalid parameter for: data_augmentation or target_ratio") + + return balanced_data.sample(frac=1, random_state=42).reset_index(drop=True) + + def split_data(self): + """ + Split the data into 
training and testing sets and balance + """ + # Normal train test split + train_data, test_data = train_test_split(self.data, test_size=0.2, random_state=42, stratify=self.data['target']) + # Balance dataset based on oversampling, undersampling or oversampling using a ratio + balanced_train_data = self.balance_dataset(train_data, data_augmentation='ratio', target_ratio=0.67) + + return balanced_train_data, test_data + + def create_dataloaders(self, batch_size=256): + """ + Creating torch DataLoader objects for training and testing + """ + train_data, test_data = self.split_data() + + # Create DataLoader + train_dataset = SiameseDataset(train_data, self.img_dir) + test_dataset = SiameseDataset(test_data, self.img_dir) + + self.train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=True, + num_workers=4 + ) + self.test_loader = DataLoader( + test_dataset, + batch_size=batch_size, + shuffle=False, + pin_memory=True, + num_workers=4 + ) + + def print_statistics(self): + """ + Print dataset statistics before and after balancing + """ + # Original dataset statistics + class_distribution = self.data['target'].value_counts() + print(f"Original dataset statistics:\n" + f"Total images: {len(self.data)}\n" + f"Classes distribution:\n{class_distribution}\n") + + # Split and display training/testing data statistics + train_data, test_data = self.split_data() + train_distribution = train_data['target'].value_counts() + test_distribution = test_data['target'].value_counts() + print(f"After balancing training data:\n" + f"Training set distribution:\n{train_distribution}\n" + f"Test set distribution:\n{test_distribution}\n" + f"\nNote: 0 = benign, 1 = malignant") + +class SiameseDataset(Dataset): + """ + Dataset for Siamese Network training and melanoma classification + """ + def __init__(self, data, img_dir): + self.data = data + self.img_dir = img_dir + self.diagnosis_labels = data['target'].values + self.image_ids = 
data['isic_id'].values + + # resize and normalize + self.transform = transforms.Compose([ + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) +]) + def __len__(self): + """ + Returns the length of the dataset + """ + return len(self.data) + + def __getitem__(self, idx): + """ + Takes an index and returns a pair of images with their labels + """ + # Get the first image and diagnosis label + img1_id = self.image_ids[idx] + img1_diagnosis = self.diagnosis_labels[idx] + + # Random choice of same-diagnosis or different-diagnosis pair + should_get_same_class = np.random.random() > 0.5 + + if should_get_same_class: + # Get another image with same diagnosis label + same_class_indices = np.where(self.diagnosis_labels == img1_diagnosis)[0] + second_idx = np.random.choice(same_class_indices) + while second_idx == idx: # Don't pick same pair + second_idx = np.random.choice(same_class_indices) + img2_id = self.image_ids[second_idx] + similarity_label = torch.tensor(0.0) # 0 = similar pair + else: + # Get an image with different diagnosis + other_class_indices = np.where(self.diagnosis_labels != img1_diagnosis)[0] + second_idx = np.random.choice(other_class_indices) + img2_id = self.image_ids[second_idx] + similarity_label = torch.tensor(1.0) # 1 = dissimilar pair + + # Get second image's diagnosis + img2_diagnosis = self.diagnosis_labels[second_idx] + + # Load and preprocess images + img1 = self.load_and_transform(img1_id) + img2 = self.load_and_transform(img2_id) + + return { + 'img1': img1, + 'img2': img2, + 'similarity_label': similarity_label, + 'diagnosis1': torch.tensor(img1_diagnosis, dtype=torch.float32), + 'diagnosis2': torch.tensor(img2_diagnosis, dtype=torch.float32) + } + + def load_and_transform(self, image_id, threshold=0.7): + """ + Load image and apply random augmentations if over theshold + """ + img_path = f'{self.img_dir}{image_id}.jpg' + image = 
Image.open(img_path).convert('RGB') + + # 30% chance of augmentation + if np.random.random() > threshold: + # List of possible augmentations: equal chance of a flip and a rotation + augmentations = [ + transforms.functional.hflip, + transforms.functional.vflip, + lambda img: transforms.functional.rotate(img, np.random.uniform(0, 360)), + lambda img: transforms.functional.rotate(img, np.random.uniform(0, 360)) + ] + + # Random augmentation of image + aug_func = np.random.choice(augmentations) + image = aug_func(image) + + # Apply standard preprocessing + return self.transform(image) + +if __name__ == "__main__": + # Argument Parsing + parser = argparse.ArgumentParser(description="Dataset Manager for Melanoma Classification") + parser.add_argument('--csv_path', type=str, default='archive/train-metadata.csv', + help='Path to the CSV metadata file') + parser.add_argument('--img_dir', type=str, default='archive/train-image/image/', + help='Directory path to the image files') + parser.add_argument('--batch_size', type=int, default=256, + help='Batch size for DataLoader') + args = parser.parse_args() + + # Initialize DataManager with arguments + data_manager = DataManager(args.csv_path, args.img_dir) + data_manager.load_data() + data_manager.create_dataloaders(batch_size=args.batch_size) + data_manager.print_statistics() \ No newline at end of file diff --git a/recognition/siamese_richard_chantra/modules.py b/recognition/siamese_richard_chantra/modules.py new file mode 100644 index 0000000000..81b64d6c6e --- /dev/null +++ b/recognition/siamese_richard_chantra/modules.py @@ -0,0 +1,314 @@ +""" +- The Siamese Network, MLP Classifier and Contrastive Loss are defined for use in train.py and predict.py +- The Predict class is defined for prediction using a saved model in predict.py +- The Evaluation class is defined for evaluating the performance post training in train.py + +@author: richardchantra +@student_number: 43032053 +""" + +import os +import torch +import torch.nn as 
nn +import torchvision.models as models +from torchvision.models import ResNet50_Weights +import torchvision.transforms as transforms +from tqdm import tqdm +import numpy as np +import matplotlib.pyplot as plt +from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix +import seaborn as sns +from PIL import Image + +class SiameseNetwork(nn.Module): + """ + Siamese Network for learning image embeddings of benign and malignant melanomas. + """ + def __init__(self): + super(SiameseNetwork, self).__init__() + + # ResNet50 Feature Extractor + resnet = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1) + self.features = nn.Sequential(*list(resnet.children())[:-1]) + + # Fully Connected Layer + self.fc = nn.Sequential( + nn.Linear(2048, 512), + nn.ReLU(), + nn.Dropout(0.3), + nn.Linear(512, 256), + nn.ReLU(), + nn.Linear(256, 128) + ) + + def forward(self, x1, x2): + """ + Forward pass to compute embeddings for a pair of images + """ + # Get embeddings for both images + out1 = self.get_embedding(x1) + out2 = self.get_embedding(x2) + return out1, out2 + + def get_embedding(self, x): + """ + Computing embeddings for a single image + """ + features = self.features(x) + features = features.view(features.size(0), -1) + return self.fc(features) + + def contrastive_loss(self, output1, output2, label, margin=1.0): + """ + Contrastive loss for Siamese Network training + """ + # Calculate euclidean distance + euclidean_distance = torch.sqrt(torch.sum((output1 - output2) ** 2, dim=1) + 1e-6) + + # Calculate contrastive loss + loss = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) + + label * torch.pow(torch.clamp(margin - euclidean_distance, min=0.0), 2)) + + return loss + +class MLPClassifier(nn.Module): + """ + MLP Classifier using Siamese Network embeddings to predict melanoma + """ + def __init__(self, embedding_dim=128): + super(MLPClassifier, self).__init__() + self.classifier = nn.Sequential( + nn.Linear(embedding_dim, 64), + 
class MLPClassifier(nn.Module):
    """
    Small MLP head that maps a Siamese embedding to a malignancy probability.
    """

    def __init__(self, embedding_dim=128):
        super(MLPClassifier, self).__init__()
        # Two hidden layers with aggressive dropout to limit overfitting,
        # finished by a sigmoid so the output is a probability in [0, 1].
        layers = [
            nn.Linear(embedding_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, embedding):
        """
        Input: embedding from Siamese network
        Output: probability of being malignant (0 = benign, 1 = malignant)
        """
        return self.classifier(embedding)
class Predict:
    """
    Runs inference with a trained SiameseNetwork + MLPClassifier pair.

    Supports single-image prediction, directory-level batch prediction,
    and DataLoader-level prediction for evaluation.
    """

    def __init__(self, siamese_network, mlp_classifier, device):
        self.siamese_network = siamese_network
        self.mlp_classifier = mlp_classifier
        self.device = device

    @staticmethod
    def load_image(image_path):
        """
        Load and preprocess a single image for prediction.

        Uses the same pipeline as training: resize to 224x224, tensorize,
        and normalize with ImageNet statistics.
        """
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        image = Image.open(image_path).convert('RGB')
        # unsqueeze adds the batch dimension the models expect.
        return transform(image).unsqueeze(0)

    def predict_image(self, image_path):
        """
        Predict whether a single image is benign (0) or malignant (1).

        Returns:
            (prediction, probability): hard 0/1 label and raw sigmoid score.
        """
        image = self.load_image(image_path).to(self.device)

        # Eval mode: disable dropout for deterministic inference.
        self.siamese_network.eval()
        self.mlp_classifier.eval()

        with torch.no_grad():
            embedding = self.siamese_network.get_embedding(image)
            output = self.mlp_classifier(embedding)
            prediction = (output > 0.5).float()
            probability = output.item()

        return prediction.item(), probability

    def batch_predict(self, folder):
        """
        Performs predictions on all .jpg images within a specified directory.

        Returns:
            parallel lists of predictions, probabilities, and file names.
        """
        predictions = []
        probabilities = []
        image_names = []

        # BUG FIX: the original sliced os.listdir(folder)[:10] — a leftover
        # debug limit that contradicted the "all images" contract. sorted()
        # additionally makes the traversal order deterministic across platforms.
        for filename in tqdm(sorted(os.listdir(folder)), desc="Predicting images"):
            # Note: ('.jpg') in the original was just a string, not a tuple.
            if filename.endswith('.jpg'):
                image_path = os.path.join(folder, filename)
                prediction, probability = self.predict_image(image_path)

                predictions.append(prediction)
                probabilities.append(probability)
                image_names.append(filename)

        return predictions, probabilities, image_names

    def evaluate_predictions(self, predictions, probabilities):
        """
        Summarize a batch of predictions (class counts and mean probability).
        """
        benign_count = predictions.count(0)
        malignant_count = predictions.count(1)
        avg_probability = np.mean(probabilities)

        # NOTE(review): there is no ground truth here — this report compares
        # the predictions against an all-malignant vector, so it does not
        # measure real accuracy. Kept for interface compatibility with
        # callers that print results['classification_report']; confirm intent.
        report = classification_report(
            predictions, [1] * len(predictions), target_names=['Benign', 'Malignant']
        )

        return {
            'benign_count': benign_count,
            'malignant_count': malignant_count,
            'avg_probability': avg_probability,
            'classification_report': report
        }

    def predict(self, data_loader):
        """
        Run predictions over a DataLoader of image pairs.

        Uses the first image of each pair ('img1') and its label
        ('diagnosis1').

        Returns:
            (preds, probs, labels) as numpy arrays.
        """
        self.siamese_network.eval()
        self.mlp_classifier.eval()

        preds = []
        probs = []
        labels = []

        with torch.no_grad():
            for batch in tqdm(data_loader, desc="Predicting"):
                images = batch['img1'].to(self.device)
                batch_labels = batch['diagnosis1'].to(self.device)

                embeddings = self.siamese_network.get_embedding(images)

                # BUG FIX: squeeze(1) instead of squeeze() — a batch of size 1
                # would otherwise collapse to a 0-d tensor and break
                # list.extend() on the resulting 0-d numpy array.
                batch_probs = self.mlp_classifier(embeddings).squeeze(1)
                batch_preds = (batch_probs > 0.5).float()

                # Move results to CPU before accumulating.
                preds.extend(batch_preds.cpu().numpy())
                probs.extend(batch_probs.cpu().numpy())
                labels.extend(batch_labels.cpu().numpy())

        return np.array(preds), np.array(probs), np.array(labels)
class Evaluate:
    """
    Computes, plots, and persists classification metrics for melanoma
    predictions.
    """

    def __init__(self, preds, probs, labels):
        self.preds = preds
        self.probs = probs
        self.labels = labels

    def evaluate(self):
        """
        Bundle the headline metrics into a single dictionary.
        """
        return {
            'basic_metrics': self._get_basic_metrics(),
            'roc_auc': self._get_roc_auc(),
            'class_report': classification_report(self.labels, self.preds,
                                                  target_names=['Benign', 'Malignant'])
        }

    def _get_basic_metrics(self):
        """
        Overall accuracy plus per-class (malignant / benign) accuracy,
        each rounded to two decimal places.
        """
        hits = self.preds == self.labels
        is_malignant = self.labels == 1
        is_benign = self.labels == 0

        return {
            'accuracy': hits.mean().round(2),
            'malignant_accuracy': hits[is_malignant].mean().round(2),
            'benign_accuracy': hits[is_benign].mean().round(2),
        }

    def _get_roc_auc(self):
        """
        ROC curve points and the area under the curve.
        """
        fpr, tpr, _ = roc_curve(self.labels, self.probs)
        return {'fpr': fpr, 'tpr': tpr, 'auc': auc(fpr, tpr)}

    def plot_results(self):
        """
        Write roc_curve.png and confusion_matrix.png to the working directory.
        """
        # ROC curve with the chance diagonal for reference.
        roc_data = self._get_roc_auc()
        plt.figure(figsize=(10, 8))
        sns.lineplot(x=roc_data['fpr'], y=roc_data['tpr'])
        sns.lineplot(x=[0, 1], y=[0, 1], linestyle='--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve (AUC = {roc_data["auc"]:.3f})')
        plt.savefig('roc_curve.png')
        plt.close()

        # Confusion matrix heatmap.
        cm = confusion_matrix(self.labels, self.preds)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='vlag',
                    xticklabels=['Benign', 'Malignant'],
                    yticklabels=['Benign', 'Malignant'])
        plt.title('Confusion Matrix')
        plt.savefig('confusion_matrix.png')
        plt.close()

    def save_results(self, filename='evaluation_results.txt'):
        """
        Write all evaluation metrics to a text file.
        """
        results = self.evaluate()
        metrics = results['basic_metrics']

        report_lines = [
            "\nEvaluation Results:\n",
            f"Overall Accuracy: {metrics['accuracy']}\n",
            f"Malignant Accuracy: {metrics['malignant_accuracy']}\n",
            f"Benign Accuracy: {metrics['benign_accuracy']}\n",
            f"ROC-AUC Score: {results['roc_auc']['auc']}\n\n",
            f"Classification Report:\n{results['class_report']}\n",
        ]
        with open(filename, 'w') as f:
            f.writelines(report_lines)

    @staticmethod
    def plot_loss(data, title, xlabel, ylabel, save_path, color='b', marker='o'):
        """
        Plot a training-loss curve and save it to `save_path`.
        """
        plt.figure(figsize=(8, 6))
        plt.plot(data, label=title, marker=marker, color=color)
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(save_path)
        plt.close()
        print(f"Plot saved at: {save_path}")
def main():
    """
    Load saved model weights and classify every image in a directory,
    printing summary statistics for the batch.
    """
    # Argument Parsing
    parser = argparse.ArgumentParser(description="Predicting melanomas using trained models on a directory of melanoma images")
    parser.add_argument('--folder_path', type=str, default='archive/test-image/image/',
                        help='Directory with new images for prediction')
    parser.add_argument('--siamese_model_path', type=str, default='best_siamese_network.pth',
                        help='Path to the saved Siamese Network model weights')
    parser.add_argument('--mlp_model_path', type=str, default='best_mlp_classifier.pth',
                        help='Path to the saved MLP Classifier model weights')
    args = parser.parse_args()

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Build model skeletons on the chosen device
    siamese_network = SiameseNetwork().to(device)
    mlp_classifier = MLPClassifier().to(device)

    # BUG FIX: map_location lets checkpoints saved on GPU load on a
    # CPU-only machine; the original torch.load would raise in that case.
    siamese_network_checkpoint = torch.load(args.siamese_model_path, map_location=device)
    mlp_classifier_checkpoint = torch.load(args.mlp_model_path, map_location=device)
    siamese_network.load_state_dict(siamese_network_checkpoint['model_state_dict'])
    mlp_classifier.load_state_dict(mlp_classifier_checkpoint['model_state_dict'])

    # Create Predict instance and classify the whole directory
    predictor = Predict(siamese_network, mlp_classifier, device)
    predictions, probabilities, image_names = predictor.batch_predict(args.folder_path)

    # Evaluate and display results
    results = predictor.evaluate_predictions(predictions, probabilities)
    print(f"\nEvaluation Results for Directory: {args.folder_path}")
    print(f"Benign Count: {results['benign_count']}")
    print(f"Malignant Count: {results['malignant_count']}")
    print(f"Average Probability of Malignant Melanoma: {results['avg_probability']:.2f}")
    print("\nClassification Report:\n", results['classification_report'])
def train_siamese_network(siamese_network, optimizer, train_loader, epochs=5, margin=1.6, device=None):
    """
    Train the Siamese Network with contrastive loss.

    Args:
        siamese_network: model exposing forward(img1, img2) and contrastive_loss.
        optimizer: optimizer over the network's parameters.
        train_loader: iterable of dict batches with 'img1', 'img2',
            'similarity_label' tensors.
        epochs: number of passes over train_loader.
        margin: contrastive-loss margin for dissimilar pairs.
        device: target device; defaults to the model's own device.

    Returns:
        list of mean losses, one entry per epoch.

    Side effects:
        Saves the best checkpoint to 'best_siamese_network.pth'.
    """
    # BUG FIX: derive the device from the model instead of relying on an
    # implicit module-level `device` global set inside main().
    if device is None:
        device = next(siamese_network.parameters()).device

    siamese_network.train()
    best_loss = float('inf')
    epoch_losses = []

    for epoch in range(epochs):
        running_loss = 0.0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Siamese"):
            # Move the pair and its similarity label to the target device.
            img1 = batch['img1'].to(device)
            img2 = batch['img2'].to(device)
            similarity_label = batch['similarity_label'].to(device)

            # Forward pass
            optimizer.zero_grad()
            embedding1, embedding2 = siamese_network(img1, img2)
            loss = siamese_network.contrastive_loss(embedding1, embedding2, similarity_label, margin)

            # Backward pass with gradient clipping for stability.
            loss.backward()
            torch.nn.utils.clip_grad_norm_(siamese_network.parameters(), 1.0)
            optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        epoch_losses.append(epoch_loss)
        print(f"Siamese Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

        # Checkpoint whenever the epoch loss improves.
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': siamese_network.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': best_loss,
            }, 'best_siamese_network.pth')
    return epoch_losses
def train_mlp_classifier(siamese_network, mlp_classifier, optimizer, train_loader, epochs=5, device=None):
    """
    Train the MLP classifier on frozen Siamese embeddings with BCE loss.

    Args:
        siamese_network: trained embedding model (kept in eval mode).
        mlp_classifier: MLP head to train.
        optimizer: optimizer over the classifier's parameters.
        train_loader: iterable of dict batches with 'img1' and 'diagnosis1'.
        epochs: number of passes over train_loader.
        device: target device; defaults to the classifier's own device.

    Returns:
        list of mean losses, one entry per epoch.

    Side effects:
        Saves the best checkpoint to 'best_mlp_classifier.pth'.
    """
    # BUG FIX: derive the device from the model instead of relying on an
    # implicit module-level `device` global set inside main().
    if device is None:
        device = next(mlp_classifier.parameters()).device

    mlp_classifier.train()
    siamese_network.eval()  # embeddings stay frozen during classifier training
    criterion = nn.BCELoss()
    best_acc = 0.0
    epoch_losses = []

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Classifier"):
            # Single image and its diagnosis label (unsqueezed to match
            # the classifier's (N, 1) output shape).
            img1 = batch['img1'].to(device)
            diagnosis_label = batch['diagnosis1'].to(device).unsqueeze(1)

            # Embeddings are computed without gradient tracking.
            with torch.no_grad():
                embeddings = siamese_network.get_embedding(img1)

            # Forward pass
            optimizer.zero_grad()
            outputs = mlp_classifier(embeddings)
            loss = criterion(outputs, diagnosis_label)

            # Backward pass
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Track accuracy at the 0.5 decision threshold.
            predicted = (outputs > 0.5).float()
            total += diagnosis_label.size(0)
            correct += (predicted == diagnosis_label).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_losses.append(epoch_loss)
        accuracy = 100 * correct / total
        print(f"Classifier Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

        # Checkpoint whenever the epoch accuracy improves.
        if accuracy > best_acc:
            best_acc = accuracy
            torch.save({
                'epoch': epoch,
                'model_state_dict': mlp_classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'accuracy': best_acc,
            }, 'best_mlp_classifier.pth')
    return epoch_losses
def main():
    """
    End-to-end training pipeline: train the Siamese Network, then the MLP
    classifier on its embeddings, then evaluate on held-out data.
    """
    # Argument Parsing
    parser = argparse.ArgumentParser(description="Training a Siamese Network and MLP Classifier on melanoma images")
    parser.add_argument('--csv_path', type=str, default='archive/train-metadata.csv',
                        help='Path to the CSV metadata file')
    parser.add_argument('--img_dir', type=str, default='archive/train-image/image/',
                        help='Directory path to the image files')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='Batch size for DataLoader')
    parser.add_argument('--epochs_siamese', type=int, default=16,
                        help='Number of epochs for training the Siamese Network')
    parser.add_argument('--epochs_mlp', type=int, default=12,
                        help='Number of epochs for training the MLP Classifier')
    parser.add_argument('--save_dir', type=str, default="plots",
                        help='Directory to save training plots')
    args = parser.parse_args()

    # Set device (kept global: the train_* helpers read it as a
    # module-level name)
    global device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Setup data
    data_manager = DataManager(args.csv_path, args.img_dir)
    data_manager.load_data()
    data_manager.create_dataloaders(batch_size=args.batch_size)
    train_loader = data_manager.train_loader
    test_loader = data_manager.test_loader

    # Initialize models
    siamese_network = SiameseNetwork().to(device)
    mlp_classifier = MLPClassifier().to(device)

    # Initialize optimizers
    optimizer_siamese = optim.Adam(
        siamese_network.parameters(),
        lr=0.0004,
        weight_decay=5e-5
    )
    optimizer_mlp = optim.Adam(
        mlp_classifier.parameters(),
        lr=0.001,
        weight_decay=1e-4
    )

    # Train the Siamese Network
    print("Training Siamese Network to learn embeddings from images:")
    siamese_losses = train_siamese_network(
        siamese_network,
        optimizer_siamese,
        train_loader,
        epochs=args.epochs_siamese
    )

    # Train the MLP classifier
    print("\nTraining MLPClassifier using learned embeddings:")
    mlp_losses = train_mlp_classifier(
        siamese_network,
        mlp_classifier,
        optimizer_mlp,
        train_loader,
        epochs=args.epochs_mlp
    )

    # BUG FIX: honor --save_dir; the original re-hard-coded "plots" here,
    # silently ignoring the parsed argument.
    save_dir = args.save_dir
    os.makedirs(save_dir, exist_ok=True)

    Evaluate.plot_loss(
        siamese_losses,
        title="Siamese Network Training Loss per Epoch",
        xlabel="Epoch",
        ylabel="Loss",
        save_path=os.path.join(save_dir, "siamese_network_loss.png"),
        color='b',
        marker='o'
    )
    Evaluate.plot_loss(
        mlp_losses,
        title="MLP Classifier Training Loss per Epoch",
        xlabel="Epoch",
        ylabel="Loss",
        save_path=os.path.join(save_dir, "mlp_classifier_loss.png"),
        color='g',
        marker='s'
    )

    # Evaluate the model after training
    print("\nEvaluating the model on test data:")
    predictor = Predict(siamese_network, mlp_classifier, device)
    preds, probs, labels = predictor.predict(test_loader)

    evaluator = Evaluate(preds, probs, labels)
    results = evaluator.evaluate()

    print("\nEvaluation Results:\n")
    print(f"Overall Accuracy: {results['basic_metrics']['accuracy']}")
    print(f"Malignant Accuracy: {results['basic_metrics']['malignant_accuracy']}")
    print(f"ROC-AUC Score: {results['roc_auc']['auc']}\n")
    print(results['class_report'])

    # Save and plot evaluation results
    evaluator.plot_results()
    evaluator.save_results()


if __name__ == "__main__":
    main()