Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove validate email #442

Merged
merged 3 commits into from
Aug 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion conda_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ dependencies:
- recommonmark

- pip:
- validate_email
- tweepy
- simhash
- wikipedia
Expand Down
3 changes: 0 additions & 3 deletions orangecontrib/text/pubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np
from Bio import Entrez
from Bio import Medline
from validate_email import validate_email

try:
from Orange.misc import environ
Expand Down Expand Up @@ -173,8 +172,6 @@ class Pubmed:
MAX_BATCH_SIZE = 1000

def __init__(self, email, progress_callback=None, error_callback=None):
if not validate_email(email):
raise ValueError('{} is not a valid email address.'.format(email))

Entrez.email = email

Expand Down
7 changes: 0 additions & 7 deletions orangecontrib/text/tests/test_pubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,6 @@ def setUp(self):
error_callback=error_callback
)

def test_pubmed_object_creation(self):
self.assertRaises(
ValueError,
Pubmed,
'faulty_email'
)

def test_mesh_headings_to_class(self):
input_headings = [
'heading1 & heading2/heading3,heading4/*heading5',
Expand Down
190 changes: 111 additions & 79 deletions orangecontrib/text/widgets/owpubmed.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
import re
from datetime import date

from AnyQt.QtCore import QDate, Qt
from AnyQt.QtWidgets import (QApplication, QComboBox, QDateEdit, QTextEdit,
QFrame, QDialog, QCalendarWidget, QVBoxLayout)
from validate_email import validate_email
QFrame, QDialog, QCalendarWidget, QVBoxLayout,
QFormLayout)

from Orange.widgets import gui
from Orange.widgets.credentials import CredentialManager
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget, Msg
from orangecontrib.text.corpus import Corpus
Expand All @@ -20,11 +22,71 @@ def _i(name, icon_path='icons'):
return os.path.join(widget_path, icon_path, name)


# Deliberately loose shape check, not an RFC 5322 validator: some non-"@"
# characters, a single "@", then a domain part that contains at least one dot.
EMAIL_REGEX = re.compile(r"[^@]+@[^@]+\.[^@]+")


def validate_email(email):
    """Return True when *email* looks like ``local@domain.tld``.

    The whole string must match ``EMAIL_REGEX``. Anchoring only at the start
    would accept inputs such as ``"a@b.c@d"`` because their prefix is valid.

    Parameters
    ----------
    email : str
        Candidate address. Any non-string value (e.g. ``None``) is treated
        as invalid instead of raising ``TypeError``.

    Returns
    -------
    bool
        ``True`` if the address has a plausible shape, ``False`` otherwise.
    """
    if not isinstance(email, str):
        return False
    return EMAIL_REGEX.fullmatch(email) is not None


class Output:
    """Symbolic names for outputs.

    NOTE(review): presumably the name of the widget's output signal --
    confirm against where ``Output.CORPUS`` is used.
    """

    CORPUS = "Corpus"


class OWPubmed(OWWidget):
class EmailCredentialsDialog(OWWidget):
    """Small dialog that asks for the e-mail address used for Pubmed.

    The address typed into the line edit is checked with ``validate_email``.
    When it is valid it is stored through ``email_manager`` and handed to the
    owning widget via ``parent.sync_email``.
    """
    name = "Pubmed Email"
    want_main_area = False
    resizing_enabled = False
    # Credential store that holds the address under the 'Email' service name.
    email_manager = CredentialManager('Email')
    # Backing attribute bound to the line edit created in __init__.
    email_input = ''

    class Error(OWWidget.Error):
        invalid_credentials = Msg('This email is invalid.')

    def __init__(self, parent):
        """Build the form and pre-fill it with the stored address.

        Parameters
        ----------
        parent
            Owning widget; must provide ``sync_email(email)``.
        """
        super().__init__()
        # NOTE(review): this instance attribute hides Qt's ``parent()``
        # accessor on this object; kept as-is because ``accept`` relies on it.
        self.parent = parent
        self.api = None

        form = QFormLayout()
        form.setContentsMargins(5, 5, 5, 5)
        self.email_edit = gui.lineEdit(
            self, self, 'email_input', controlWidth=400)
        form.addRow('Email:', self.email_edit)
        self.controlArea.layout().addLayout(form)
        self.submit_button = gui.button(
            self.controlArea, self, "OK", self.accept)

        self.load_credentials()

    def setVisible(self, visible):
        """Show or hide the dialog, then give focus to the e-mail field."""
        super().setVisible(visible)
        self.email_edit.setFocus()

    def load_credentials(self):
        """Fill the line edit with the address kept in ``email_manager``."""
        # Presumably the stored key can be missing (None) before the first
        # save -- TODO confirm against CredentialManager. ``setText`` needs a
        # str, so fall back to an empty field in that case.
        self.email_edit.setText(self.email_manager.key or '')

    def save_credentials(self):
        """Store the current input in ``email_manager``."""
        self.email_manager.key = self.email_input

    def check_credentials(self):
        """Validate the current input and save it when valid.

        Returns
        -------
        bool
            ``True`` if the address passed ``validate_email`` (and was
            saved), ``False`` otherwise.
        """
        if not validate_email(self.email_input):
            return False
        self.save_credentials()
        return True

    def accept(self, silent=False):
        """Accept the dialog if the entered address is valid.

        On success the address is pushed to ``parent.sync_email`` and the
        dialog is accepted. On failure the dialog is not accepted.

        Parameters
        ----------
        silent : bool
            When true, the ``invalid_credentials`` message is neither
            cleared nor shown, so a programmatic check leaves the error
            state untouched.
        """
        if not silent:
            self.Error.invalid_credentials.clear()
        if self.check_credentials():
            self.parent.sync_email(self.email_input)
            super().accept()
        elif not silent:
            # Only report the problem when the user triggered the check.
            self.Error.invalid_credentials()

name = 'Pubmed'
description = 'Fetch data from Pubmed.'
icon = 'icons/Pubmed.svg'
Expand All @@ -39,7 +101,6 @@ class OWPubmed(OWWidget):
MIN_DATE = date(1800, 1, 1)

# Settings.
recent_emails = Setting([])
author = Setting('')
pub_date_from = Setting('')
pub_date_to = Setting('')
Expand All @@ -54,36 +115,34 @@ class OWPubmed(OWWidget):
includes_abstract = Setting(True)
includes_url = Setting(True)

email = None

class Warning(OWWidget.Warning):
no_query = Msg('Please specify the keywords for this query.')

class Error(OWWidget.Error):
    # Error messages declared for this widget.
    # ``api_error`` has one ``{}`` placeholder for the failure description.
    api_error = Msg('API error: {}.')
    # Raised when a search is started before an e-mail address was provided.
    email_error = Msg(
        'Email not set. Please set it with the email button.')

def __init__(self):
super().__init__()

self.output_corpus = None
self.pubmed_api = None
self.progress = None
self.email_is_valid = False
self.record_count = 0
self.download_running = False

# API key
self.email_dlg = self.EmailCredentialsDialog(self)
gui.button(self.controlArea, self, 'Email',
callback=self.email_dlg.exec_,
focusPolicy=Qt.NoFocus)
gui.separator(self.controlArea)

# To hold all the controls. Makes access easier.
self.pubmed_controls = []

h_box = gui.hBox(self.controlArea)
label = gui.label(h_box, self, 'Email:')
label.setMaximumSize(label.sizeHint())
# Drop-down for recent emails.
self.email_combo = QComboBox(h_box)
self.email_combo.setMinimumWidth(150)
self.email_combo.setEditable(True)
self.email_combo.lineEdit().textChanged.connect(self.sync_email)
h_box.layout().addWidget(self.email_combo)
self.email_combo.activated[int].connect(self.select_email)

# RECORD SEARCH
self.search_tabs = gui.tabWidget(self.controlArea)
# --- Regular search ---
Expand Down Expand Up @@ -183,18 +242,18 @@ def __init__(self):

# RECORD RETRIEVAL
# Text includes box.
text_includes_box = gui.widgetBox(self.controlArea,
'Text includes', addSpace=True)
self.authors_checkbox = gui.checkBox(text_includes_box, self,
'includes_authors', 'Authors')
self.title_checkbox = gui.checkBox(text_includes_box, self,
'includes_title', 'Article title')
self.mesh_checkbox = gui.checkBox(text_includes_box, self,
'includes_mesh', 'Mesh headings')
self.abstract_checkbox = gui.checkBox(text_includes_box, self,
'includes_abstract', 'Abstract')
self.url_checkbox = gui.checkBox(text_includes_box, self,
'includes_url', 'URL')
text_includes_box = gui.widgetBox(
self.controlArea, 'Text includes', addSpace=True)
self.authors_checkbox = gui.checkBox(
text_includes_box, self, 'includes_authors', 'Authors')
self.title_checkbox = gui.checkBox(
text_includes_box, self, 'includes_title', 'Article title')
self.mesh_checkbox = gui.checkBox(
text_includes_box, self, 'includes_mesh', 'Mesh headings')
self.abstract_checkbox = gui.checkBox(
text_includes_box, self, 'includes_abstract', 'Abstract')
self.url_checkbox = gui.checkBox(
text_includes_box, self, 'includes_url', 'URL')
self.pubmed_controls.append(self.authors_checkbox)
self.pubmed_controls.append(self.title_checkbox)
self.pubmed_controls.append(self.mesh_checkbox)
Expand Down Expand Up @@ -228,48 +287,29 @@ def __init__(self):
self,
'Number of records retrieved: /')

# Load the most recent emails.
self.set_email_list()

# Load the most recent queries.
self.set_keyword_list()

# Check the email and enable controls accordingly.
if self.recent_emails:
email = self.recent_emails[0]
self.email_is_valid = validate_email(email)

self.enable_controls()

def sync_email(self):
email = self.email_combo.currentText()
self.email_is_valid = validate_email(email)
self.enable_controls()

def enable_controls(self):
# Enable/disable controls accordingly.
for control in self.pubmed_controls:
control.setEnabled(self.email_is_valid)
if self.pubmed_api is None or self.pubmed_api.search_record_count == 0:
self.retrieve_records_button.setEnabled(False)
if not self.email_is_valid:
self.email_combo.setFocus()
def sync_email(self, email):
    """Store *email* on the widget and clear the ``email_error`` message."""
    self.email = email
    self.Error.email_error.clear()

def run_search(self):
self.Error.clear()
self.Warning.clear()

# check if email exists
if self.email is None:
self.Error.email_error()
return

self.run_search_button.setEnabled(False)
self.retrieve_records_button.setEnabled(False)

# Add the email to history.
email = self.email_combo.currentText()
if email not in self.recent_emails:
self.recent_emails.insert(0, email)

# Check if the PubMed object is present.
if self.pubmed_api is None:
self.pubmed_api = Pubmed(
email=email,
email=self.email,
progress_callback=self.api_progress_callback,
error_callback=self.api_error_callback,
)
Expand Down Expand Up @@ -309,6 +349,13 @@ def run_search(self):
self.enable_controls()
self.update_search_info()

def enable_controls(self):
    """Refresh the enabled state of the two action buttons.

    The search button is always enabled. The retrieve button is enabled
    only when a Pubmed API object exists and its ``search_record_count``
    is non-zero.
    """
    self.run_search_button.setEnabled(True)

    api = self.pubmed_api
    if api is None:
        has_records = False
    else:
        has_records = api.search_record_count != 0
    self.retrieve_records_button.setEnabled(has_records)

def retrieve_records(self):
self.Warning.clear()
self.Error.clear()
Expand All @@ -318,13 +365,11 @@ def retrieve_records(self):

if self.download_running:
self.download_running = False
self.run_search_button.setEnabled(True)
self.retrieve_records_button.setText('Retrieve records')
self.pubmed_api.stop_retrieving()
return

self.download_running = True
self.run_search_button.setEnabled(False)
self.output_corpus = None # Clear the old records.

# Change the button label.
Expand Down Expand Up @@ -401,20 +446,6 @@ def update_retrieval_info(self):
self.retrieval_info_label.sizeHint()
)

def select_email(self, n):
if n < len(self.recent_emails):
email = self.recent_emails[n]
del self.recent_emails[n]
self.recent_emails.insert(0, email)

if len(self.recent_emails) > 0:
self.set_email_list()

def set_email_list(self):
self.email_combo.clear()
for email in self.recent_emails:
self.email_combo.addItem(email)

def select_keywords(self, n):
if n < len(self.recent_keywords):
keywords = self.recent_keywords[n]
Expand Down Expand Up @@ -454,18 +485,18 @@ def send_report(self):
('Query', terms if terms else None),
('Authors', authors if authors else None),
('Date', 'from {} to {}'.format(self.pub_date_from,
self.pub_date_to)),
('Number of records retrieved', '{}/{}'.format(len(
self.output_corpus) if self.output_corpus else 0,
max_records_count))
self.pub_date_to)),
('Number of records retrieved', '{}/{}'.format(
len(self.output_corpus) if self.output_corpus else 0,
max_records_count))
))
else:
query = self.advanced_query_input.toPlainText()
self.report_items((
('Query', query if query else None),
('Number of records retrieved', '{}/{}'.format(len(
self.output_corpus) if self.output_corpus else 0,
max_records_count))
('Number of records retrieved', '{}/{}'.format(
len(self.output_corpus) if self.output_corpus else 0,
max_records_count))
))


Expand Down Expand Up @@ -498,6 +529,7 @@ def __init__(self, parent, windowTitle='Date picker'):
def set_date(self, date):
    """Keep *date* as a ``yyyy/MM/dd`` formatted string.

    *date* must offer ``toString(format)``; the formatted result is stored
    in ``self.picked_date``.
    """
    formatted = date.toString('yyyy/MM/dd')
    self.picked_date = formatted


if __name__ == '__main__':
app = QApplication([])
widget = OWPubmed()
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ scipy
nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5
scikit-learn
numpy
validate_email
# fixing boto issue in gensim; TODO: remove after a while and check whether the tests pass
gensim>=0.12.3 # LDA's show topics unified in 0.12.3
setuptools-git
Orange3>=3.4.3
Expand Down