Skip to content

Commit

Permalink
fix args
Browse files Browse the repository at this point in the history
  • Loading branch information
tompollard committed Mar 26, 2019
1 parent b3dd581 commit 5c5a071
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 133 deletions.
90 changes: 45 additions & 45 deletions neuroner/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,51 +21,51 @@ def parse_arguments(arguments=None):
arguments:
arguments the arguments, optionally given as argument
'''
default_param = neuromodel._get_default_param()
# default_param = neuromodel._get_default_param()

parser = argparse.ArgumentParser(description='''NeuroNER CLI''', formatter_class=RawTextHelpFormatter)

parser.add_argument('--parameters_filepath', required=False, default=os.path.join('.','parameters.ini'), help='The parameters file')
parser.add_argument('--character_embedding_dimension', required=False, default=default_param['character_embedding_dimension'], help='')
parser.add_argument('--character_lstm_hidden_state_dimension', required=False, default=default_param['character_lstm_hidden_state_dimension'], help='')
parser.add_argument('--check_for_digits_replaced_with_zeros', required=False, default=default_param['check_for_digits_replaced_with_zeros'], help='')
parser.add_argument('--check_for_lowercase', required=False, default=default_param['check_for_lowercase'], help='')
parser.add_argument('--dataset_text_folder', required=False, default=default_param['dataset_text_folder'], help='')
parser.add_argument('--debug', required=False, default=default_param['debug'], help='')
parser.add_argument('--dropout_rate', required=False, default=default_param['dropout_rate'], help='')
parser.add_argument('--experiment_name', required=False, default=default_param['experiment_name'], help='')
parser.add_argument('--freeze_token_embeddings', required=False, default=default_param['freeze_token_embeddings'], help='')
parser.add_argument('--gradient_clipping_value', required=False, default=default_param['gradient_clipping_value'], help='')
parser.add_argument('--learning_rate', required=False, default=default_param['learning_rate'], help='')
parser.add_argument('--load_only_pretrained_token_embeddings', required=False, default=default_param['load_only_pretrained_token_embeddings'], help='')
parser.add_argument('--load_all_pretrained_token_embeddings', required=False, default=default_param['load_all_pretrained_token_embeddings'], help='')
parser.add_argument('--main_evaluation_mode', required=False, default=default_param['main_evaluation_mode'], help='')
parser.add_argument('--maximum_number_of_epochs', required=False, default=default_param['maximum_number_of_epochs'], help='')
parser.add_argument('--number_of_cpu_threads', required=False, default=default_param['number_of_cpu_threads'], help='')
parser.add_argument('--number_of_gpus', required=False, default=default_param['number_of_gpus'], help='')
parser.add_argument('--optimizer', required=False, default=default_param['optimizer'], help='')
parser.add_argument('--output_folder', required=False, default=default_param['output_folder'], help='')
parser.add_argument('--patience', required=False, default=default_param['patience'], help='')
parser.add_argument('--plot_format', required=False, default=default_param['plot_format'], help='')
parser.add_argument('--pretrained_model_folder', required=False, default=default_param['pretrained_model_folder'], help='')
parser.add_argument('--reload_character_embeddings', required=False, default=default_param['reload_character_embeddings'], help='')
parser.add_argument('--reload_character_lstm', required=False, default=default_param['reload_character_lstm'], help='')
parser.add_argument('--reload_crf', required=False, default=default_param['reload_crf'], help='')
parser.add_argument('--reload_feedforward', required=False, default=default_param['reload_feedforward'], help='')
parser.add_argument('--reload_token_embeddings', required=False, default=default_param['reload_token_embeddings'], help='')
parser.add_argument('--reload_token_lstm', required=False, default=default_param['reload_token_lstm'], help='')
parser.add_argument('--remap_unknown_tokens_to_unk', required=False, default=default_param['remap_unknown_tokens_to_unk'], help='')
parser.add_argument('--spacylanguage', required=False, default=default_param['spacylanguage'], help='')
parser.add_argument('--tagging_format', required=False, default=default_param['tagging_format'], help='')
parser.add_argument('--token_embedding_dimension', required=False, default=default_param['token_embedding_dimension'], help='')
parser.add_argument('--token_lstm_hidden_state_dimension', required=False, default=default_param['token_lstm_hidden_state_dimension'], help='')
parser.add_argument('--token_pretrained_embedding_filepath', required=False, default=default_param['token_pretrained_embedding_filepath'], help='')
parser.add_argument('--tokenizer', required=False, default=default_param['tokenizer'], help='')
parser.add_argument('--train_model', required=False, default=default_param['train_model'], help='')
parser.add_argument('--use_character_lstm', required=False, default=default_param['use_character_lstm'], help='')
parser.add_argument('--use_crf', required=False, default=default_param['use_crf'], help='')
parser.add_argument('--use_pretrained_model', required=False, default=default_param['use_pretrained_model'], help='')
parser.add_argument('--verbose', required=False, default=default_param['verbose'], help='')
parser.add_argument('--parameters_filepath', required=False, default=None, help='The parameters file')
parser.add_argument('--character_embedding_dimension', required=False, default=None, help='')
parser.add_argument('--character_lstm_hidden_state_dimension', required=False, default=None, help='')
parser.add_argument('--check_for_digits_replaced_with_zeros', required=False, default=None, help='')
parser.add_argument('--check_for_lowercase', required=False, default=None, help='')
parser.add_argument('--dataset_text_folder', required=False, default=None, help='')
parser.add_argument('--debug', required=False, default=None, help='')
parser.add_argument('--dropout_rate', required=False, default=None, help='')
parser.add_argument('--experiment_name', required=False, default=None, help='')
parser.add_argument('--freeze_token_embeddings', required=False, default=None, help='')
parser.add_argument('--gradient_clipping_value', required=False, default=None, help='')
parser.add_argument('--learning_rate', required=False, default=None, help='')
parser.add_argument('--load_only_pretrained_token_embeddings', required=False, default=None, help='')
parser.add_argument('--load_all_pretrained_token_embeddings', required=False, default=None, help='')
parser.add_argument('--main_evaluation_mode', required=False, default=None, help='')
parser.add_argument('--maximum_number_of_epochs', required=False, default=None, help='')
parser.add_argument('--number_of_cpu_threads', required=False, default=None, help='')
parser.add_argument('--number_of_gpus', required=False, default=None, help='')
parser.add_argument('--optimizer', required=False, default=None, help='')
parser.add_argument('--output_folder', required=False, default=None, help='')
parser.add_argument('--patience', required=False, default=None, help='')
parser.add_argument('--plot_format', required=False, default=None, help='')
parser.add_argument('--pretrained_model_folder', required=False, default=None, help='')
parser.add_argument('--reload_character_embeddings', required=False, default=None, help='')
parser.add_argument('--reload_character_lstm', required=False, default=None, help='')
parser.add_argument('--reload_crf', required=False, default=None, help='')
parser.add_argument('--reload_feedforward', required=False, default=None, help='')
parser.add_argument('--reload_token_embeddings', required=False, default=None, help='')
parser.add_argument('--reload_token_lstm', required=False, default=None, help='')
parser.add_argument('--remap_unknown_tokens_to_unk', required=False, default=None, help='')
parser.add_argument('--spacylanguage', required=False, default=None, help='')
parser.add_argument('--tagging_format', required=False, default=None, help='')
parser.add_argument('--token_embedding_dimension', required=False, default=None, help='')
parser.add_argument('--token_lstm_hidden_state_dimension', required=False, default=None, help='')
parser.add_argument('--token_pretrained_embedding_filepath', required=False, default=None, help='')
parser.add_argument('--tokenizer', required=False, default=None, help='')
parser.add_argument('--train_model', required=False, default=None, help='')
parser.add_argument('--use_character_lstm', required=False, default=None, help='')
parser.add_argument('--use_crf', required=False, default=None, help='')
parser.add_argument('--use_pretrained_model', required=False, default=None, help='')
parser.add_argument('--verbose', required=False, default=None, help='')

# load data to local folder
parser.add_argument('--fetch_data', required=False, default='', help='')
Expand All @@ -78,9 +78,9 @@ def parse_arguments(arguments=None):
sys.exit(0)

# http://stackoverflow.com/questions/16878315/what-is-the-right-way-to-treat-python-argparse-namespace-as-a-dictionary
arguments = vars(arguments)
return arguments
arguments = vars(arguments)

return {k: v for k, v in arguments.items() if v is not None}

def main(argv=sys.argv):
''' NeuroNER main method
Expand Down
113 changes: 26 additions & 87 deletions neuroner/neuromodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,63 +97,6 @@ def _fetch(name,content_type=None):
msg = "{} not found in {} package.".format(name,package_name)
print(msg)

# if pkg_resources.resource_isdir(package_name, resource_path):

# # get list of files
# files = pkg_resources.resource_listdir(package_name, resource_path)
# fileset = {}

# # load data
# for f in files:
# resource_path = '/'.join((content_type,name,f))
# fileset[f] = pkg_resources.resource_string(package_name,
# resource_path)

# # create containing dir
# container_dir = os.path.join('.',content_type,name)

# # write to local dir
# if os.path.isdir(container_dir):
# msg = "Directory '{}' already exists.".format(container_dir)
# print(msg)
# else:
# _make_local_dir(container_dir)
# for f,contents in fileset.items():
# _write_file(os.path.join(container_dir,f),contents)

# else:
# msg = "{} not found in {} package.".format(name,package_name)
# print(msg)

# def _make_local_dir(path):
# """
# Make any required local directories to prepare for downloading.

# Args:
# path (str): name of directory to create
# """
# if os.path.isdir(path):
# msg = "Directory '{}' already exists.".format(path)
# warnings.warn(msg)
# else:
# os.makedirs(path)
# print('Created directory: {}'.format(path))

# def _write_file(filename,content):
# """
# Write to file. Assumes directory already exists.

# Args:
# filename (str): full path to file
# content (obj): data to write to file.
# """
# if os.path.isfile(filename):
# msg = "File '{}' already exists.".format(filename)
# warnings.warn(msg)
# else:
# with open(filename, 'wb') as f:
# f.write(content)

def _get_default_param():
"""
Get the default parameters.
Expand Down Expand Up @@ -255,46 +198,53 @@ def _clean_param_dtypes(param):

def load_parameters(**kwargs):
'''
Load parameters from the ini file if specified, take into account any command
Load parameters from the ini file if specified, take into account any command
line argument, and ensure that each parameter is cast to the correct type.
Command line arguments take precedence over parameters specified in the parameter file.
'''
param = _get_default_param()
param = {}
param_default = _get_default_param()

# use parameter path if provided, otherwise use default
try:
parameters_filepath = kwargs['parameters_filepath']
try:
if kwargs['parameters_filepath']:
parameters_filepath = kwargs['parameters_filepath']
except:
parameters_filepath = param['parameters_filepath']
parameters_filepath = param_default['parameters_filepath']

param_config, param_file_txt = _get_config_param(parameters_filepath)

# Parameter file settings should overwrite default settings
for k,v in param_config.items():
param[k] = v
for k, v in param_config.items():
param[k] = v

# Command line args should overwrite settings in the parameter file
for k,v in kwargs.items():
for k, v in kwargs.items():
param[k] = v

# if loading a pretrained model, set to pretrain hyperparameters
# Any missing args can be set to default
for k, v in param_default.items():
if k not in param:
param[k] = param_default[k]

# if loading a pretrained model, set to pretrain hyperparameters
if param['use_pretrained_model']:

pretrain_path = os.path.join(param['pretrained_model_folder'],
pretrain_path = os.path.join(param['pretrained_model_folder'],
'parameters.ini')

if os.path.isfile(pretrain_path):
pretrain_param, _ = _get_config_param(pretrain_path)
pretrain_list = ['use_character_lstm', 'character_embedding_dimension',
'character_lstm_hidden_state_dimension', 'token_embedding_dimension',

pretrain_list = ['use_character_lstm', 'character_embedding_dimension',
'character_lstm_hidden_state_dimension', 'token_embedding_dimension',
'token_lstm_hidden_state_dimension', 'use_crf']

for name in pretrain_list:
if str(param[name]) != str(pretrain_param[name]):
msg = """WARNING: parameter '{0}' was overwritten from '{1}' to '{2}'
for consistency with the pretrained model""".format(name,
msg = """WARNING: parameter '{0}' was overwritten from '{1}' to '{2}'
for consistency with the pretrained model""".format(name,
param[name], pretrain_param[name])
print(msg)
param[name] = pretrain_param[name]
Expand Down Expand Up @@ -481,8 +431,8 @@ class NeuroNER(object):

prediction_count = 0

def __init__(self,**kwargs):
def __init__(self, **kwargs):

# Set parameters
self.parameters, self.conf_parameters = load_parameters(**kwargs)
self.dataset_filepaths, self.dataset_brat_folders = self._get_valid_dataset_filepaths(self.parameters)
Expand All @@ -491,7 +441,7 @@ def __init__(self,**kwargs):
# Load dataset
self.modeldata = dataset.Dataset(verbose=self.parameters['verbose'], debug=self.parameters['debug'])
token_to_vector = self.modeldata.load_dataset(self.dataset_filepaths, self.parameters)

# Launch session. Automatically choose a device
# if the specified one doesn't exist
session_conf = tf.ConfigProto(
Expand Down Expand Up @@ -533,17 +483,6 @@ def _create_stats_graph_folder(self, parameters):
stats_graph_folder = os.path.join(parameters['output_folder'], model_name)
utils.create_folder_if_not_exists(stats_graph_folder)
return stats_graph_folder, experiment_timestamp

def _load_parameters(self, **kwargs):
    """
    Thin wrapper around the module-level ``load_parameters`` function.

    Every keyword argument is forwarded unchanged, and the two values that
    ``load_parameters`` returns are handed back to the caller as a pair.

    Args:
        **kwargs: passed through as-is to ``load_parameters``.

    Returns:
        tuple: the two values returned by ``load_parameters``, in the same
        order.
    """
    # Unpacking here requires load_parameters to yield exactly two items.
    loaded_param, loaded_param_txt = load_parameters(**kwargs)
    return loaded_param, loaded_param_txt

def _get_valid_dataset_filepaths(self, parameters, dataset_types=['train', 'valid', 'test', 'deploy']):
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='1.0-dev3',
version='1.0-dev4',

description='NeuroNER',
long_description=long_description,
Expand Down

0 comments on commit 5c5a071

Please sign in to comment.