Added some initial HPs and enlarged the search space. (#901)
* update greedy tuner

* update

* fix

* oracle

* fix

* use initial hps

* update

* flake8

* update

* update

* update

* flake8

* update
haifeng-jin authored Jan 16, 2020
1 parent e4905f5 commit b39e814
Showing 14 changed files with 328 additions and 271 deletions.
1 change: 0 additions & 1 deletion autokeras/__init__.py
@@ -1,5 +1,4 @@
 from autokeras.auto_model import AutoModel
-from autokeras.const import Constant
 from autokeras.hypermodel.base import Block
 from autokeras.hypermodel.base import Head
 from autokeras.hypermodel.base import HyperBlock
4 changes: 4 additions & 0 deletions autokeras/auto_model.py
@@ -1,3 +1,4 @@
+import numpy as np
 import tensorflow as tf
 from tensorflow.python.util import nest
 
@@ -80,6 +81,9 @@ def __init__(self,
         self.inputs = nest.flatten(inputs)
         self.outputs = nest.flatten(outputs)
         self.seed = seed
+        if seed:
+            np.random.seed(seed)
+            tf.random.set_seed(seed)
         # TODO: Support passing a tuner instance.
         if isinstance(tuner, str):
             tuner = tuner_module.get_tuner_class(tuner)
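The seed is now propagated to both NumPy and TensorFlow at construction time, so a fixed seed makes the whole search reproducible. A minimal usage sketch (the task-level API shown follows the AutoKeras 1.0 interface; max_trials is illustrative):

    import autokeras as ak

    # Two runs with the same seed should now draw the same hyperparameter
    # samples and the same TensorFlow weight initializations.
    clf = ak.ImageClassifier(max_trials=10, seed=42)
    # clf.fit(x_train, y_train)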
39 changes: 35 additions & 4 deletions autokeras/const.py
@@ -1,4 +1,35 @@
 class Constant(object):
-    # Text
-    VOCABULARY_SIZE = 20000
+    INITIAL_HPS = {
+        'image_classifier': [{
+            'image_block_1/block_type': 'vanilla',
+            'image_block_1/normalize': True,
+            'image_block_1/augment': False,
+            'image_block_1_vanilla/kernel_size': 3,
+            'image_block_1_vanilla/num_blocks': 1,
+            'image_block_1_vanilla/separable': False,
+            'image_block_1_vanilla/dropout_rate': 0.25,
+            'image_block_1_vanilla/filters_0_1': 32,
+            'image_block_1_vanilla/filters_0_2': 64,
+            'spatial_reduction_1/reduction_type': 'flatten',
+            'dense_block_1/num_layers': 1,
+            'dense_block_1/use_batchnorm': False,
+            'dense_block_1/dropout_rate': 0,
+            'dense_block_1/units_0': 128,
+            'classification_head_1/dropout_rate': 0.5,
+            'optimizer': 'adam'
+        }, {
+            'image_block_1/block_type': 'resnet',
+            'image_block_1/normalize': True,
+            'image_block_1/augment': True,
+            'image_block_1_resnet/version': 'v2',
+            'image_block_1_resnet/pooling': 'avg',
+            'image_block_1_resnet/conv3_depth': 4,
+            'image_block_1_resnet/conv4_depth': 6,
+            'dense_block_1/num_layers': 2,
+            'dense_block_1/use_batchnorm': False,
+            'dense_block_1/dropout_rate': 0,
+            'dense_block_1/units_0': 32,
+            'dense_block_1/units_1': 32,
+            'classification_head_1/dropout_rate': 0,
+            'optimizer': 'adam'
+        }],
+    }
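These dictionaries give the tuner known-good starting points (a small vanilla CNN and a ResNet-v2 variant) to evaluate before it explores the enlarged search space. A minimal sketch of the idea, not the actual oracle internals:

    from autokeras.const import Constant

    def initial_trials(task_name):
        # Yield the hand-picked configurations for a task, if any,
        # so a tuner can try them before falling back to its own search.
        for hps in Constant.INITIAL_HPS.get(task_name, []):
            yield dict(hps)  # copy, so the caller may mutate freely

    for config in initial_trials('image_classifier'):
        print(config['image_block_1/block_type'])  # 'vanilla', then 'resnet'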
38 changes: 27 additions & 11 deletions autokeras/hypermodel/block.py
@@ -52,10 +52,11 @@ def build(self, hp, inputs=None):
         num_layers = self.num_layers or hp.Choice('num_layers', [1, 2, 3], default=2)
         use_batchnorm = self.use_batchnorm
         if use_batchnorm is None:
-            use_batchnorm = hp.Choice('use_batchnorm', [True, False], default=False)
-        dropout_rate = self.dropout_rate or hp.Choice('dropout_rate',
-                                                      [0.0, 0.25, 0.5],
-                                                      default=0)
+            use_batchnorm = hp.Boolean('use_batchnorm', default=False)
+        if self.dropout_rate is not None:
+            dropout_rate = self.dropout_rate
+        else:
+            dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5], default=0)
 
         for i in range(num_layers):
             units = hp.Choice(
@@ -66,7 +67,8 @@ def build(self, hp, inputs=None):
             if use_batchnorm:
                 output_node = tf.keras.layers.BatchNormalization()(output_node)
             output_node = tf.keras.layers.ReLU()(output_node)
-            output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
+            if dropout_rate > 0:
+                output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
         return output_node
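Replacing hp.Choice([True, False]) with hp.Boolean, here and in the blocks below, is the idiomatic Keras Tuner way to declare a binary hyperparameter. A standalone sketch against the keras-tuner API of that era (package name kerastuner is an assumption about the install):

    import kerastuner as kt

    hp = kt.HyperParameters()
    use_batchnorm = hp.Boolean('use_batchnorm', default=False)  # binary flag
    dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5],  # enumerated values
                             default=0.0)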


@@ -121,7 +123,7 @@ def build(self, hp, inputs=None):
 
         bidirectional = self.bidirectional
         if bidirectional is None:
-            bidirectional = hp.Choice('bidirectional', [True, False], default=True)
+            bidirectional = hp.Boolean('bidirectional', default=True)
         layer_type = self.layer_type or hp.Choice('layer_type',
                                                   ['gru', 'lstm'],
                                                   default='lstm')
@@ -157,24 +159,30 @@ class ConvBlock(base.Block):
             tuned automatically.
         separable: Boolean. Whether to use separable conv layers.
             If left unspecified, it will be tuned automatically.
+        dropout_rate: Float. Between 0 and 1. The dropout rate after the
+            convolutional layers. If left unspecified, it will be tuned
+            automatically.
     """
 
     def __init__(self,
                  kernel_size=None,
                  num_blocks=None,
                  separable=None,
+                 dropout_rate=None,
                  **kwargs):
         super().__init__(**kwargs)
         self.kernel_size = kernel_size
         self.num_blocks = num_blocks
         self.separable = separable
+        self.dropout_rate = dropout_rate
 
     def get_config(self):
         config = super().get_config()
         config.update({
             'kernel_size': self.kernel_size,
             'num_blocks': self.num_blocks,
-            'separable': self.separable})
+            'separable': self.separable,
+            'dropout_rate': self.dropout_rate})
         return config
 
     def build(self, hp, inputs=None):
@@ -191,14 +199,19 @@ def build(self, hp, inputs=None):
                                   default=2)
         separable = self.separable
         if separable is None:
-            separable = hp.Choice('separable', [True, False], default=False)
+            separable = hp.Boolean('separable', default=False)
 
         if separable:
             conv = utils.get_sep_conv(input_node.shape)
         else:
             conv = utils.get_conv(input_node.shape)
         pool = utils.get_max_pooling(input_node.shape)
 
+        if self.dropout_rate is not None:
+            dropout_rate = self.dropout_rate
+        else:
+            dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5], default=0)
+
         for i in range(num_blocks):
             output_node = conv(
                 hp.Choice('filters_{i}_1'.format(i=i),
@@ -217,6 +230,8 @@ def build(self, hp, inputs=None):
             output_node = pool(
                 kernel_size - 1,
                 padding=self._get_padding(kernel_size - 1, output_node))(output_node)
+        if dropout_rate > 0:
+            output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
         return output_node
 
     @staticmethod
@@ -546,9 +561,10 @@ def build(self, hp, inputs=None):
             input_length=input_node.shape[1],
             trainable=True)
         output_node = layer(input_node)
-        dropout_rate = self.dropout_rate or hp.Choice('dropout_rate',
-                                                      [0.0, 0.25, 0.5],
-                                                      default=0.25)
+        if self.dropout_rate is not None:
+            dropout_rate = self.dropout_rate
+        else:
+            dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5], default=0.25)
         if dropout_rate > 0:
             output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
         return output_node
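The recurring `is not None` rewrite in this commit fixes a subtle pitfall: under the old `self.dropout_rate or hp.Choice(...)` pattern, an explicit user setting of 0.0 is falsy, so the tuner would silently re-tune a value the user had pinned. A tiny illustration:

    dropout_rate = 0.0                # user explicitly disables dropout

    chosen = dropout_rate or 0.25     # old pattern: 0.0 is falsy, becomes 0.25
    if dropout_rate is not None:      # new pattern: 0.0 is respected
        chosen = dropout_rate
    print(chosen)                     # 0.0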
14 changes: 9 additions & 5 deletions autokeras/hypermodel/head.py
@@ -123,13 +123,17 @@ def build(self, hp, inputs=None):
         input_node = inputs[0]
         output_node = input_node
 
+        # Reduce the tensor to a vector.
         if len(output_node.shape) > 2:
-            dropout_rate = self.dropout_rate or hp.Choice('dropout_rate',
-                                                          [0.0, 0.25, 0.5],
-                                                          default=0)
-            if dropout_rate > 0:
-                output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
             output_node = block_module.SpatialReduction().build(hp, output_node)
 
+        if self.dropout_rate is not None:
+            dropout_rate = self.dropout_rate
+        else:
+            dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5], default=0)
+
+        if dropout_rate > 0:
+            output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
         output_node = tf.keras.layers.Dense(self.output_shape[-1])(output_node)
         if self.loss == 'binary_crossentropy':
             output_node = Sigmoid(name=self.name)(output_node)
14 changes: 6 additions & 8 deletions autokeras/hypermodel/hyperblock.py
@@ -25,20 +25,17 @@ def __init__(self,
                  block_type=None,
                  normalize=None,
                  augment=None,
-                 seed=None,
                  **kwargs):
         super().__init__(**kwargs)
         self.block_type = block_type
         self.normalize = normalize
         self.augment = augment
-        self.seed = seed
 
     def get_config(self):
         config = super().get_config()
         config.update({'block_type': self.block_type,
                        'normalize': self.normalize,
-                       'augment': self.augment,
-                       'seed': self.seed})
+                       'augment': self.augment})
         return config
 
     def build(self, hp, inputs=None):
@@ -51,10 +48,10 @@ def build(self, hp, inputs=None):
 
         normalize = self.normalize
         if normalize is None:
-            normalize = hp.Choice('normalize', [True, False], default=True)
+            normalize = hp.Boolean('normalize', default=True)
         augment = self.augment
         if augment is None:
-            augment = hp.Choice('augment', [True, False], default=False)
+            augment = hp.Boolean('augment', default=False)
         if normalize:
             output_node = preprocessor_module.Normalization()(output_node)
         if augment:
@@ -77,8 +74,9 @@ class TextBlock(base.HyperBlock):
         vectorizer: String. 'sequence' or 'ngram'. If it is 'sequence',
             TextToIntSequence will be used. If it is 'ngram', TextToNgramVector
             will be used. If unspecified, it will be tuned automatically.
-        pretraining: Boolean. Whether to use pretraining weights in the N-gram
-            vectorizer. If unspecified, it will be tuned automatically.
+        pretraining: String. 'random' (use randomly initialized weights instead
+            of any pretrained model), 'glove', 'fasttext' or 'word2vec'. Which
+            pretrained word embedding to use. If left unspecified, it will be
+            tuned automatically.
     """
 
     def __init__(self, vectorizer=None, pretraining=None, **kwargs):
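With pretraining now a choice over embedding families rather than a Boolean, it can also be pinned when composing a pipeline by hand. A hedged usage sketch with the functional AutoModel API (assuming these blocks are exported at the package top level, as in AutoKeras 1.0):

    import autokeras as ak

    input_node = ak.TextInput()
    # Pin the embedding family; the rest of the space is still tuned.
    output_node = ak.TextBlock(pretraining='glove')(input_node)
    output_node = ak.ClassificationHead()(output_node)
    # model = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=5)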
24 changes: 19 additions & 5 deletions autokeras/hypermodel/preprocessor.py
@@ -7,7 +7,6 @@
 from sklearn.preprocessing import normalize
 from tensorflow.python.util import nest
 
-from autokeras import const
 from autokeras import encoder
 from autokeras import utils
 from autokeras.hypermodel import base
@@ -88,14 +87,26 @@ def set_state(self, state):
 
 
 class TextToIntSequence(base.Preprocessor):
-    """Convert raw texts to sequences of word indices."""
+    """Convert raw texts to sequences of word indices.
+
+    # Arguments
+        max_len: Int. The maximum length of a sentence. If unspecified, the
+            length of the longest sentence will be used.
+        num_words: Int. The maximum number of words to keep, based on word
+            frequency. Only the most common num_words-1 words will be kept.
+            Defaults to 20000.
+    """
 
-    def __init__(self, max_len=None, **kwargs):
+    def __init__(self,
+                 max_len=None,
+                 num_words=20000,
+                 **kwargs):
         super().__init__(**kwargs)
         self.max_len = max_len
         self.max_len_in_data = 0
+        self.num_words = num_words
         self.tokenizer = tf.keras.preprocessing.text.Tokenizer(
-            num_words=const.Constant.VOCABULARY_SIZE)
+            num_words=num_words)
         self.max_len_to_use = None
         self.max_features = None
 
@@ -127,7 +138,10 @@ def output_shape(self):
 
     def get_config(self):
         config = super().get_config()
-        config.update({'max_len': self.max_len})
+        config.update({
+            'max_len': self.max_len,
+            'num_words': self.num_words,
+        })
         return config
 
     def get_state(self):
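For reference, a standalone sketch of what the now-configurable num_words controls in the underlying Keras tokenizer (toy texts made up for illustration):

    import tensorflow as tf

    texts = ['the cat sat', 'the cat ran', 'a dog ran']
    # num_words=3 keeps only the 2 most common words (indices 1 and 2);
    # everything else is dropped from the output sequences.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=3)
    tokenizer.fit_on_texts(texts)
    print(tokenizer.texts_to_sequences(texts))  # [[1, 2], [1, 2], []]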
7 changes: 3 additions & 4 deletions autokeras/meta_model.py
@@ -28,7 +28,7 @@ def assemble(inputs, outputs, dataset, seed=None):
         if isinstance(input_node, node.TextInput):
             assemblers.append(TextAssembler())
         if isinstance(input_node, node.ImageInput):
-            assemblers.append(ImageAssembler(seed=seed))
+            assemblers.append(ImageAssembler())
         if isinstance(input_node, node.StructuredDataInput):
             assemblers.append(StructuredDataAssembler(seed=seed))
         if isinstance(input_node, node.TimeSeriesInput):
@@ -125,9 +125,8 @@ def assemble(self, input_node):
 class ImageAssembler(Assembler):
     """Assembles the ImageBlock based on training dataset."""
 
-    def __init__(self, seed=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self.seed = seed
         self._shape = None
         self._num_samples = 0
 
@@ -136,7 +135,7 @@ def update(self, x):
         self._num_samples += 1
 
     def assemble(self, input_node):
-        block = hyperblock.ImageBlock(seed=self.seed)
+        block = hyperblock.ImageBlock()
         if max(self._shape[0], self._shape[1]) < 32:
             if self._num_samples < 10000:
                 self.hps.append(hp_module.Choice(
(Diffs for the remaining 6 changed files are not shown.)
