From 21994919156aac15558f77555538346fb702bcbc Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Mon, 26 Nov 2018 22:16:04 -0600 Subject: [PATCH] [MRG] default architectures updated (#337) * cifar * cifar * mixup * update * update * modify * strategy updated * generate until training finished * visualize * bug fix * examples --- autokeras/bayesian.py | 43 +++- autokeras/constant.py | 2 +- autokeras/net_transformer.py | 63 +++--- autokeras/nn/generator.py | 74 ++++--- autokeras/nn/graph.py | 296 +++++++++++++-------------- autokeras/nn/layer_transformer.py | 75 ++++--- autokeras/nn/layers.py | 44 +++- autokeras/nn/model_trainer.py | 48 ++++- autokeras/search.py | 15 +- autokeras/utils.py | 4 +- examples/mnist.py | 2 +- examples/visualize.py | 30 +++ tests/common.py | 16 +- tests/image/temp_test.py | 49 ----- tests/image/test_image_supervised.py | 1 - tests/nn/test_generator.py | 2 +- tests/nn/test_graph.py | 120 +++-------- tests/nn/test_layer_transformer.py | 17 +- tests/test_bayesian.py | 14 +- tests/test_net_transformer.py | 24 --- tests/test_search.py | 15 +- 21 files changed, 465 insertions(+), 489 deletions(-) create mode 100644 examples/visualize.py delete mode 100644 tests/image/temp_test.py diff --git a/autokeras/bayesian.py b/autokeras/bayesian.py index 4697d6d88..0b172f94e 100644 --- a/autokeras/bayesian.py +++ b/autokeras/bayesian.py @@ -13,11 +13,34 @@ from autokeras.constant import Constant from autokeras.net_transformer import transform +from autokeras.nn.layers import is_layer def layer_distance(a, b): """The distance between two layers.""" - return abs(a - b) * 1.0 / max(a, b) + if type(a) != type(b): + return 1.0 + if is_layer(a, 'Conv'): + att_diff = [(a.filters, b.filters), + (a.kernel_size, b.kernel_size), + (a.stride, b.stride)] + return attribute_difference(att_diff) + if is_layer(a, 'Pooling'): + att_diff = [(a.padding, b.padding), + (a.kernel_size, b.kernel_size), + (a.stride, b.stride)] + return attribute_difference(att_diff) + return 0.0 + + +def attribute_difference(att_diff): + ret = 0 + for a_value, b_value in att_diff: + if max(a_value, b_value) == 0: + ret += 0 + else: + ret += abs(a_value - b_value) * 1.0 / max(a_value, b_value) + return ret * 1.0 / len(att_diff) def layers_distance(list_a, list_b): @@ -64,9 +87,7 @@ def edit_distance(x, y): The edit-distance between x and y. """ - ret = 0 - ret += layers_distance(x.conv_widths, y.conv_widths) - ret += layers_distance(x.dense_widths, y.dense_widths) + ret = layers_distance(x.layers, y.layers) ret += Constant.KERNEL_LAMBDA * skip_connections_distance(x.skip_connections, y.skip_connections) return ret @@ -77,6 +98,7 @@ class IncrementalGaussianProcess: Attributes: alpha: A hyperparameter. """ + def __init__(self): self.alpha = 1e-10 self._distance_matrix = None @@ -266,6 +288,7 @@ class BayesianOptimizer: beta: The beta in acquisition function. (refer to our paper) search_tree: The network morphism search tree. """ + def __init__(self, searcher, t_min, metric, beta): self.searcher = searcher self.t_min = t_min @@ -284,12 +307,13 @@ def fit(self, x_queue, y_queue): """ self.gpr.fit(x_queue, y_queue) - def generate(self, descriptors, timeout): + def generate(self, descriptors, timeout, multiprocessing_queue): """Generate new architecture. Args: descriptors: All the searched neural architectures. timeout: An integer. The time limit in seconds. + multiprocessing_queue: the Queue for multiprocessing return value. Returns: graph: An instance of Graph. A morphed neural network with weights. 
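Editor's note on the new kernel: the layer-level distance introduced in bayesian.py above replaces the old width-list comparison. A self-contained sketch of the arithmetic (ConvSpec is a hypothetical stand-in for StubConv carrying only the attributes the kernel compares) reproduces the value asserted by the patched test_layer_distance further below:

    from collections import namedtuple

    # Hypothetical stand-in for StubConv; only these attributes are compared.
    ConvSpec = namedtuple('ConvSpec', ['filters', 'kernel_size', 'stride'])

    def attribute_difference(att_diff):
        # Mean of per-attribute normalized absolute differences, each in [0, 1].
        ret = 0.0
        for a_value, b_value in att_diff:
            if max(a_value, b_value) != 0:
                ret += abs(a_value - b_value) / max(a_value, b_value)
        return ret / len(att_diff)

    a, b = ConvSpec(filters=5, kernel_size=3, stride=2), ConvSpec(filters=1, kernel_size=1, stride=1)
    pairs = [(a.filters, b.filters), (a.kernel_size, b.kernel_size), (a.stride, b.stride)]
    print(attribute_difference(pairs))  # (4/5 + 2/3 + 1/2) / 3 = 5.9 / 9 ≈ 0.6556

Layers of different types are maximally distant (1.0), so the descriptor distance now reflects kernel-size and stride changes rather than only layer widths and counts.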
@@ -318,11 +342,13 @@ def generate(self, descriptors, timeout): pq.put(elem_class(metric_value, model_id, graph)) t = 1.0 - t_min = self.t_min + # t_min = self.t_min alpha = 0.9 opt_acq = self._get_init_opt_acq_value() remaining_time = timeout - while not pq.empty() and t > t_min and remaining_time > 0: + while not pq.empty() and remaining_time > 0: + if multiprocessing_queue.qsize() != 0: + break elem = pq.get() if self.metric.higher_better(): temp_exp = min((elem.metric_value - opt_acq) / t, 1.0) @@ -379,6 +405,7 @@ def add_child(self, father_id, model_id): @total_ordering class Elem: """Elements to be sorted according to metric value.""" + def __init__(self, metric_value, father_id, graph): self.father_id = father_id self.graph = graph @@ -393,6 +420,7 @@ def __lt__(self, other): class ReverseElem(Elem): """Elements to be reversely sorted according to metric value.""" + def __lt__(self, other): return self.metric_value > other.metric_value @@ -407,6 +435,7 @@ def contain(descriptors, target_descriptor): class SearchTree: """The network morphism search tree.""" + def __init__(self): self.root = None self.adj_list = {} diff --git a/autokeras/constant.py b/autokeras/constant.py index 5bbacc354..7a6c28e86 100644 --- a/autokeras/constant.py +++ b/autokeras/constant.py @@ -14,7 +14,7 @@ class Constant: N_NEIGHBOURS = 8 MAX_MODEL_SIZE = (1 << 25) MAX_LAYER_WIDTH = 4096 - MAX_LAYERS = 100 + MAX_LAYERS = 500 # Model Defaults diff --git a/autokeras/net_transformer.py b/autokeras/net_transformer.py index 98ca81432..6bd042025 100644 --- a/autokeras/net_transformer.py +++ b/autokeras/net_transformer.py @@ -5,7 +5,9 @@ from autokeras.nn.graph import NetworkDescriptor from autokeras.constant import Constant -from autokeras.nn.layers import is_layer +from autokeras.nn.layer_transformer import init_dense_weight, init_conv_weight, init_bn_weight +from autokeras.nn.layers import is_layer, StubDense, get_dropout_class, StubReLU, get_conv_class, \ + get_batch_norm_class, get_pooling_class def to_wider_graph(graph): @@ -53,6 +55,37 @@ def to_skip_connection_graph(graph): return graph +def create_new_layer(input_shape, n_dim): + dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU] + conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU] + if len(input_shape) == 1: + # It is in the dense layer part. + layer_class = sample(dense_deeper_classes, 1)[0] + else: + # It is in the conv layer part. 
+ layer_class = sample(conv_deeper_classes, 1)[0] + + if layer_class == StubDense: + new_layer = StubDense(input_shape[0], input_shape[0]) + + elif layer_class == get_dropout_class(n_dim): + new_layer = layer_class(Constant.DENSE_DROPOUT_RATE) + + elif layer_class == get_conv_class(n_dim): + new_layer = layer_class(input_shape[-1], input_shape[-1], sample((1, 3, 5), 1)[0], stride=1) + + elif layer_class == get_batch_norm_class(n_dim): + new_layer = layer_class(input_shape[-1]) + + elif layer_class == get_pooling_class(n_dim): + new_layer = layer_class(sample((1, 3, 5), 1)[0]) + + else: + new_layer = layer_class() + + return new_layer + + def to_deeper_graph(graph): weighted_layer_ids = graph.deep_layer_ids() if len(weighted_layer_ids) >= Constant.MAX_LAYERS: @@ -62,21 +95,11 @@ def to_deeper_graph(graph): for layer_id in deeper_layer_ids: layer = graph.layer_list[layer_id] - if is_layer(layer, 'Conv'): - graph.to_conv_deeper_model(layer_id, 3) - else: - graph.to_dense_deeper_model(layer_id) + new_layer = create_new_layer(layer.output.shape, graph.n_dim) + graph.to_deeper_model(layer_id, new_layer) return graph -def legal_graph(graph): - descriptor = graph.extract_descriptor() - skips = descriptor.skip_connections - if len(skips) != len(set(skips)): - return False - return True - - def transform(graph): graphs = [] for i in range(Constant.N_NEIGHBOURS * 2): @@ -95,16 +118,4 @@ def transform(graph): if len(graphs) >= Constant.N_NEIGHBOURS: break - return list(filter(lambda x: legal_graph(x), graphs)) - - -def default_transform(graph): - graph = deepcopy(graph) - graph.to_conv_deeper_model(1, 3) - graph.to_conv_deeper_model(1, 3) - graph.to_conv_deeper_model(5, 3) - graph.to_conv_deeper_model(9, 3) - graph.to_add_skip_model(1, 18) - graph.to_add_skip_model(18, 24) - graph.to_add_skip_model(24, 27) - return [graph] + return graphs diff --git a/autokeras/nn/generator.py b/autokeras/nn/generator.py index 9e5cca87d..3476824ba 100644 --- a/autokeras/nn/generator.py +++ b/autokeras/nn/generator.py @@ -15,6 +15,7 @@ class NetworkGenerator: n_output_node: Number of output nodes in the network. input_shape: A tuple to represent the input shape. """ + def __init__(self, n_output_node, input_shape): """Initialize the instance. 
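A quick illustration of create_new_layer from net_transformer.py above. This is a hedged usage sketch (the return value is random, so the comments list the possible classes); it assumes the stub-layer factories imported at the top of that file:

    # A rank-1 input shape draws from the dense group; a higher-rank shape
    # draws from the conv group. Conv kernel sizes are sampled from (1, 3, 5).
    dense_insert = create_new_layer((64,), n_dim=2)      # StubDense, dropout, or StubReLU
    conv_insert = create_new_layer((8, 8, 32), n_dim=2)  # conv, batch-norm, or StubReLU

to_deeper_graph then hands the sampled layer to graph.to_deeper_model, and insertion points are drawn from deep_layer_ids(), i.e. the main chain before global average pooling, so the new layer always sees the output shape it was built for.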
@@ -77,10 +78,16 @@ def generate(self, model_len=Constant.MODEL_LEN, model_width=Constant.MODEL_WIDT graph = Graph(self.input_shape, False) temp_input_channel = self.input_shape[-1] output_node_id = 0 + stride = 1 for i in range(model_len): output_node_id = graph.add_layer(StubReLU(), output_node_id) - output_node_id = graph.add_layer(self.conv(temp_input_channel, model_width, kernel_size=3), output_node_id) - output_node_id = graph.add_layer(self.batch_norm(model_width), output_node_id) + output_node_id = graph.add_layer(self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id) + output_node_id = graph.add_layer(self.conv(temp_input_channel, + model_width, + kernel_size=3, + stride=stride), output_node_id) + # if stride == 1: + # stride = 2 temp_input_channel = model_width if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1): output_node_id = graph.add_layer(self.pooling(), output_node_id) @@ -143,14 +150,14 @@ def generate(self, model_len=Constant.MLP_MODEL_LEN, model_width=Constant.MLP_MO class ResNetGenerator(NetworkGenerator): def __init__(self, n_output_node, input_shape): super(ResNetGenerator, self).__init__(n_output_node, input_shape) - self.layers = [3, 4, 6, 3] + # self.layers = [2, 2, 2, 2] + self.in_planes = 64 self.block_expansion = 1 self.n_dim = len(self.input_shape) - 1 if len(self.input_shape) > 4: raise ValueError('The input dimension is too high.') elif len(self.input_shape) < 2: raise ValueError('The input dimension is too low.') - self.inplanes = 64 self.conv = get_conv_class(self.n_dim) self.dropout = get_dropout_class(self.n_dim) self.global_avg_pooling = get_global_avg_pooling_class(self.n_dim) @@ -158,45 +165,48 @@ def __init__(self, n_output_node, input_shape): self.pooling = get_pooling_class(self.n_dim) self.batch_norm = get_batch_norm_class(self.n_dim) - def generate(self, model_len, model_width): + def generate(self, model_len=Constant.MODEL_LEN, model_width=Constant.MODEL_WIDTH): graph = Graph(self.input_shape, False) temp_input_channel = self.input_shape[-1] output_node_id = 0 - output_node_id = graph.add_layer(StubReLU(), output_node_id) - output_node_id = graph.add_layer(self.conv(temp_input_channel, model_width, kernel_size=7), output_node_id) + # output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(self.conv(temp_input_channel, model_width, kernel_size=3), output_node_id) output_node_id = graph.add_layer(self.batch_norm(model_width), output_node_id) - output_node_id = graph.add_layer(self.pooling(kernel_size=3, stride=2, padding=1), output_node_id) - for layer in self.layers: - output_node_id = self._make_layer(graph, model_width, layer, output_node_id) - model_width *= 2 + # output_node_id = graph.add_layer(self.pooling(kernel_size=3, stride=2, padding=1), output_node_id) + + output_node_id = self._make_layer(graph, model_width, 2, output_node_id, 1) + model_width *= 2 + output_node_id = self._make_layer(graph, model_width, 2, output_node_id, 2) + model_width *= 2 + output_node_id = self._make_layer(graph, model_width, 2, output_node_id, 2) + model_width *= 2 + output_node_id = self._make_layer(graph, model_width, 2, output_node_id, 2) + output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id) - graph.add_layer(StubDense(int(model_width / 2) * self.block_expansion, self.n_output_node), output_node_id) + graph.add_layer(StubDense(model_width * self.block_expansion, self.n_output_node), output_node_id) return graph - def _make_layer(self, graph, planes, 
blocks, node_id): - downsample = None - if self.inplanes != planes * self.block_expansion: - downsample = [ - self.conv(self.inplanes, planes * self.block_expansion, kernel_size=1), - self.batch_norm(planes * self.block_expansion), - ] - out = self._make_block(graph, self.inplanes, planes, node_id, downsample) - self.inplanes = planes * self.block_expansion - for _ in range(1, blocks): - out = self._make_block(graph, self.inplanes, planes, out) + def _make_layer(self, graph, planes, blocks, node_id, stride): + strides = [stride] + [1] * (blocks - 1) + out = node_id + for current_stride in strides: + out = self._make_block(graph, self.in_planes, planes, out, current_stride) + self.in_planes = planes * self.block_expansion return out - def _make_block(self, graph, inplanes, planes, node_id, downsample=None): - residual_node_id = node_id - out = graph.add_layer(StubReLU(), node_id) - out = graph.add_layer(self.conv(inplanes, planes, kernel_size=1), out) + def _make_block(self, graph, in_planes, planes, node_id, stride=1): + out = graph.add_layer(self.batch_norm(in_planes), node_id) + out = graph.add_layer(StubReLU(), out) + residual_node_id = out + out = graph.add_layer(self.conv(in_planes, planes, kernel_size=3, stride=stride), out) out = graph.add_layer(self.batch_norm(planes), out) out = graph.add_layer(StubReLU(), out) out = graph.add_layer(self.conv(planes, planes, kernel_size=3), out) - out = graph.add_layer(self.batch_norm(planes), out) - if downsample is not None: - downsample_out = graph.add_layer(StubReLU(), node_id) - downsample_out = graph.add_layer(downsample[0], downsample_out) - residual_node_id = graph.add_layer(downsample[1], downsample_out) + + residual_node_id = graph.add_layer(StubReLU(), residual_node_id) + residual_node_id = graph.add_layer(self.conv(in_planes, + planes * self.block_expansion, + kernel_size=1, + stride=stride), residual_node_id) out = graph.add_layer(StubAdd(), (out, residual_node_id)) return out diff --git a/autokeras/nn/graph.py b/autokeras/nn/graph.py index 25a7fd8fd..92c4040c9 100644 --- a/autokeras/nn/graph.py +++ b/autokeras/nn/graph.py @@ -1,17 +1,16 @@ from collections import Iterable -from copy import deepcopy +from copy import deepcopy, copy from queue import Queue import keras import numpy as np import torch -from autokeras.constant import Constant from autokeras.nn.layer_transformer import wider_bn, wider_next_conv, wider_next_dense, wider_pre_dense, \ - wider_pre_conv, deeper_conv_block, dense_to_deeper_block, add_noise + wider_pre_conv, add_noise, init_dense_weight, init_conv_weight, init_bn_weight from autokeras.nn.layers import StubConcatenate, StubAdd, is_layer, layer_width, \ to_real_keras_layer, set_torch_weight_to_stub, set_stub_weight_to_torch, set_stub_weight_to_keras, \ - set_keras_weight_to_stub, StubReLU, get_conv_class, get_batch_norm_class + set_keras_weight_to_stub, get_conv_class, get_pooling_class, StubReLU class NetworkDescriptor: @@ -24,22 +23,11 @@ class NetworkDescriptor: def __init__(self): self.skip_connections = [] - self.conv_widths = [] - self.dense_widths = [] - - @property - def n_dense(self): - return len(self.dense_widths) + self.layers = [] @property - def n_conv(self): - return len(self.conv_widths) - - def add_conv_width(self, width): - self.conv_widths.append(width) - - def add_dense_width(self, width): - self.dense_widths.append(width) + def n_layers(self): + return len(self.layers) def add_skip_connection(self, u, v, connection_type): """ Add a skip-connection to the descriptor. 
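The ResNetGenerator rewrite above produces a pre-activation, ResNet-18-style network sized for CIFAR-scale inputs: a 3x3 stem (the 7x7 conv and initial max-pool are gone), then four stages of two basic blocks each, downsampling by stride instead of pooling, with a strided 1x1 conv on every residual path so shapes always match at the StubAdd. A sketch of the resulting stage plan, assuming the default model_width starts at 64:

    # The first block of a stage applies the stride, later blocks keep resolution,
    # exactly as _make_layer's `strides = [stride] + [1] * (blocks - 1)`.
    def stage_strides(blocks, first_stride):
        return [first_stride] + [1] * (blocks - 1)

    stages = [(64, 2, 1), (128, 2, 2), (256, 2, 2), (512, 2, 2)]  # (planes, blocks, stride)
    for planes, blocks, first_stride in stages:
        print(planes, stage_strides(blocks, first_stride))
    # 64 [1, 1]
    # 128 [2, 1]
    # 256 [2, 1]
    # 512 [2, 1]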
@@ -59,7 +47,10 @@ def to_json(self):
         skip_list = []
         for u, v, connection_type in self.skip_connections:
             skip_list.append({'from': u, 'to': v, 'type': connection_type})
-        return {'node_list': self.conv_widths, 'skip_list': skip_list}
+        return {'node_list': self.layers, 'skip_list': skip_list}
+
+    def add_layer(self, layer):
+        self.layers.append(layer)
 
 
 class Node:
@@ -68,6 +59,7 @@ class Node:
     Attributes:
         shape: A tuple describing the shape of the tensor.
     """
+
     def __init__(self, shape):
         self.shape = shape
 
@@ -99,6 +91,7 @@ class Graph:
             of (tensor identifier, layer identifier).
         reverse_adj_list: A reverse adjacency list in the same format as adj_list.
         operation_history: A list saving all the network morphism operations.
+        n_dim: An integer. If it uses Conv1d, n_dim should be 1.
         vis: A dictionary of temporary storage for whether a local operation has been done
             during the network morphism.
     """
@@ -125,8 +118,6 @@ def __init__(self, input_shape, weighted=True):
         self.reverse_adj_list = {}
         self.operation_history = []
         self.n_dim = len(input_shape) - 1
-        self.conv = get_conv_class(self.n_dim)
-        self.batch_norm = get_batch_norm_class(self.n_dim)
         self.vis = None
         self._add_node(Node(input_shape))
 
@@ -266,7 +257,14 @@ def _get_pooling_layers(self, start_node_id, end_node_id):
         layer_list = []
         node_list = [start_node_id]
         self._depth_first_search(end_node_id, layer_list, node_list)
-        return filter(lambda layer_id: is_layer(self.layer_list[layer_id], 'Pooling'), layer_list)
+        ret = []
+        for layer_id in layer_list:
+            layer = self.layer_list[layer_id]
+            if is_layer(layer, 'Pooling'):
+                ret.append((layer.kernel_size, layer.stride, layer.padding))
+            elif is_layer(layer, 'Conv') and layer.stride != 1:
+                ret.append((int((layer.kernel_size + 1) / 2), layer.stride, 0))
+        return ret
 
     def _depth_first_search(self, target_id, layer_id_list, node_list):
         """Search for all the layers and nodes down the path.
@@ -357,21 +355,27 @@ def _upper_layer_width(self, u):
                 return self._upper_layer_width(a) + self._upper_layer_width(b)
             else:
                 return self._upper_layer_width(v)
-        return self.node_list[0][-1]
+        return self.node_list[0].shape[-1]
 
-    def to_conv_deeper_model(self, target_id, kernel_size):
+    def to_deeper_model(self, target_id, new_layer):
         """Insert a new layer after the target layer.
 
         Args:
             target_id: The ID of the layer after which to insert the new layer.
-            kernel_size: An integer. The kernel size of the new convolutional layer.
+            new_layer: An instance of StubLayer subclasses.
         """
-        self.operation_history.append(('to_conv_deeper_model', target_id, kernel_size))
-        target = self.layer_list[target_id]
-        new_layers = deeper_conv_block(target, kernel_size, self.weighted)
-        output_id = self._conv_block_end_node(target_id)
+        self.operation_history.append(('to_deeper_model', target_id, new_layer))
+        input_id = self.layer_id_to_input_node_ids[target_id][0]
+        output_id = self.layer_id_to_output_node_ids[target_id][0]
+        if self.weighted:
+            if is_layer(new_layer, 'Dense'):
+                init_dense_weight(new_layer)
+            elif is_layer(new_layer, 'Conv'):
+                init_conv_weight(new_layer)
+            elif is_layer(new_layer, 'BatchNormalization'):
+                init_bn_weight(new_layer)
 
-        self._insert_new_layers(new_layers, output_id)
+        self._insert_new_layers([new_layer], input_id, output_id)
 
     def to_wider_model(self, pre_layer_id, n_add):
         """Widen the last dimension of the output of the pre_layer.
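to_deeper_model above stays function-preserving because each inserted layer is initialized to (approximately) the identity before noise is added: init_dense_weight uses an identity matrix, init_bn_weight unit scale and zero shift, and init_conv_weight a Dirac kernel per filter. A minimal sketch of the 2-D conv case, mirroring init_conv_weight in layer_transformer.py further below:

    import numpy as np

    def dirac_conv_weight(n_filters, kernel_size):
        # One centered unit impulse per output filter: convolving an input with
        # this kernel ('same' padding, stride 1) reproduces the input exactly.
        weight = np.zeros((n_filters, n_filters, kernel_size, kernel_size))
        center = (kernel_size - 1) // 2
        for i in range(n_filters):
            weight[i, i, center, center] = 1.0
        return weight

    w = dirac_conv_weight(8, 3)  # identity kernel for an 8-channel 3x3 conv

The small add_noise perturbation keeps the morphed child close to its parent while breaking weight symmetry for further training.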
@@ -386,52 +390,22 @@ def to_wider_model(self, pre_layer_id, n_add): dim = layer_width(pre_layer) self.vis = {} self._search(output_id, dim, dim, n_add) + # Update the tensor shapes. for u in self.topological_order: for v, layer_id in self.adj_list[u]: self.node_list[v].shape = self.layer_list[layer_id].output_shape - def to_dense_deeper_model(self, target_id): - """Insert a dense layer after the target layer. - - Args: - target_id: The ID of a dense layer. - """ - self.operation_history.append(('to_dense_deeper_model', target_id)) - target = self.layer_list[target_id] - new_layers = dense_to_deeper_block(target, self.weighted) - output_id = self._dense_block_end_node(target_id) - - self._insert_new_layers(new_layers, output_id) - - def _insert_new_layers(self, new_layers, start_node_id): + def _insert_new_layers(self, new_layers, start_node_id, end_node_id): """Insert the new_layers after the node with start_node_id.""" - new_node_id = self._add_node(deepcopy(self.node_list[self.adj_list[start_node_id][0][0]])) + new_node_id = self._add_node(deepcopy(self.node_list[end_node_id])) temp_output_id = new_node_id for layer in new_layers[:-1]: temp_output_id = self.add_layer(layer, temp_output_id) - self._add_edge(new_layers[-1], temp_output_id, self.adj_list[start_node_id][0][0]) + self._add_edge(new_layers[-1], temp_output_id, end_node_id) new_layers[-1].input = self.node_list[temp_output_id] - new_layers[-1].output = self.node_list[self.adj_list[start_node_id][0][0]] - self._redirect_edge(start_node_id, self.adj_list[start_node_id][0][0], new_node_id) - - def _block_end_node(self, layer_id, block_size): - ret = self.layer_id_to_output_node_ids[layer_id][0] - for i in range(block_size - 2): - ret = self.adj_list[ret][0][0] - return ret - - def _dense_block_end_node(self, layer_id): - return self.layer_id_to_input_node_ids[layer_id][0] - - def _conv_block_end_node(self, layer_id): - """Get the input node ID of the last layer in the block by layer ID. - Return the input node ID of the last layer in the convolutional block. - - Args: - layer_id: the convolutional layer ID. - """ - return self._block_end_node(layer_id, Constant.CONV_BLOCK_DISTANCE) + new_layers[-1].output = self.node_list[end_node_id] + self._redirect_edge(start_node_id, end_node_id, new_node_id) def to_add_skip_model(self, start_id, end_id): """Add a weighted add skip-connection from after start node to end node. @@ -441,27 +415,22 @@ def to_add_skip_model(self, start_id, end_id): end_id: The convolutional layer ID, after which to end the skip-connection. """ self.operation_history.append(('to_add_skip_model', start_id, end_id)) - conv_block_input_id = self._conv_block_end_node(start_id) - conv_block_input_id = self.adj_list[conv_block_input_id][0][0] + filters_end = self.layer_list[end_id].output.shape[-1] + filters_start = self.layer_list[start_id].output.shape[-1] + conv_block_input_id = self.layer_id_to_output_node_ids[start_id][0] - block_last_layer_input_id = self._conv_block_end_node(end_id) + block_last_layer_input_id = self.layer_id_to_input_node_ids[end_id][0] + block_last_layer_output_id = self.layer_id_to_output_node_ids[end_id][0] - # Add the pooling layer chain. 
-        layer_list = self._get_pooling_layers(conv_block_input_id, block_last_layer_input_id)
-        skip_output_id = conv_block_input_id
-        for index, layer_id in enumerate(layer_list):
-            skip_output_id = self.add_layer(deepcopy(self.layer_list[layer_id]), skip_output_id)
+        skip_output_id = self._insert_pooling_layer_chain(block_last_layer_input_id, conv_block_input_id)
 
         # Add the conv layer
-        new_relu_layer = StubReLU()
-        skip_output_id = self.add_layer(new_relu_layer, skip_output_id)
-        new_conv_layer = self.conv(self.layer_list[start_id].filters, self.layer_list[end_id].filters, 1)
+        new_conv_layer = get_conv_class(self.n_dim)(filters_start,
+                                                    filters_end,
+                                                    1)
         skip_output_id = self.add_layer(new_conv_layer, skip_output_id)
-        new_bn_layer = self.batch_norm(self.layer_list[end_id].filters)
-        skip_output_id = self.add_layer(new_bn_layer, skip_output_id)
 
         # Add the add layer.
-        block_last_layer_output_id = self.adj_list[block_last_layer_input_id][0][0]
         add_input_node_id = self._add_node(deepcopy(self.node_list[block_last_layer_output_id]))
         add_layer = StubAdd()
 
@@ -474,20 +443,11 @@ def to_add_skip_model(self, start_id, end_id):
         # Set weights to the additional conv layer.
         if self.weighted:
-            filters_end = self.layer_list[end_id].filters
-            filters_start = self.layer_list[start_id].filters
             filter_shape = (1,) * self.n_dim
             weights = np.zeros((filters_end, filters_start) + filter_shape)
             bias = np.zeros(filters_end)
             new_conv_layer.set_weights((add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))))
 
-            n_filters = filters_end
-            new_weights = [add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1]))]
-            new_bn_layer.set_weights(new_weights)
-
     def to_concat_skip_model(self, start_id, end_id):
         """Add a weighted concatenate skip-connection from after the start node to the end node.
 
         Args:
             start_id: The convolutional layer ID, after which to start the skip-connection.
             end_id: The convolutional layer ID, after which to end the skip-connection.
         """
         self.operation_history.append(('to_concat_skip_model', start_id, end_id))
-        conv_block_input_id = self._conv_block_end_node(start_id)
-        conv_block_input_id = self.adj_list[conv_block_input_id][0][0]
+        filters_end = self.layer_list[end_id].output.shape[-1]
+        filters_start = self.layer_list[start_id].output.shape[-1]
+        conv_block_input_id = self.layer_id_to_output_node_ids[start_id][0]
 
-        block_last_layer_input_id = self._conv_block_end_node(end_id)
+        block_last_layer_input_id = self.layer_id_to_input_node_ids[end_id][0]
+        block_last_layer_output_id = self.layer_id_to_output_node_ids[end_id][0]
 
-        # Add the pooling layer chain.
-        pooling_layer_list = self._get_pooling_layers(conv_block_input_id, block_last_layer_input_id)
-        skip_output_id = conv_block_input_id
-        for index, layer_id in enumerate(pooling_layer_list):
-            skip_output_id = self.add_layer(deepcopy(self.layer_list[layer_id]), skip_output_id)
+        skip_output_id = self._insert_pooling_layer_chain(block_last_layer_input_id, conv_block_input_id)
 
-        block_last_layer_output_id = self.adj_list[block_last_layer_input_id][0][0]
         concat_input_node_id = self._add_node(deepcopy(self.node_list[block_last_layer_output_id]))
         self._redirect_edge(block_last_layer_input_id, block_last_layer_output_id, concat_input_node_id)
 
@@ -520,21 +477,14 @@ def to_concat_skip_model(self, start_id, end_id):
         self.node_list[concat_output_node_id].shape = concat_layer.output_shape
 
         # Add the 1x1 conv layer after the concatenation.
-        new_relu_layer = StubReLU()
-        concat_output_node_id = self.add_layer(new_relu_layer, concat_output_node_id)
-        new_conv_layer = self.conv(self.layer_list[start_id].filters + self.layer_list[end_id].filters,
-                                   self.layer_list[end_id].filters, 1)
-        concat_output_node_id = self.add_layer(new_conv_layer, concat_output_node_id)
-        new_bn_layer = self.batch_norm(self.layer_list[end_id].filters)
-
-        self._add_edge(new_bn_layer, concat_output_node_id, block_last_layer_output_id)
-        new_bn_layer.input = self.node_list[concat_output_node_id]
-        new_bn_layer.output = self.node_list[block_last_layer_output_id]
-        self.node_list[block_last_layer_output_id].shape = new_bn_layer.output_shape
+        new_conv_layer = get_conv_class(self.n_dim)(filters_start + filters_end,
+                                                    filters_end, 1)
+        self._add_edge(new_conv_layer, concat_output_node_id, block_last_layer_output_id)
+        new_conv_layer.input = self.node_list[concat_output_node_id]
+        new_conv_layer.output = self.node_list[block_last_layer_output_id]
+        self.node_list[block_last_layer_output_id].shape = new_conv_layer.output_shape
 
         if self.weighted:
-            filters_end = self.layer_list[end_id].filters
-            filters_start = self.layer_list[start_id].filters
             filter_shape = (1,) * self.n_dim
             weights = np.zeros((filters_end, filters_end) + filter_shape)
             for i in range(filters_end):
@@ -547,47 +497,55 @@ def to_concat_skip_model(self, start_id, end_id):
             bias = np.zeros(filters_end)
             new_conv_layer.set_weights((add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))))
 
-            n_filters = filters_end
-            new_weights = [add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])),
-                           add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1]))]
-            new_bn_layer.set_weights(new_weights)
+    def _insert_pooling_layer_chain(self, block_last_layer_input_id, conv_block_input_id):
+        skip_output_id = conv_block_input_id
+        for kernel_size, stride, padding in self._get_pooling_layers(conv_block_input_id, block_last_layer_input_id):
+            skip_output_id = self.add_layer(get_pooling_class(self.n_dim)(kernel_size,
+                                                                          stride=stride,
+                                                                          padding=padding), skip_output_id)
+        skip_output_id = self.add_layer(StubReLU(), skip_output_id)
+        return skip_output_id
 
     def extract_descriptor(self):
         """Extract the description of the Graph as an instance of NetworkDescriptor."""
+        main_chain = self.get_main_chain()
+        index_in_main_chain = {}
+        for index, u in enumerate(main_chain):
+            index_in_main_chain[u] = index
+
         ret = NetworkDescriptor()
-        topological_node_list = self.topological_order
-        for u in topological_node_list:
+        for u in main_chain:
             for v, layer_id
in self.adj_list[u]: - layer = self.layer_list[layer_id] - if is_layer(layer, 'Conv') and layer.kernel_size not in [1, (1,), (1, 1), (1, 1, 1)]: - ret.add_conv_width(layer_width(layer)) - if is_layer(layer, 'Dense'): - ret.add_dense_width(layer_width(layer)) - - # The position of each node, how many Conv and Dense layers before it. - pos = [0] * len(topological_node_list) - for v in topological_node_list: - layer_count = 0 - for u, layer_id in self.reverse_adj_list[v]: - layer = self.layer_list[layer_id] - weighted = 0 - if (is_layer(layer, 'Conv') and layer.kernel_size not in [1, (1,), (1, 1), (1, 1, 1)]) \ - or is_layer(layer, 'Dense'): - weighted = 1 - layer_count = max(pos[u] + weighted, layer_count) - pos[v] = layer_count - - for u in topological_node_list: - for v, layer_id in self.adj_list[u]: - if pos[u] == pos[v]: + if v not in index_in_main_chain: continue layer = self.layer_list[layer_id] - if is_layer(layer, 'Concatenate'): - ret.add_skip_connection(pos[u], pos[v], NetworkDescriptor.CONCAT_CONNECT) - if is_layer(layer, 'Add'): - ret.add_skip_connection(pos[u], pos[v], NetworkDescriptor.ADD_CONNECT) + copied_layer = copy(layer) + copied_layer.weights = None + ret.add_layer(deepcopy(copied_layer)) + + for u in index_in_main_chain: + for v, layer_id in self.adj_list[u]: + if v not in index_in_main_chain: + temp_u = u + temp_v = v + temp_layer_id = layer_id + skip_type = None + while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain): + if is_layer(self.layer_list[temp_layer_id], 'Concatenate'): + skip_type = NetworkDescriptor.CONCAT_CONNECT + if is_layer(self.layer_list[temp_layer_id], 'Add'): + skip_type = NetworkDescriptor.ADD_CONNECT + temp_u = temp_v + temp_v, temp_layer_id = self.adj_list[temp_v][0] + ret.add_skip_connection(index_in_main_chain[u], index_in_main_chain[temp_u], skip_type) + + elif index_in_main_chain[v] - index_in_main_chain[u] != 1: + skip_type = None + if is_layer(self.layer_list[layer_id], 'Concatenate'): + skip_type = NetworkDescriptor.CONCAT_CONNECT + if is_layer(self.layer_list[layer_id], 'Add'): + skip_type = NetworkDescriptor.ADD_CONNECT + ret.add_skip_connection(index_in_main_chain[u], index_in_main_chain[v], skip_type) return ret @@ -615,29 +573,67 @@ def _layer_ids_in_order(self, layer_ids): def _layer_ids_by_type(self, type_str): return list(filter(lambda layer_id: is_layer(self.layer_list[layer_id], type_str), range(self.n_layers))) + def get_main_chain_layers(self): + """Return a list of layer IDs in the main chain.""" + main_chain = self.get_main_chain() + ret = [] + for u in range(self.n_nodes): + for v, layer_id in self.adj_list[u]: + if v in main_chain and u in main_chain: + ret.append(layer_id) + return ret + def _conv_layer_ids_in_order(self): - return self._layer_ids_in_order( - list(filter(lambda layer_id: self.layer_list[layer_id].kernel_size != 1, - self._layer_ids_by_type('Conv')))) + return list(filter(lambda layer_id: is_layer(self.layer_list[layer_id], 'Conv'), self.get_main_chain_layers())) def _dense_layer_ids_in_order(self): return self._layer_ids_in_order(self._layer_ids_by_type('Dense')) def deep_layer_ids(self): - return self._conv_layer_ids_in_order() + self._dense_layer_ids_in_order()[:-1] + ret = [] + for layer_id in self.get_main_chain_layers(): + if is_layer(self.layer_list[layer_id], 'GlobalAveragePooling'): + break + ret.append(layer_id) + return ret def wide_layer_ids(self): return self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1] def skip_connection_layer_ids(self): - 
return self._conv_layer_ids_in_order()[:-1] + return self.deep_layer_ids()[:-1] def size(self): return sum(list(map(lambda x: x.size(), self.layer_list))) + def get_main_chain(self): + """Returns the main chain node ID list.""" + pre_node = {} + distance = {} + for i in range(self.n_nodes): + distance[i] = 0 + pre_node[i] = i + for i in range(self.n_nodes - 1): + for u in range(self.n_nodes): + for v, layer_id in self.adj_list[u]: + if distance[u] + 1 > distance[v]: + distance[v] = distance[u] + 1 + pre_node[v] = u + temp_id = 0 + for i in range(self.n_nodes): + if distance[i] > distance[temp_id]: + temp_id = i + ret = [temp_id] + while pre_node[temp_id] != temp_id: + temp_id = pre_node[temp_id] + ret.append(temp_id) + ret.reverse() + return ret + class TorchModel(torch.nn.Module): """A neural network class using pytorch constructed from an instance of Graph.""" + def __init__(self, graph): super(TorchModel, self).__init__() self.graph = graph diff --git a/autokeras/nn/layer_transformer.py b/autokeras/nn/layer_transformer.py index 8a3b16639..4b04659f8 100644 --- a/autokeras/nn/layer_transformer.py +++ b/autokeras/nn/layer_transformer.py @@ -5,44 +5,6 @@ NOISE_RATIO = 1e-4 -def deeper_conv_block(conv_layer, kernel_size, weighted=True): - n_dim = get_n_dim(conv_layer) - filter_shape = (kernel_size,) * 2 - n_filters = conv_layer.filters - weight = np.zeros((n_filters, n_filters) + filter_shape) - center = tuple(map(lambda x: int((x - 1) / 2), filter_shape)) - for i in range(n_filters): - filter_weight = np.zeros((n_filters,) + filter_shape) - index = (i,) + center - filter_weight[index] = 1 - weight[i, ...] = filter_weight - bias = np.zeros(n_filters) - new_conv_layer = get_conv_class(n_dim)(conv_layer.filters, n_filters, kernel_size=kernel_size) - bn = get_batch_norm_class(n_dim)(n_filters) - - if weighted: - new_conv_layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) - new_weights = [add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])), - add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), - add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), - add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1]))] - bn.set_weights(new_weights) - - return [StubReLU(), - new_conv_layer, - bn] - - -def dense_to_deeper_block(dense_layer, weighted=True): - units = dense_layer.units - weight = np.eye(units) - bias = np.zeros(units) - new_dense_layer = StubDense(units, units) - if weighted: - new_dense_layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) - return [StubReLU(), new_dense_layer] - - def wider_pre_dense(layer, n_add, weighted=True): if not weighted: return StubDense(layer.input_units, layer.units + n_add) @@ -71,7 +33,10 @@ def wider_pre_dense(layer, n_add, weighted=True): def wider_pre_conv(layer, n_add_filters, weighted=True): n_dim = get_n_dim(layer) if not weighted: - return get_conv_class(n_dim)(layer.input_channel, layer.filters + n_add_filters, kernel_size=layer.kernel_size) + return get_conv_class(n_dim)(layer.input_channel, + layer.filters + n_add_filters, + kernel_size=layer.kernel_size, + stride=layer.stride) n_pre_filters = layer.filters rand = np.random.randint(n_pre_filters, size=n_add_filters) @@ -155,3 +120,35 @@ def add_noise(weights, other_weights): noise_range = NOISE_RATIO * w_range noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape) return np.add(noise, weights) + + +def init_dense_weight(layer): + units = layer.units 
+ weight = np.eye(units) + bias = np.zeros(units) + layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) + + +def init_conv_weight(layer): + n_filters = layer.filters + filter_shape = (layer.kernel_size, ) * get_n_dim(layer) + weight = np.zeros((n_filters, n_filters) + filter_shape) + + center = tuple(map(lambda x: int((x - 1) / 2), filter_shape)) + for i in range(n_filters): + filter_weight = np.zeros((n_filters,) + filter_shape) + index = (i,) + center + filter_weight[index] = 1 + weight[i, ...] = filter_weight + bias = np.zeros(n_filters) + + layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) + + +def init_bn_weight(layer): + n_filters = layer.num_features + new_weights = [add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1]))] + layer.set_weights(new_weights) diff --git a/autokeras/nn/layers.py b/autokeras/nn/layers.py index e48fc5542..50c3e68cd 100644 --- a/autokeras/nn/layers.py +++ b/autokeras/nn/layers.py @@ -69,6 +69,9 @@ def output_shape(self): def to_real_layer(self): pass + def __str__(self): + return type(self).__name__[4:] + class StubWeightBiasLayer(StubLayer): def import_weights(self, torch_layer): @@ -150,17 +153,21 @@ def to_real_layer(self): class StubConv(StubWeightBiasLayer): - def __init__(self, input_channel, filters, kernel_size, input_node=None, output_node=None): + def __init__(self, input_channel, filters, kernel_size, stride=1, output_node=None, input_node=None): super().__init__(input_node, output_node) self.input_channel = input_channel self.filters = filters self.kernel_size = kernel_size + self.stride = stride + self.padding = int(self.kernel_size / 2) @property def output_shape(self): - ret = self.input.shape[:-1] - ret = ret + (self.filters,) - return ret + ret = list(self.input.shape[:-1]) + for index, dim in enumerate(ret): + ret[index] = int((dim + 2 * self.padding - self.kernel_size) / self.stride) + 1 + ret = ret + [self.filters] + return tuple(ret) def import_weights_keras(self, keras_layer): self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1])) @@ -175,13 +182,20 @@ def size(self): def to_real_layer(self): pass + def __str__(self): + return super().__str__() + '(' + ', '.join(str(item) for item in [self.input_channel, + self.filters, + self.kernel_size, + self.stride]) + ')' + class StubConv1d(StubConv): def to_real_layer(self): return torch.nn.Conv1d(self.input_channel, self.filters, self.kernel_size, - padding=int(self.kernel_size / 2)) + stride=self.stride, + padding=self.padding) class StubConv2d(StubConv): @@ -189,7 +203,8 @@ def to_real_layer(self): return torch.nn.Conv2d(self.input_channel, self.filters, self.kernel_size, - padding=int(self.kernel_size / 2)) + stride=self.stride, + padding=self.padding) class StubConv3d(StubConv): @@ -197,7 +212,8 @@ def to_real_layer(self): return torch.nn.Conv3d(self.input_channel, self.filters, self.kernel_size, - padding=int(self.kernel_size / 2)) + stride=self.stride, + padding=self.padding) class StubAggregateLayer(StubLayer): @@ -252,7 +268,12 @@ def to_real_layer(self): class StubPooling(StubLayer): - def __init__(self, kernel_size=2, input_node=None, output_node=None, stride=None, padding=0): + def __init__(self, + kernel_size=Constant.POOLING_KERNEL_SIZE, + input_node=None, + 
output_node=None, + stride=None, + padding=0): super().__init__(input_node, output_node) self.kernel_size = kernel_size self.stride = stride or kernel_size @@ -273,17 +294,17 @@ def to_real_layer(self): class StubPooling1d(StubPooling): def to_real_layer(self): - return torch.nn.MaxPool1d(Constant.POOLING_KERNEL_SIZE) + return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride) class StubPooling2d(StubPooling): def to_real_layer(self): - return torch.nn.MaxPool2d(Constant.POOLING_KERNEL_SIZE) + return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride) class StubPooling3d(StubPooling): def to_real_layer(self): - return torch.nn.MaxPool3d(Constant.POOLING_KERNEL_SIZE) + return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride) class StubGlobalPooling(StubLayer): @@ -376,6 +397,7 @@ def layer_width(layer): return layer.units if is_layer(layer, 'Conv'): return layer.filters + print(layer) raise TypeError('The layer should be either Dense or Conv layer.') diff --git a/autokeras/nn/model_trainer.py b/autokeras/nn/model_trainer.py index f48db9926..3a9e5f841 100644 --- a/autokeras/nn/model_trainer.py +++ b/autokeras/nn/model_trainer.py @@ -77,6 +77,7 @@ def __init__(self, model, path, **kwargs): self.model.to(self.device) self.optimizer = None self.early_stop = None + self.scheduler = None self.current_epoch = 0 self.current_metric_value = 0 self.temp_model_path = os.path.join(path, 'temp_model') @@ -108,9 +109,16 @@ def train_model(self, test_metric_value_list = [] test_loss_list = [] - self.optimizer = torch.optim.Adam(self.model.parameters()) + self.optimizer = torch.optim.SGD( + self.model.parameters(), + 0.025, + momentum=0.9, + weight_decay=3e-4) + # self.optimizer = torch.optim.Adam(self.model.parameters()) + self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, max_iter_num) for epoch in range(max_iter_num): + self.scheduler.step() self._train() test_loss, metric_value = self._test() self.current_metric_value = metric_value @@ -138,16 +146,7 @@ def _train(self): self.current_epoch += 1 if self.verbose: - progress_bar = tqdm(total=len(loader), - desc='Epoch-' - + str(self.current_epoch) - + ', Current Metric - ' - + str(self.current_metric_value), - file=sys.stdout, - leave=False, - ncols=100, - position=0, - unit=' batch') + progress_bar = self.init_progress_bar(len(loader)) else: progress_bar = None @@ -171,6 +170,12 @@ def _test(self): all_targets = [] all_predicted = [] loader = self.test_loader + + if self.verbose: + progress_bar = self.init_progress_bar(len(loader)) + else: + progress_bar = None + with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(deepcopy(loader)): inputs, targets = inputs.to(self.device), targets.to(self.device) @@ -180,6 +185,13 @@ def _test(self): all_predicted.append(outputs.cpu().numpy()) all_targets.append(targets.cpu().numpy()) + if self.verbose: + if batch_idx % 10 == 0: + progress_bar.update(10) + + if self.verbose: + progress_bar.close() + all_predicted = reduce(lambda x, y: np.concatenate((x, y)), all_predicted) all_targets = reduce(lambda x, y: np.concatenate((x, y)), all_targets) return test_loss, self.metric.compute(all_predicted, all_targets) @@ -190,6 +202,18 @@ def _save_model(self): def _load_model(self): self.model.load_state_dict(torch.load(self.temp_model_path)) + def init_progress_bar(self, loader_len): + return tqdm(total=loader_len, + desc='Epoch-' + + str(self.current_epoch) + + ', Current Metric - ' + + str(self.current_metric_value), + file=sys.stdout, + leave=False, + ncols=100, + 
position=0, + unit=' batch') + class GANModelTrainer(ModelTrainerBase): """A ModelTrainer especially for the GAN. @@ -202,6 +226,7 @@ class GANModelTrainer(ModelTrainerBase): optimizer_d: Optimizer for discriminator. optimizer_g: Optimizer for generator. """ + def __init__(self, g_model, d_model, @@ -325,6 +350,7 @@ class EarlyStop: _done: Whether condition met. _min_loss_dec: A threshold for loss improvement. """ + def __init__(self, max_no_improvement_num=Constant.MAX_NO_IMPROVEMENT_NUM, min_loss_dec=Constant.MIN_LOSS_DEC): super().__init__() self.training_losses = [] diff --git a/autokeras/search.py b/autokeras/search.py index 3ac9cc7c6..e6bf36f85 100644 --- a/autokeras/search.py +++ b/autokeras/search.py @@ -151,12 +151,6 @@ def init_search(self): self.model_count += 1 self.training_queue.append((graph, -1, model_id)) self.descriptors.append(graph.extract_descriptor()) - # if graph is not None and model_id is not None: - # for child_graph in default_transform(graph): - # child_id = self.model_count - # self.model_count += 1 - # self.training_queue.append((child_graph, model_id, child_id)) - # self.descriptors.append(child_graph.extract_descriptor()) if self.verbose: print('Initialization finished.') @@ -205,7 +199,7 @@ def search(self, train_data, test_data, timeout=60 * 60 * 24): searched = True remaining_time = timeout - (time.time() - start_time) - generated_other_info, generated_graph = self.generate(remaining_time) + generated_other_info, generated_graph = self.generate(remaining_time, q) new_model_id = self.model_count self.model_count += 1 self.training_queue.append((generated_graph, generated_other_info, new_model_id)) @@ -251,11 +245,12 @@ def update(self, other_info, graph, metric_value, model_id): self.bo.fit([graph.extract_descriptor()], [metric_value]) self.bo.add_child(father_id, model_id) - def generate(self, remaining_time): + def generate(self, remaining_time, multiprocessing_queue): """Generate the next neural architecture. Args: remaining_time: The remaining time in seconds. + multiprocessing_queue: the Queue for multiprocessing return value. Returns: other_info: Anything to be saved in the training queue together with the architecture. @@ -263,7 +258,7 @@ def generate(self, remaining_time): """ generated_graph, new_father_id = self.bo.generate(self.descriptors, - remaining_time) + remaining_time, multiprocessing_queue) if new_father_id is None: new_father_id = 0 generated_graph = self.generators[0](self.n_classes, self.input_shape). \ @@ -282,7 +277,7 @@ def export_json(self, path): tree = self.bo.search_tree.get_dict() # Saving the data to file. 
- data['networks'] = networks + # data['networks'] = networks data['tree'] = tree import json with open(path, 'w') as fp: diff --git a/autokeras/utils.py b/autokeras/utils.py index 896440949..cd4bb85d8 100644 --- a/autokeras/utils.py +++ b/autokeras/utils.py @@ -144,9 +144,9 @@ def verbose_print(new_father_id, new_graph): print('+' + '-' * len(line) + '+') for i in range(len(new_graph.operation_history)): if i == len(new_graph.operation_history) // 2: - r = [new_father_id, new_graph.operation_history[i]] + r = [new_father_id, ' '.join(str(item) for item in new_graph.operation_history[i])] else: - r = [' ', new_graph.operation_history[i]] + r = [' ', ' '.join(str(item) for item in new_graph.operation_history[i])] line = '|'.join(str(x).center(cell_size[i]) for i, x in enumerate(r)) print('|' + line + '|') print('+' + '-' * len(line) + '+') diff --git a/examples/mnist.py b/examples/mnist.py index ad7b963d7..b2ad5ef11 100644 --- a/examples/mnist.py +++ b/examples/mnist.py @@ -6,7 +6,7 @@ x_train = x_train.reshape(x_train.shape+(1,)) x_test = x_test.reshape(x_test.shape+(1,)) clf = ImageClassifier(verbose=True, augment=False) - clf.fit(x_train, y_train, time_limit=6 * 60) + clf.fit(x_train, y_train, time_limit=30 * 60) clf.final_fit(x_train, y_train, x_test, y_test, retrain=True) y = clf.evaluate(x_test, y_test) diff --git a/examples/visualize.py b/examples/visualize.py new file mode 100644 index 000000000..987036b73 --- /dev/null +++ b/examples/visualize.py @@ -0,0 +1,30 @@ +import os +from graphviz import Digraph + +from autokeras.utils import pickle_from_file + + +def to_pdf(graph, path): + dot = Digraph(comment='The Round Table') + + for index, node in enumerate(graph.node_list): + dot.node(str(index), str(node.shape)) + + for u in range(graph.n_nodes): + for v, layer_id in graph.adj_list[u]: + dot.edge(str(u), str(v), str(graph.layer_list[layer_id])) + + dot.render(path) + + +def visualize(path): + cnn_module = pickle_from_file(os.path.join(path, 'module')) + cnn_module.searcher.path = path + for item in cnn_module.searcher.history: + model_id = item['model_id'] + graph = cnn_module.searcher.load_model_by_id(model_id) + to_pdf(graph, os.path.join(path, str(model_id))) + + +if __name__ == '__main__': + visualize('') diff --git a/tests/common.py b/tests/common.py index 1ec66adeb..0a307327a 100644 --- a/tests/common.py +++ b/tests/common.py @@ -248,6 +248,7 @@ def __init__(self, target=None, args=None): self.target = target self.args = args self.result = None + self.count = 0 def join(self): pass @@ -267,7 +268,18 @@ def get_context(self, start_method='fork'): return self def Queue(self): - return queue.Queue() + class MockQueue(queue.Queue): + def __init__(self): + super().__init__() + self.count = 0 + + def qsize(self): + self.count += 1 + if self.count > 8: + return 1 + return 0 + # (0.5, 0.8, get_pooling_model()) + return MockQueue() def Process(self, target, args): self.target = target @@ -287,7 +299,7 @@ def start(self): def simple_transform(graph): - graph.to_wider_model(5, 64) + graph.to_wider_model(6, 64) return [deepcopy(graph)] diff --git a/tests/image/temp_test.py b/tests/image/temp_test.py deleted file mode 100644 index ef2056294..000000000 --- a/tests/image/temp_test.py +++ /dev/null @@ -1,49 +0,0 @@ -from unittest.mock import patch - -from autokeras.image.image_supervised import * -from tests.common import MockProcess, mock_train, TEST_TEMP_DIR - - -@patch('torch.multiprocessing.get_context', side_effect=MockProcess) -@patch('autokeras.search.ModelTrainer.train_model', 
side_effect=mock_train) -def test_fit_predict(_, _1): - Constant.MAX_ITER_NUM = 1 - Constant.MAX_MODEL_NUM = 4 - Constant.SEARCH_MAX_ITER = 1 - Constant.T_MIN = 0.8 - Constant.DATA_AUGMENTATION = False - - clf = ImageClassifier(path=TEST_TEMP_DIR, verbose=True) - train_x = np.random.rand(100, 25, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - results = clf.predict(train_x) - assert all(map(lambda result: result in train_y, results)) - - clf = ImageClassifier1D(path=TEST_TEMP_DIR, verbose=True) - train_x = np.random.rand(100, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - results = clf.predict(train_x) - assert all(map(lambda result: result in train_y, results)) - - clf = ImageClassifier3D(path=TEST_TEMP_DIR, verbose=True) - train_x = np.random.rand(100, 25, 25, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - results = clf.predict(train_x) - assert all(map(lambda result: result in train_y, results)) - - clf = ImageRegressor1D(path=TEST_TEMP_DIR, verbose=True) - train_x = np.random.rand(100, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - results = clf.predict(train_x) - assert len(results) == len(train_y) - - clf = ImageRegressor3D(path=TEST_TEMP_DIR, verbose=True) - train_x = np.random.rand(100, 25, 25, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - results = clf.predict(train_x) - assert len(results) == len(train_y) diff --git a/tests/image/test_image_supervised.py b/tests/image/test_image_supervised.py index ad3ed2adc..9a461d572 100644 --- a/tests/image/test_image_supervised.py +++ b/tests/image/test_image_supervised.py @@ -35,7 +35,6 @@ def test_fit_predict(_, _1): Constant.SEARCH_MAX_ITER = 1 Constant.T_MIN = 0.8 Constant.DATA_AUGMENTATION = False - clean_dir(TEST_TEMP_DIR) clf = ImageClassifier(path=TEST_TEMP_DIR, verbose=True) train_x = np.random.rand(100, 25, 25, 1) diff --git a/tests/nn/test_generator.py b/tests/nn/test_generator.py index e936ebe11..261cd553b 100644 --- a/tests/nn/test_generator.py +++ b/tests/nn/test_generator.py @@ -9,7 +9,7 @@ def test_default_cnn_generator(): graph = generator.generate() model = graph.produce_model() inputs = torch.Tensor(np.ones((100, 1, 28, 28))) - print(model(inputs).size()) + model(inputs).size() assert isinstance(model, TorchModel) diff --git a/tests/nn/test_graph.py b/tests/nn/test_graph.py index 21dedbd9d..7af4a9462 100644 --- a/tests/nn/test_graph.py +++ b/tests/nn/test_graph.py @@ -1,59 +1,10 @@ -from autokeras.nn.generator import CnnGenerator +from autokeras.nn.generator import CnnGenerator, ResNetGenerator from autokeras.nn.graph import * -from autokeras.net_transformer import legal_graph +from autokeras.nn.layers import StubBatchNormalization from tests.common import get_conv_data, get_add_skip_model, get_conv_dense_model, get_pooling_model, \ get_concat_skip_model -def test_conv_deeper_stub(): - graph = get_conv_dense_model() - layer_num = graph.n_layers - graph.to_conv_deeper_model(4, 3) - - assert graph.n_layers == layer_num + 3 - - -def test_conv_deeper(): - graph = get_conv_dense_model() - model = graph.produce_model() - graph = deepcopy(graph) - graph.to_conv_deeper_model(4, 3) - new_model = graph.produce_model() - input_data = torch.Tensor(get_conv_data()) - - model.eval() - new_model.eval() - output1 = model(input_data) - output2 = new_model(input_data) - - assert (output1 - output2).abs().sum() < 1e-1 - - -def test_dense_deeper_stub(): - graph = get_conv_dense_model() - 
graph.weighted = False - layer_num = graph.n_layers - graph.to_dense_deeper_model(9) - - assert graph.n_layers == layer_num + 2 - - -def test_dense_deeper(): - graph = get_conv_dense_model() - model = graph.produce_model() - graph = deepcopy(graph) - graph.to_dense_deeper_model(9) - new_model = graph.produce_model() - input_data = torch.Tensor(get_conv_data()) - - model.eval() - new_model.eval() - output1 = model(input_data) - output2 = new_model(input_data) - - assert (output1 - output2).abs().sum() < 1e-3 - - def test_conv_wider_stub(): graph = get_add_skip_model() graph.weighted = False @@ -112,7 +63,7 @@ def test_skip_add_over_pooling_stub(): layer_num = graph.n_layers graph.to_add_skip_model(1, 8) - assert graph.n_layers == layer_num + 5 + assert graph.n_layers == layer_num + 4 def test_skip_add_over_pooling(): @@ -138,7 +89,7 @@ def test_skip_concat_over_pooling_stub(): layer_num = graph.n_layers graph.to_concat_skip_model(1, 11) - assert graph.n_layers == layer_num + 5 + assert graph.n_layers == layer_num + 4 def test_skip_concat_over_pooling(): @@ -161,22 +112,21 @@ def test_skip_concat_over_pooling(): def test_extract_descriptor_add(): descriptor = get_add_skip_model().extract_descriptor() - assert descriptor.n_conv == 5 - assert descriptor.n_dense == 2 - assert descriptor.skip_connections == [(2, 3, NetworkDescriptor.ADD_CONNECT), (3, 4, NetworkDescriptor.ADD_CONNECT)] + assert len(descriptor.layers) == 24 + assert descriptor.skip_connections == [(6, 10, NetworkDescriptor.ADD_CONNECT), + (10, 14, NetworkDescriptor.ADD_CONNECT)] def test_extract_descriptor_concat(): descriptor = get_concat_skip_model().extract_descriptor() - assert descriptor.n_conv == 5 - assert descriptor.n_dense == 2 - assert descriptor.skip_connections == [(2, 3, NetworkDescriptor.CONCAT_CONNECT), - (3, 4, NetworkDescriptor.CONCAT_CONNECT)] + assert len(descriptor.layers) == 32 + assert descriptor.skip_connections == [(6, 10, NetworkDescriptor.CONCAT_CONNECT), + (13, 17, NetworkDescriptor.CONCAT_CONNECT)] def test_deep_layer_ids(): graph = get_conv_dense_model() - assert len(graph.deep_layer_ids()) == 3 + assert len(graph.deep_layer_ids()) == 13 def test_wide_layer_ids(): @@ -186,7 +136,7 @@ def test_wide_layer_ids(): def test_skip_connection_layer_ids(): graph = get_conv_dense_model() - assert len(graph.skip_connection_layer_ids()) == 1 + assert len(graph.skip_connection_layer_ids()) == 12 def test_wider_dense(): @@ -196,17 +146,7 @@ def test_wider_dense(): for args in history: getattr(graph, args[0])(*list(args[1:])) graph.produce_model() - assert legal_graph(graph) - - -def test_long_transform(): - graph = CnnGenerator(10, (32, 32, 3)).generate() - history = [('to_wider_model', 1, 256), ('to_conv_deeper_model', 1, 3), - ('to_concat_skip_model', 5, 9)] - for args in history: - getattr(graph, args[0])(*list(args[1:])) - graph.produce_model() - assert legal_graph(graph) + assert graph.layer_list[14].output.shape[-1] == 128 def test_node_consistency(): @@ -216,26 +156,8 @@ def test_node_consistency(): for layer in graph.layer_list: assert layer.output.shape == layer.output_shape - graph.to_wider_model(5, 64) - assert graph.layer_list[5].output.shape == (16, 16, 128) - - for layer in graph.layer_list: - assert layer.output.shape == layer.output_shape - - graph.to_conv_deeper_model(5, 3) - assert graph.layer_list[19].output.shape == (16, 16, 128) - - for layer in graph.layer_list: - assert layer.output.shape == layer.output_shape - - graph.to_add_skip_model(5, 18) - assert graph.layer_list[23].output.shape == 
(16, 16, 128) - - for layer in graph.layer_list: - assert layer.output.shape == layer.output_shape - - graph.to_concat_skip_model(5, 18) - assert graph.layer_list[25].output.shape == (16, 16, 256) + graph.to_wider_model(6, 64) + assert graph.layer_list[6].output.shape == (16, 16, 128) for layer in graph.layer_list: assert layer.output.shape == layer.output_shape @@ -262,4 +184,14 @@ def test_keras_model(): def test_graph_size(): graph = CnnGenerator(10, (32, 32, 3)).generate() - assert graph.size() == 7498 + assert graph.size() == 7254 + + +def test_long_transform(): + graph = ResNetGenerator(10, (28, 28, 1)).generate() + graph.to_deeper_model(16, StubReLU()) + graph.to_deeper_model(16, StubReLU()) + graph.to_add_skip_model(13, 47) + model = graph.produce_model() + model(torch.Tensor(np.random.random((10, 1, 28, 28)))) + diff --git a/tests/nn/test_layer_transformer.py b/tests/nn/test_layer_transformer.py index 415b2ffb3..701725691 100644 --- a/tests/nn/test_layer_transformer.py +++ b/tests/nn/test_layer_transformer.py @@ -5,17 +5,6 @@ from tests.common import get_conv_dense_model -def test_deeper_conv_block(): - graph = CnnGenerator(10, (28, 28, 3)).generate() - layers = deeper_conv_block(graph.layer_list[1], 3) - assert len(layers) == Constant.CONV_BLOCK_DISTANCE + 1 - - -def test_dense_to_deeper_layer(): - a = StubDense(100, 100) - assert len(dense_to_deeper_block(a)) == 2 - - def test_dense_to_wider_layer(): a = StubDense(10, 5) a.set_weights((np.random.rand(10, 5), np.random.rand(5))) @@ -49,6 +38,6 @@ def test_wider_conv(): model.set_weight_to_graph() graph = model.graph - assert isinstance(wider_pre_conv(graph.layer_list[1], 3), StubConv2d) - assert isinstance(wider_bn(graph.layer_list[2], 3, 3, 3), StubBatchNormalization2d) - assert isinstance(wider_next_conv(graph.layer_list[5], 3, 3, 3), StubConv2d) + assert isinstance(wider_pre_conv(graph.layer_list[2], 3), StubConv2d) + assert isinstance(wider_bn(graph.layer_list[5], 3, 3, 3), StubBatchNormalization2d) + assert isinstance(wider_next_conv(graph.layer_list[6], 3, 3, 3), StubConv2d) diff --git a/tests/test_bayesian.py b/tests/test_bayesian.py index e3572e5c6..417edc6e6 100644 --- a/tests/test_bayesian.py +++ b/tests/test_bayesian.py @@ -1,21 +1,27 @@ from autokeras.bayesian import * +from autokeras.nn.layers import StubConv from tests.common import get_add_skip_model, get_concat_skip_model, get_conv_dense_model +def test_layer_distance(): + layer1 = StubConv(5, 5, 3, 2) + layer2 = StubConv(5, 1, 1, 1) + assert layer_distance(layer1, layer2) == 5.9 / 9 + + def test_edit_distance(): descriptor1 = get_add_skip_model().extract_descriptor() descriptor2 = get_concat_skip_model().extract_descriptor() - assert edit_distance(descriptor1, descriptor2) == 2.0 + assert edit_distance(descriptor1, descriptor2) == 12.0 def test_edit_distance2(): descriptor1 = get_conv_dense_model().extract_descriptor() graph = get_conv_dense_model() - graph.to_conv_deeper_model(1, 3) graph.to_wider_model(4, 6) - graph.to_wider_model(14, 3) + graph.to_wider_model(9, 3) descriptor2 = graph.extract_descriptor() - assert edit_distance(descriptor1, descriptor2) == 1.5 + assert edit_distance(descriptor1, descriptor2) == 2.0 / 9 def test_bourgain_embedding(): diff --git a/tests/test_net_transformer.py b/tests/test_net_transformer.py index ec6bb757d..31af3d96a 100644 --- a/tests/test_net_transformer.py +++ b/tests/test_net_transformer.py @@ -29,27 +29,3 @@ def test_skip(): def test_transform(): models = transform(get_pooling_model()) assert len(models) == 
Constant.N_NEIGHBOURS - - -def test_legal_graph(): - graph = get_pooling_model() - graph.to_add_skip_model(1, 4) - assert legal_graph(graph) - graph.to_add_skip_model(1, 4) - assert not legal_graph(graph) - - -def test_legal_graph2(): - graph = get_pooling_model() - graph.to_concat_skip_model(1, 4) - assert legal_graph(graph) - graph.to_concat_skip_model(1, 4) - assert not legal_graph(graph) - - -def test_default_transform(): - graphs = default_transform(CnnGenerator(10, (32, 32, 3)).generate()) - model = graphs[0].produce_model() - model(torch.Tensor(get_conv_data())) - assert len(graphs) == 1 - assert len(graphs[0].layer_list) == 43 diff --git a/tests/test_search.py b/tests/test_search.py index d6e58bca2..ac4063b88 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -19,14 +19,14 @@ def mock_train(**_): def test_bayesian_searcher(_, _1, _2): train_data, test_data = get_classification_data_loaders() clean_dir(TEST_TEMP_DIR) - generator = Searcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy, - loss=classification_loss, generators=[CnnGenerator, CnnGenerator]) + searcher = Searcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy, + loss=classification_loss, generators=[CnnGenerator, CnnGenerator]) Constant.N_NEIGHBOURS = 1 Constant.T_MIN = 0.8 for _ in range(2): - generator.search(train_data, test_data) + searcher.search(train_data, test_data) clean_dir(TEST_TEMP_DIR) - assert len(generator.history) == 2 + assert len(searcher.history) == 2 @patch('torch.multiprocessing.get_context', side_effect=MockProcess) @@ -62,7 +62,6 @@ def test_export_json(_, _1, _2): generator.export_json(file_path) import json data = json.load(open(file_path, 'r')) - assert len(data['networks']) == 3 assert len(data['tree']['children']) == 2 clean_dir(TEST_TEMP_DIR) assert len(generator.history) == 3 @@ -83,11 +82,7 @@ def test_max_acq(_, _2): generator.search(train_data, test_data) for index1, descriptor1 in enumerate(generator.descriptors): for descriptor2 in generator.descriptors[index1 + 1:]: - print(descriptor2.skip_connections) - print(descriptor1.skip_connections) - print('conv_width2', descriptor2.conv_widths) - print('conv_widt1', descriptor1.conv_widths) - assert edit_distance(descriptor1, descriptor2) != 0 + assert edit_distance(descriptor1, descriptor2) != 0.0 clean_dir(TEST_TEMP_DIR)
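
One behavioral note these tests rely on: BayesianOptimizer.generate now takes the multiprocessing result queue and polls it inside the annealing loop, so the searcher stops optimizing the acquisition function as soon as a training process has a result waiting. A hedged sketch of the queue-stub pattern used by tests/common.py (FakeResultQueue is a hypothetical stand-in):

    import queue

    class FakeResultQueue(queue.Queue):
        # Reports non-empty after a fixed number of qsize() polls, forcing
        # generate() to break out of its loop early, just like MockQueue in
        # tests/common.py above.
        def __init__(self, polls_before_ready=8):
            super().__init__()
            self.count = 0
            self.polls_before_ready = polls_before_ready

        def qsize(self):
            self.count += 1
            return 1 if self.count > self.polls_before_ready else 0

Since the t_min check is commented out in this patch, the queue poll and the timeout are the only exits from the loop besides exhausting the priority queue.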