diff --git a/README.md b/README.md index b0064510..74c6552c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Hourglass and CPN model in TensorFlow for 2018-FashionAI Key Points Detection of Apparel at TianChi +# Hourglass, DHN and CPN model in TensorFlow for 2018-FashionAI Key Points Detection of Apparel at TianChi -This repository contains codes of the re-implementent of [Stacked Hourglass Networks for Human Pose Estimation](https://arxiv.org/abs/1603.06937) and [Cascaded Pyramid Network for Multi-Person Pose Estimation](https://arxiv.org/abs/1711.07319) in TensorFlow for [FashionAI Global Challenge 2018 - Key Points Detection of Apparel](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.11409106.5678.1.95b62e48Im9JVH&raceId=231648). The CPN(Cascaded Pyramid Network) here has several different backbones: ResNet50, SE-ResNet50, SE-ResNeXt50, [DetNet](https://arxiv.org/abs/1804.06215) or DetResNeXt50. I have also tried [Averaging Weights Leads to Wider Optima and Better Generalization](https://arxiv.org/abs/1803.05407) to ensemble models on the fly, although limited improvement was achieved. +This repository contains codes of the re-implementation of [Stacked Hourglass Networks for Human Pose Estimation](https://arxiv.org/abs/1603.06937), [Simple Baselines for Human Pose Estimation and Tracking (Deconvolution Head Network)](https://arxiv.org/abs/1804.06208) and [Cascaded Pyramid Network for Multi-Person Pose Estimation](https://arxiv.org/abs/1711.07319) in TensorFlow for [FashionAI Global Challenge 2018 - Key Points Detection of Apparel](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.11409106.5678.1.95b62e48Im9JVH&raceId=231648). Both the CPN (Cascaded Pyramid Network) and DHN (Deconvolution Head Network) here have several different backbones: ResNet50, SE-ResNet50, SE-ResNeXt50, [DetNet](https://arxiv.org/abs/1804.06215) or DetResNeXt50. 
I have also tried [Averaging Weights Leads to Wider Optima and Better Generalization](https://arxiv.org/abs/1803.05407) to ensemble models on the fly, although limited improvement was achieved. The pre-trained models of backbone networks can be found here: @@ -19,6 +19,7 @@ Almost all the codes was writen by myself and tested under TensorFlow 1.6, Pytho About the model: - DetNet is better, perform almost the same as SEResNeXt, while SEResNet showed little improvement than ResNet +- DHN performs at least as well as CPN, but it has not been thoroughly tested due to limited time - Enforce the loss of invisible keypoints to zero gave better performance - OHKM is useful - It's bad to do gaussian blur on the predicted heatmap, but it's better to do gaussian blur on the target heatmaps for lower-level prediction @@ -66,9 +67,9 @@ If you find it's useful to your research or competitions, any contribution or st - train_2 -> fashionAI_key_points_test_a_20180227.tar - train_3 -> fashionAI_key_points_test_b_20180418.tgz - test_0 -> round2_fashionAI_key_points_test_a_20180426.tar - - test_1 -> round2_fashionAI_key_points_test_b_20180601.tar + - test_1 -> round2_fashionAI_key_points_test_b_20180530.zip.zip -- set your local dataset path in [config.py](https://github.com/HiKapok/tf.fashionAI/blob/e90c5b0072338fa638c56ae788f7146d3f36cb1f/config.py#L20) +- set your local dataset path in [config.py](https://github.com/HiKapok/tf.fashionAI/blob/e90c5b0072338fa638c56ae788f7146d3f36cb1f/config.py#L20), and then run `convert_tfrecords.py` to generate the `*.tfrecord` files - create one file foler named 'model' under the root path of your codes, download all the pre-trained weights of the backbone networks and put them into different sub-folders named 'resnet50', 'seresnet50' and 'seresnext50'. 
Then start training(set RECORDS_DATA_DIR and TEST_RECORDS_DATA_DIR according to your [config.py](https://github.com/HiKapok/tf.fashionAI/blob/e90c5b0072338fa638c56ae788f7146d3f36cb1f/config.py#L20)): ```sh python train_detxt_cpn_onebyone.py --run_on_cloud=False --data_dir=RECORDS_DATA_DIR diff --git a/config.py b/config.py index 52ba098a..2a27258c 100644 --- a/config.py +++ b/config.py @@ -20,6 +20,7 @@ DATA_DIR = '../Datasets' RECORDS_DATA_DIR = '../Datasets/tfrecords' TEST_RECORDS_DATA_DIR = '../Datasets/tfrecords_test' +TEST_RECORDS_STAGE2 = '../Datasets/tfrecords_test_stage2' CATEGORIES = ['blouse', 'dress', 'outwear', 'skirt', 'trousers'] SPLITS = ['test_0', 'train_1', 'train_2', 'train_3']#'train_0', @@ -300,30 +301,31 @@ # {'trousers': 10251, 'skirt': 11649, 'blouse': 11109, 'dress': 9002, 'outwear': 9586} 51597 # warm-up {'trousers': 2795, 'skirt': 2292, 'blouse': 2997, 'dress': 2312, 'outwear': 2138} 12534 # test_a {'trousers': 2631, 'skirt': 2683, 'blouse': 2586, 'dress': 2693, 'outwear': 2508} 13101 +# test_b {'outwear': 10906, 'trousers': 10618, 'dress': 11096, 'skirt': 11154, 'blouse': 10670} 54444 split_size = { '*': {'train': 51597+12534, 'val': 0, - 'test': 13101, - 'test_a': 9970}, + 'test': 54444, + 'test_a': 13101}, 'blouse': {'train': 11109+2997, 'val': 0, - 'test': 2586, - 'test_a': 1974}, + 'test': 10670, + 'test_a': 2586}, 'dress': {'train': 9002+2312, 'val': 0, - 'test': 2693, - 'test_a': 2052}, + 'test': 11096, + 'test_a': 2693}, 'outwear': {'train': 9586+2138, 'val': 0, - 'test': 2508, - 'test_a': 1947}, + 'test': 10906, + 'test_a': 2508}, 'skirt': {'train': 11649+2292, 'val': 0, - 'test': 2683, - 'test_a': 2051}, + 'test': 11154, + 'test_a': 2683}, 'trousers': {'train': 10251+2795, 'val': 0, - 'test': 2631, - 'test_a': 1946}, + 'test': 10618, + 'test_a': 2631}, } diff --git a/convert_tfrecords.py b/convert_tfrecords.py index d9aae33a..f54d0bd0 100644 --- a/convert_tfrecords.py +++ b/convert_tfrecords.py @@ -313,17 +313,24 @@ def 
count_split_examples(split_path, file_pattern=''): if __name__ == '__main__': np.random.seed(RANDOM_SEED) - #convert_test('../Datasets/tfrecords_test_stage1_b', splits=['test_stage1_b']) - os.mkdir(config.RECORDS_DATA_DIR) - convert_train(config.RECORDS_DATA_DIR, val_per=0.) - convert_train(config.RECORDS_DATA_DIR, val_per=0., all_splits=config.WARM_UP_SPLITS, file_idx_start=1000) - os.mkdir(config.TEST_RECORDS_DATA_DIR) - convert_test(config.TEST_RECORDS_DATA_DIR) - print('blouse', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='blouse_0000_val') - , 'outwear', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='outwear_0000_val') - , 'dress', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='dress_0000_val') - , 'skirt', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='skirt_0000_val') - , 'trousers', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='trousers_0000_val') - , 'all', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='val')) + convert_test(config.TEST_RECORDS_STAGE2, splits=['test_1']) + print('blouse', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='blouse') + , 'outwear', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='outwear') + , 'dress', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='dress') + , 'skirt', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='skirt') + , 'trousers', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='trousers') + , 'all', count_split_examples(config.TEST_RECORDS_STAGE2, file_pattern='_')) + + # os.mkdir(config.RECORDS_DATA_DIR) + # convert_train(config.RECORDS_DATA_DIR, val_per=0.) 
+ # convert_train(config.RECORDS_DATA_DIR, val_per=0., all_splits=config.WARM_UP_SPLITS, file_idx_start=1000) + # os.mkdir(config.TEST_RECORDS_DATA_DIR) + # convert_test(config.TEST_RECORDS_DATA_DIR) + # print('blouse', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='blouse_0000_val') + # , 'outwear', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='outwear_0000_val') + # , 'dress', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='dress_0000_val') + # , 'skirt', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='skirt_0000_val') + # , 'trousers', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='trousers_0000_val') + # , 'all', count_split_examples(config.RECORDS_DATA_DIR, file_pattern='val')) # test_dataset() diff --git a/depth_conv2d.py b/depth_conv2d.py new file mode 100644 index 00000000..f1e1fdcd --- /dev/null +++ b/depth_conv2d.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +# modified from tensorflow/contrib/layers/python/layers/layers.py + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.framework.python.ops import variables +from tensorflow.contrib.layers.python.layers import initializers +from tensorflow.contrib.layers.python.layers import utils +from tensorflow.python.framework import ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import variable_scope + +DATA_FORMAT_NCHW = 'NCHW' +DATA_FORMAT_NHWC = 'NHWC' +DATA_FORMAT_NCDHW = 'NCDHW' +DATA_FORMAT_NDHWC = 'NDHWC' + +def _model_variable_getter(getter, + name, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=True, + collections=None, + caching_device=None, + partitioner=None, + rename=None, + use_resource=None, + **_): + """Getter that uses model_variable for compatibility with core layers.""" + short_name = name.split('/')[-1] + if rename and short_name in rename: + name_components = name.split('/') + name_components[-1] = rename[short_name] + name = '/'.join(name_components) + return variables.model_variable( + name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + collections=collections, + trainable=trainable, + caching_device=caching_device, + partitioner=partitioner, + custom_getter=getter, + use_resource=use_resource) + + +def _build_variable_getter(rename=None): + """Build a model variable getter that respects scope getter and renames.""" + + # VariableScope will nest the getters + def layer_variable_getter(getter, *args, **kwargs): + kwargs['rename'] = rename + return _model_variable_getter(getter, *args, **kwargs) + + return layer_variable_getter + +def depth_conv2d( + inputs, + kernel_size, + stride=1, + channel_multiplier=1, + padding='SAME', + 
data_format=DATA_FORMAT_NHWC, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + + if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): + raise ValueError('data_format has to be either NCHW or NHWC.') + layer_variable_getter = _build_variable_getter({ + 'bias': 'biases', + 'depthwise_kernel': 'depthwise_weights' + }) + + with variable_scope.variable_scope( + scope, + 'SeparableConv2d', [inputs], + reuse=reuse, + custom_getter=layer_variable_getter) as sc: + inputs = ops.convert_to_tensor(inputs) + + df = ('channels_first' + if data_format and data_format.startswith('NC') else 'channels_last') + + # Actually apply depthwise conv instead of separable conv. + dtype = inputs.dtype.base_dtype + kernel_h, kernel_w = utils.two_element_tuple(kernel_size) + stride_h, stride_w = utils.two_element_tuple(stride) + num_filters_in = utils.channel_dimension( + inputs.get_shape(), df, min_rank=4) + weights_collections = utils.get_variable_collections( + variables_collections, 'weights') + + depthwise_shape = [kernel_h, kernel_w, num_filters_in, channel_multiplier] + depthwise_weights = variables.model_variable( + 'depthwise_weights', + shape=depthwise_shape, + dtype=dtype, + initializer=weights_initializer, + regularizer=weights_regularizer, + trainable=trainable, + collections=weights_collections) + strides = [1, 1, stride_h, stride_w] if data_format.startswith('NC') else [1, stride_h, stride_w, 1] + + outputs = nn.depthwise_conv2d( + inputs, + depthwise_weights, + strides, + padding, + rate=utils.two_element_tuple(rate), + data_format=data_format) + num_outputs = num_filters_in + + if normalizer_fn is not None: + normalizer_params = normalizer_params or {} + outputs = 
normalizer_fn(outputs, **normalizer_params) + else: + if biases_initializer is not None: + biases_collections = utils.get_variable_collections( + variables_collections, 'biases') + biases = variables.model_variable( + 'biases', + shape=[ + num_outputs, + ], + dtype=dtype, + initializer=biases_initializer, + regularizer=biases_regularizer, + trainable=trainable, + collections=biases_collections) + outputs = nn.bias_add(outputs, biases, data_format=data_format) + + if activation_fn is not None: + outputs = activation_fn(outputs) + return utils.collect_named_outputs(outputs_collections, sc.name, outputs) diff --git a/eval_all_cpn_onepass.py b/eval_all_cpn_onepass.py index 8ed922fe..12345c0f 100644 --- a/eval_all_cpn_onepass.py +++ b/eval_all_cpn_onepass.py @@ -17,6 +17,7 @@ from __future__ import print_function import os +import time import sys import numpy as np import pandas as pd @@ -27,6 +28,7 @@ from net import detxt_cpn from net import seresnet_cpn from net import cpn +from net import simple_xt from utility import train_helper @@ -48,7 +50,7 @@ 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') # scaffold related configuration tf.app.flags.DEFINE_string( - 'data_dir', '../Datasets/tfrecords_test',#tfrecords_test tfrecords_test_stage1_b + 'data_dir', '../Datasets/tfrecords_test_stage2',#tfrecords_test tfrecords_test_stage1_b tfrecords_test_stage2 'The directory where the dataset input data is stored.') tf.app.flags.DEFINE_string( 'dataset_name', '{}_*.tfrecord', 'The pattern of the dataset name to load.') @@ -97,7 +99,7 @@ 'model_scope', 'blouse', 'Model scope name used to replace the name_scope in checkpoint.') tf.app.flags.DEFINE_boolean( - 'run_on_cloud', True, + 'run_on_cloud', False, 'Wether we will train on cloud.') tf.app.flags.DEFINE_string( 'model_to_eval', 'blouse, dress, outwear, skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', @@ -106,6 +108,7 @@ #--model_scope=blouse 
--checkpoint_path=./logs/blouse FLAGS = tf.app.flags.FLAGS +#print(FLAGS.data_dir) all_models = { 'resnet50_cpn': {'backbone': cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_cpn'}, 'detnet50_cpn': {'backbone': detnet_cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_detnet_cpn'}, @@ -116,6 +119,8 @@ 'logs_sub_dir': 'logs_large_sext_cpn'}, 'large_detnext_cpn': {'backbone': lambda inputs, output_channals, heatmap_size, istraining, data_format : detxt_cpn.cascaded_pyramid_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101), 'logs_sub_dir': 'logs_large_detxt_cpn'}, + 'simple_net': {'backbone': lambda inputs, output_channals, heatmap_size, istraining, data_format : simple_xt.simple_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101), + 'logs_sub_dir': 'logs_simple_net'}, 'head_seresnext50_cpn': {'backbone': seresnet_cpn.head_xt_cascaded_pyramid_net, 'logs_sub_dir': 'logs_head_sext_cpn'}, } @@ -443,10 +448,12 @@ def main(_): for m in model_to_eval[1:]: if m == '': continue df_list.append(pd.read_csv('./{}_{}.csv'.format(FLAGS.backbone.strip(), m), encoding='utf-8')) - pd.concat(df_list, ignore_index=True).to_csv('./{}_sub.csv'.format(FLAGS.backbone.strip()), encoding='utf-8', index=False) + + time_stamps = int(time.time()) + pd.concat(df_list, ignore_index=True).to_csv('./{}_sub_{}.csv'.format(FLAGS.backbone.strip(), time_stamps), encoding='utf-8', index=False) if FLAGS.run_on_cloud: - tf.gfile.Copy('./{}_sub.csv'.format(FLAGS.backbone.strip()), os.path.join(full_model_dir, '{}_sub.csv'.format(FLAGS.backbone.strip())), overwrite=True) + tf.gfile.Copy('./{}_sub_{}.csv'.format(FLAGS.backbone.strip(), time_stamps), os.path.join(full_model_dir, '{}_sub_{}.csv'.format(FLAGS.backbone.strip(), time_stamps)), overwrite=True) if __name__ == '__main__': tf.logging.set_verbosity(tf.logging.INFO) diff --git a/eval_cpn.py b/eval_all_cpn_simple.py similarity index 61% rename from eval_cpn.py rename to eval_all_cpn_simple.py 
index d2948bd5..30059426 100644 --- a/eval_cpn.py +++ b/eval_all_cpn_simple.py @@ -23,7 +23,12 @@ #from scipy.misc import imread, imsave, imshow, imresize import tensorflow as tf -from net import cpn as cpn +from net import detnet_cpn +from net import detxt_cpn +from net import seresnet_cpn +from net import cpn +from net import simple_xt + from utility import train_helper from preprocessing import preprocessing @@ -44,13 +49,16 @@ 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') # scaffold related configuration tf.app.flags.DEFINE_string( - 'data_dir', '../Datasets/tfrecords_test', + 'data_dir', '../Datasets/tfrecords_test_stage2',#tfrecords_test tfrecords_test_stage1_b tfrecords_test_stage2 'The directory where the dataset input data is stored.') tf.app.flags.DEFINE_string( 'dataset_name', '{}_*.tfrecord', 'The pattern of the dataset name to load.') tf.app.flags.DEFINE_string( - 'model_dir', './logs_cpn/', + 'model_dir', '.', 'The parent directory where the model will be stored.') +tf.app.flags.DEFINE_string( + 'backbone', 'detnet50_cpn', + 'The backbone network to use for feature extraction.') tf.app.flags.DEFINE_integer( 'log_every_n_steps', 10, 'The frequency with which logs are print.') @@ -82,18 +90,15 @@ tf.app.flags.DEFINE_string( 'checkpoint_path', None, 'The path to a checkpoint from which to fine-tune.') -tf.app.flags.DEFINE_string( - 'coarse_pred_path', None, - 'The path to a pred csv file from which to crop the input image for finer prediction.') tf.app.flags.DEFINE_boolean( - 'flip_on_test', False, + 'flip_on_test', True, 'Wether we will average predictions of left-right fliped image.') tf.app.flags.DEFINE_string( #'blouse', 'dress', 'outwear', 'skirt', 'trousers', 'all' 'model_scope', 'blouse', 'Model scope name used to replace the name_scope in checkpoint.') tf.app.flags.DEFINE_boolean( - 'run_on_cloud', True, + 'run_on_cloud', False, 'Wether we will train on cloud.') tf.app.flags.DEFINE_string( 'model_to_eval', 'blouse, dress, outwear, 
skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', @@ -102,53 +107,25 @@ #--model_scope=blouse --checkpoint_path=./logs/blouse FLAGS = tf.app.flags.FLAGS -def preprocessing_fn(org_image, file_name, shape): - pd_df = None - if FLAGS.coarse_pred_path is not None: - if tf.gfile.Exists(FLAGS.coarse_pred_path): - tf.logging.info('Finetuning Prediction From {}.'.format(FLAGS.coarse_pred_path)) - tf.gfile.Copy(FLAGS.coarse_pred_path, './__coarse_pred.csv', overwrite=True) - pd_df = pd.read_csv('./__coarse_pred.csv', encoding='utf-8') - - all_filenames = [] - all_xmin = [] - all_ymin = [] - all_xmax = [] - all_ymax = [] - - all_values = pd_df.values.tolist() - for records in all_values: - all_filenames.append(records[0].encode('utf8')) - xmin = 2000 - ymin = 2000 - xmax = -1 - ymax = -1 - for kp in records[2:]: - keypoint_info = kp.strip().split('_') - if int(keypoint_info[2]) == -1: - continue - xmin = min(xmin, int(keypoint_info[0])) - ymin = min(ymin, int(keypoint_info[1])) - xmax = max(xmax, int(keypoint_info[0])) - ymax = max(ymax, int(keypoint_info[1])) - all_xmin.append(xmin) - all_ymin.append(ymin) - all_xmax.append(xmax) - all_ymax.append(ymax) - #print(all_filenames, all_xmin, all_ymin, all_xmax, all_ymax) - xmin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmin, dtype=tf.int64)), -1) - ymin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymin, dtype=tf.int64)), -1) - xmax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmax, dtype=tf.int64)), -1) - ymax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymax, dtype=tf.int64)), -1) - pd_df = 
[xmin_table, ymin_table, xmax_table, ymax_table] - #pred_item['file_name'].encode('utf8') - - #lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), tf.constant(config.global_norm_lvalues, dtype=tf.int64)), 0) - return preprocessing.preprocess_for_test(org_image, file_name, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size, pred_df=pd_df) +all_models = { + 'resnet50_cpn': {'backbone': cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_cpn'}, + 'detnet50_cpn': {'backbone': detnet_cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_detnet_cpn'}, + 'seresnet50_cpn': {'backbone': seresnet_cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_se_cpn'}, + 'seresnext50_cpn': {'backbone': seresnet_cpn.xt_cascaded_pyramid_net, 'logs_sub_dir': 'logs_sext_cpn'}, + 'detnext50_cpn': {'backbone': detxt_cpn.cascaded_pyramid_net, 'logs_sub_dir': 'logs_detxt_cpn'}, + 'large_seresnext_cpn': {'backbone': lambda inputs, output_channals, heatmap_size, istraining, data_format : seresnet_cpn.xt_cascaded_pyramid_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101), + 'logs_sub_dir': 'logs_large_sext_cpn'}, + 'large_detnext_cpn': {'backbone': lambda inputs, output_channals, heatmap_size, istraining, data_format : detxt_cpn.cascaded_pyramid_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101), + 'logs_sub_dir': 'logs_large_detxt_cpn'}, + 'simple_net': {'backbone': lambda inputs, output_channals, heatmap_size, istraining, data_format : simple_xt.simple_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101), + 'logs_sub_dir': 'logs_simple_net'}, + 'head_seresnext50_cpn': {'backbone': seresnet_cpn.head_xt_cascaded_pyramid_net, 'logs_sub_dir': 
'logs_head_sext_cpn'}, +} + def input_pipeline(model_scope=FLAGS.model_scope): - #preprocessing_fn = lambda org_image, shape: preprocessing.preprocess_for_test(org_image, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) + preprocessing_fn = lambda org_image, file_name, shape: preprocessing.preprocess_for_test_raw_output(org_image, file_name, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) - images, shape, file_name, classid, offsets = dataset.slim_test_get_split(FLAGS.data_dir, preprocessing_fn, FLAGS.num_readers, FLAGS.num_preprocessing_threads, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None) + images, shape, file_name, classid, offsets = dataset.slim_test_get_split(FLAGS.data_dir, None, FLAGS.num_readers, FLAGS.num_preprocessing_threads, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None, dynamic_pad=True) return {'images': images, 'shape': shape, 'classid': classid, 'file_name': file_name, 'pred_offsets': offsets} @@ -190,33 +167,15 @@ def save_image_with_heatmap(image, height, width, heatmap_size, heatmap, predict imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img.astype(np.uint8)) return save_image_with_heatmap.counter -def gaussian_blur(inputs, inputs_filters, sigma, data_format, name=None): - with tf.name_scope(name, "gaussian_blur", [inputs]): - data_format_ = 'NHWC' if data_format=='channels_last' else 'NCHW' - if data_format_ == 'NHWC': - inputs = tf.transpose(inputs, [0, 2, 3, 1]) - ksize = int(6 * sigma + 1.) 
- x = tf.expand_dims(tf.range(ksize, delta=1, dtype=tf.float32), axis=1) - y = tf.transpose(x, [1, 0]) - kernel_matrix = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) ** 2) / (2 * sigma ** 2)) - #print(kernel_matrix) - kernel_filter = tf.reshape(kernel_matrix, [ksize, ksize, 1, 1]) - kernel_filter = tf.tile(kernel_filter, [1, 1, inputs_filters, 1]) - #kernel_filter = tf.transpose(kernel_filter, [1, 0, 2, 3]) - outputs = tf.nn.depthwise_conv2d(inputs, kernel_filter, strides=[1, 1, 1, 1], padding='SAME', data_format=data_format_, name='blur') - if data_format_ == 'NHWC': - outputs = tf.transpose(outputs, [0, 3, 1, 2]) - return outputs - -def get_keypoint(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): +def get_keypoint(image, predictions, heatmap_size, height, width, category, clip_at_zero=False, data_format='channels_last', name=None): # expand_border = 10 - # pad_pred = tf.pad(predictions, tf.constant([[0, 0], [0, 0], [expand_border, expand_border], [expand_border, expand_border]]), # mode='CONSTANT', name='pred_padding', constant_values=0) # blur_pred = gaussian_blur(pad_pred, config.class_num_joints[category], 3.5, 'channels_first', 'pred_blur') # predictions = tf.slice(blur_pred, [0, 0, expand_border, expand_border], [1, config.class_num_joints[category], heatmap_size, heatmap_size]) + predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) pred_max = tf.reduce_max(predictions, axis=-1) @@ -267,91 +226,65 @@ def get_keypoint(image, predictions, heatmap_size, height, width, category, clip pred_x, pred_y = pred_x * 1., pred_y * 1. 
return pred_x, pred_y -def get_keypoint_v0(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_indices = tf.argmax(predictions, axis=-1) - pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - pred_x, pred_y = pred_x * width / tf.cast(heatmap_size, tf.float32), pred_y * height / tf.cast(heatmap_size, tf.float32) - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - pred_indices_ = tf.squeeze(pred_indices) - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y +backbone_ = all_models[FLAGS.backbone.strip()]['backbone'] def keypoint_model_fn(features, labels, mode, params): #print(features) shape = features['shape'] classid = features['classid'] - pred_offsets = tf.to_float(features['pred_offsets']) file_name = features['file_name'] features = features['images'] file_name = tf.identity(file_name, name='current_file') + image = preprocessing.preprocess_for_test_raw_output(features, params['train_image_size'], params['train_image_size'], data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), scope='first_stage') + if not params['flip_on_test']: - with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) + with tf.variable_scope(params['model_scope'], default_name=None, values=[image], reuse=tf.AUTO_REUSE): + pred_outputs = backbone_(image, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) if params['data_format'] == 'channels_last': pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] + + pred_x, pred_y = get_keypoint(image, pred_outputs[-1], params['heatmap_size'], shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=False, data_format=params['data_format']) else: # test augumentation on the fly if params['data_format'] == 'channels_last': - double_features = tf.reshape(tf.stack([features, tf.map_fn(tf.image.flip_left_right, features, back_prop=False)], axis = 1), [-1, params['train_image_size'], params['train_image_size'], 3]) + 
double_features = tf.reshape(tf.stack([image, tf.map_fn(tf.image.flip_left_right, image, back_prop=False)], axis = 1), [-1, params['train_image_size'], params['train_image_size'], 3]) else: - double_features = tf.reshape(tf.stack([features, tf.transpose(tf.map_fn(tf.image.flip_left_right, tf.transpose(features, [0, 2, 3, 1], name='nchw2nhwc'), back_prop=False), [0, 3, 1, 2], name='nhwc2nchw')], axis = 1), [-1, 3, params['train_image_size'], params['train_image_size']]) + double_features = tf.reshape(tf.stack([image, tf.transpose(tf.map_fn(tf.image.flip_left_right, tf.transpose(image, [0, 2, 3, 1], name='nchw2nhwc'), back_prop=False), [0, 3, 1, 2], name='nhwc2nchw')], axis = 1), [-1, 3, params['train_image_size'], params['train_image_size']]) num_joints = config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')] with tf.variable_scope(params['model_scope'], default_name=None, values=[double_features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(double_features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) + pred_outputs = backbone_(double_features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) if params['data_format'] == 'channels_last': pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # [[0, 0, 0, ..], [1, 1, 1, ...], ...] - row_indices = tf.tile(tf.reshape(tf.range(tf.shape(double_features)[0]), [-1, 1]), [1, num_joints]) - # [[0, 1, 2, ...], [1, 0, 2, ...], [0, 1, 2], [1, 0, 2], ...] 
- col_indices = tf.reshape(tf.tile(tf.reshape(tf.stack([tf.range(num_joints), tf.constant(config.left_right_remap[(params['model_scope'] if 'all' not in params['model_scope'] else '*')])], axis=0), [-1]), [tf.shape(features)[0]]), [-1, num_joints]) - # [[[0, 0], [0, 1], [0, 2], ...], [[1, 1], [1, 0], [1, 2], ...], [[2, 0], [2, 1], [2, 2], ...], ...] + row_indices = tf.tile(tf.reshape(tf.stack([tf.range(0, tf.shape(double_features)[0], delta=2), tf.range(1, tf.shape(double_features)[0], delta=2)], axis=0), [-1, 1]), [1, num_joints]) + col_indices = tf.reshape(tf.tile(tf.reshape(tf.stack([tf.range(num_joints), tf.constant(config.left_right_remap[(params['model_scope'] if 'all' not in params['model_scope'] else '*')])], axis=0), [2, -1]), [1, tf.shape(features)[0]]), [-1, num_joints]) flip_indices=tf.stack([row_indices, col_indices], axis=-1) #flip_indices = tf.Print(flip_indices, [flip_indices], summarize=500) pred_outputs = [tf.gather_nd(pred_outputs[ind], flip_indices, name='gather_nd_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] def cond_flip(heatmap_ind): - return tf.cond(heatmap_ind[1] < 1, lambda : heatmap_ind[0], lambda : tf.transpose(tf.image.flip_left_right(tf.transpose(heatmap_ind[0], [1, 2, 0], name='pred_nchw2nhwc')), [2, 0, 1], name='pred_nhwc2nchw')) + return tf.cond(heatmap_ind[1] < tf.shape(features)[0], lambda : heatmap_ind[0], lambda : tf.transpose(tf.image.flip_left_right(tf.transpose(heatmap_ind[0], [1, 2, 0], name='pred_nchw2nhwc')), [2, 0, 1], name='pred_nhwc2nchw')) # all the heatmap of the fliped image should also be fliped back - pred_outputs = [tf.map_fn(cond_flip, [pred_outputs[ind], tf.tile(tf.reshape(tf.range(2), [-1]), [tf.shape(features)[0]])], dtype=tf.float32, parallel_iterations=10, back_prop=True, swap_memory=False, infer_shape=True, name='map_fn_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # average predictions of left_reight_fliped image - segment_indices = 
tf.reshape(tf.tile(tf.reshape(tf.range(tf.shape(features)[0]), [-1, 1]), [1, 2]), [-1]) - pred_outputs = [tf.segment_mean(pred_outputs[ind], segment_indices, name='segment_mean_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] + pred_outputs = [tf.map_fn(cond_flip, [pred_outputs[ind], tf.range(tf.shape(double_features)[0])], dtype=tf.float32, parallel_iterations=10, back_prop=True, swap_memory=False, infer_shape=True, name='map_fn_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] + pred_outputs = [tf.split(_, 2) for _ in pred_outputs] + pred_outputs_1 = [_[0] for _ in pred_outputs] + pred_outputs_2 = [_[1] for _ in pred_outputs] + pred_x_first_stage1, pred_y_first_stage1 = get_keypoint(image, pred_outputs_1[-1], params['heatmap_size'], shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=False, data_format=params['data_format']) + pred_x_first_stage2, pred_y_first_stage2 = get_keypoint(image, pred_outputs_2[-1], params['heatmap_size'], shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=False, data_format=params['data_format']) + + dist = tf.pow(tf.pow(pred_x_first_stage1 - pred_x_first_stage2, 2.) 
+ tf.pow(pred_y_first_stage1 - pred_y_first_stage2, 2.), .5) - pred_x, pred_y = get_keypoint(features, pred_outputs[-1], params['heatmap_size'], shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=True, data_format=params['data_format']) + pred_x = tf.where(dist < 1e-3, pred_x_first_stage1, pred_x_first_stage1 + (pred_x_first_stage2 - pred_x_first_stage1) * 0.25 / dist) + pred_y = tf.where(dist < 1e-3, pred_y_first_stage1, pred_y_first_stage1 + (pred_y_first_stage2 - pred_y_first_stage1) * 0.25 / dist) - predictions = {'pred_x': pred_x + pred_offsets[:, 0], 'pred_y': pred_y + pred_offsets[:, 1], 'file_name': file_name} + # for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):#TRAINABLE_VARIABLES): + # print(var.op.name) + + predictions = {'pred_x': pred_x, 'pred_y': pred_y, 'file_name': file_name} if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec( @@ -404,9 +337,11 @@ def main(_): session_config=sess_config) model_to_eval = [s.strip() for s in FLAGS.model_to_eval.split(',')] + + full_model_dir = os.path.join(FLAGS.model_dir, all_models[FLAGS.backbone.strip()]['logs_sub_dir']) for m in model_to_eval: if m == '': continue - pred_results = eval_each(keypoint_model_fn, os.path.join(FLAGS.model_dir, m), m, run_config) + pred_results = eval_each(keypoint_model_fn, os.path.join(full_model_dir, m), m, run_config) #print(pred_results) # collect result df = pd.DataFrame(columns=['image_id', 'image_category'] + config.all_keys) @@ -427,17 +362,17 @@ def main(_): #Images/blouse/ab669925e96490ec698af976586f0b2f.jpg df.loc[cur_record] = [filename, m] + temp_list cur_record = cur_record + 1 - df.to_csv('./{}.csv'.format(m), encoding='utf-8', index=False) + df.to_csv('./{}_{}.csv'.format(FLAGS.backbone.strip(), m), encoding='utf-8', index=False) # merge dataframe - df_list = [pd.read_csv('./{}.csv'.format(model_to_eval[0]), encoding='utf-8')] + df_list = 
[pd.read_csv('./{}_{}.csv'.format(FLAGS.backbone.strip(), model_to_eval[0]), encoding='utf-8')] for m in model_to_eval[1:]: if m == '': continue - df_list.append(pd.read_csv('./{}.csv'.format(m), encoding='utf-8')) - pd.concat(df_list, ignore_index=True).to_csv('./sub.csv', encoding='utf-8', index=False) + df_list.append(pd.read_csv('./{}_{}.csv'.format(FLAGS.backbone.strip(), m), encoding='utf-8')) + pd.concat(df_list, ignore_index=True).to_csv('./{}_sub.csv'.format(FLAGS.backbone.strip()), encoding='utf-8', index=False) if FLAGS.run_on_cloud: - tf.gfile.Copy('./sub.csv', os.path.join(FLAGS.model_dir, 'sub.csv'), overwrite=True) + tf.gfile.Copy('./{}_sub.csv'.format(FLAGS.backbone.strip()), os.path.join(full_model_dir, '{}_sub.csv'.format(FLAGS.backbone.strip())), overwrite=True) if __name__ == '__main__': tf.logging.set_verbosity(tf.logging.INFO) diff --git a/eval_detnet_cpn.py b/eval_detnet_cpn.py deleted file mode 100644 index 9f42e688..00000000 --- a/eval_detnet_cpn.py +++ /dev/null @@ -1,445 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import numpy as np -import pandas as pd -#from scipy.misc import imread, imsave, imshow, imresize -import tensorflow as tf - -from net import detnet_cpn as cpn -from utility import train_helper - -from preprocessing import preprocessing -from preprocessing import dataset -import config -#--num_readers=2 --num_preprocessing_threads=2 --data_dir=/media/disk/keypoint/tfrecords --model_to_train=all, blouse -# hardware related configuration -tf.app.flags.DEFINE_integer( - 'num_readers', 16, - 'The number of parallel readers that read data from the dataset.') -tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 48, - 'The number of threads used to create the batches.') -tf.app.flags.DEFINE_integer( - 'num_cpu_threads', 0, - 'The number of cpu cores used to train.') -tf.app.flags.DEFINE_float( - 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') -# scaffold related configuration -tf.app.flags.DEFINE_string( - 'data_dir', '../Datasets/tfrecords_test',#tfrecords_test tfrecords_test_stage1_b - 'The directory where the dataset input data is stored.') -tf.app.flags.DEFINE_string( - 'dataset_name', '{}_*.tfrecord', 'The pattern of the dataset name to load.') -tf.app.flags.DEFINE_string( - 'model_dir', './logs_detnet_cpn/', - 'The parent directory where the model will be stored.') -tf.app.flags.DEFINE_integer( - 'log_every_n_steps', 10, - 'The frequency with which logs are print.') -tf.app.flags.DEFINE_integer( - 'save_summary_steps', 100, - 'The frequency with which summaries are saved, in seconds.') -# model related configuration -tf.app.flags.DEFINE_integer( - 'train_image_size', 384, - 'The size of the input image for the model to use.') -tf.app.flags.DEFINE_integer( - 'heatmap_size', 96, - 'The size of the output heatmap of the model.') 
-tf.app.flags.DEFINE_float( - 'heatmap_sigma', 1., - 'The sigma of Gaussian which generate the target heatmap.') -tf.app.flags.DEFINE_float( - 'bbox_border', 25., - 'The nearest distance of the crop border to al keypoints.') -tf.app.flags.DEFINE_string( - 'data_format', 'channels_last', # 'channels_first' or 'channels_last' - 'A flag to override the data format used in the model. channels_first ' - 'provides a performance boost on GPU but is not always compatible ' - 'with CPU. If left unspecified, the data format will be chosen ' - 'automatically based on whether TensorFlow was built for CPU or GPU.') -tf.app.flags.DEFINE_integer( - 'tf_random_seed', 20180417, 'Random seed for TensorFlow initializers.') -# checkpoint related configuration -tf.app.flags.DEFINE_string( - 'checkpoint_path', None, - 'The path to a checkpoint from which to fine-tune.') -tf.app.flags.DEFINE_string( - 'coarse_pred_path', None, - 'The path to a pred csv file from which to crop the input image for finer prediction.') -tf.app.flags.DEFINE_boolean( - 'flip_on_test', False, - 'Wether we will average predictions of left-right fliped image.') -tf.app.flags.DEFINE_string( - #'blouse', 'dress', 'outwear', 'skirt', 'trousers', 'all' - 'model_scope', 'blouse', - 'Model scope name used to replace the name_scope in checkpoint.') -tf.app.flags.DEFINE_boolean( - 'run_on_cloud', True, - 'Wether we will train on cloud.') -tf.app.flags.DEFINE_string( - 'model_to_eval', 'blouse, dress, outwear, skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', - 'The sub-model to eval (comma-separated list).') - -#--model_scope=blouse --checkpoint_path=./logs/blouse -FLAGS = tf.app.flags.FLAGS - -def preprocessing_fn(org_image, file_name, shape): - pd_df = None - if FLAGS.coarse_pred_path is not None: - if tf.gfile.Exists(FLAGS.coarse_pred_path): - tf.logging.info('Finetuning Prediction From {}.'.format(FLAGS.coarse_pred_path)) - tf.gfile.Copy(FLAGS.coarse_pred_path, 
'./__coarse_pred.csv', overwrite=True) - pd_df = pd.read_csv('./__coarse_pred.csv', encoding='utf-8') - - all_filenames = [] - all_xmin = [] - all_ymin = [] - all_xmax = [] - all_ymax = [] - - all_values = pd_df.values.tolist() - for records in all_values: - all_filenames.append(records[0].encode('utf8')) - xmin = 2000 - ymin = 2000 - xmax = -1 - ymax = -1 - for kp in records[2:]: - keypoint_info = kp.strip().split('_') - if int(keypoint_info[2]) == -1: - continue - xmin = min(xmin, int(keypoint_info[0])) - ymin = min(ymin, int(keypoint_info[1])) - xmax = max(xmax, int(keypoint_info[0])) - ymax = max(ymax, int(keypoint_info[1])) - all_xmin.append(xmin) - all_ymin.append(ymin) - all_xmax.append(xmax) - all_ymax.append(ymax) - #print(all_filenames, all_xmin, all_ymin, all_xmax, all_ymax) - xmin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmin, dtype=tf.int64)), -1) - ymin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymin, dtype=tf.int64)), -1) - xmax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmax, dtype=tf.int64)), -1) - ymax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymax, dtype=tf.int64)), -1) - pd_df = [xmin_table, ymin_table, xmax_table, ymax_table] - #pred_item['file_name'].encode('utf8') - - #lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), tf.constant(config.global_norm_lvalues, dtype=tf.int64)), 0) - return preprocessing.preprocess_for_test(org_image, file_name, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), 
bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size, pred_df=pd_df) - -def input_pipeline(model_scope=FLAGS.model_scope): - # preprocessing_fn = lambda org_image, shape: preprocessing.preprocess_for_test(org_image, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) - - images, shape, file_name, classid, offsets = dataset.slim_test_get_split(FLAGS.data_dir, preprocessing_fn, FLAGS.num_readers, FLAGS.num_preprocessing_threads, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None) - - return {'images': images, 'shape': shape, 'classid': classid, 'file_name': file_name, 'pred_offsets': offsets} - -if config.PRED_DEBUG: - from scipy.misc import imread, imsave, imshow, imresize - def save_image_with_heatmap(image, height, width, heatmap_size, heatmap, predictions, indR, indG, indB): - if not hasattr(save_image_with_heatmap, "counter"): - save_image_with_heatmap.counter = 0 # it doesn't exist yet, so initialize it - save_image_with_heatmap.counter += 1 - - img_to_save = np.array(image.tolist()) + 120 - #print(img_to_save) - - img_to_save = img_to_save.astype(np.uint8) - - heatmap0 = np.sum(heatmap[indR, ...], axis=0).astype(np.uint8) - heatmap1 = np.sum(heatmap[indG, ...], axis=0).astype(np.uint8) - heatmap2 = np.sum(heatmap[indB, ...], axis=0).astype(np.uint8) if len(indB) > 0 else np.zeros((heatmap_size, heatmap_size), dtype=np.float32) - - img_to_save = imresize(img_to_save, (height, width), interp='lanczos') - heatmap0 = imresize(heatmap0, (height, width), interp='lanczos') - heatmap1 = imresize(heatmap1, (height, width), interp='lanczos') - heatmap2 = imresize(heatmap2, (height, width), interp='lanczos') - - img_to_save = img_to_save/2 - img_to_save[:,:,0] = np.clip((img_to_save[:,:,0] + heatmap0 
+ heatmap2), 0, 255) - img_to_save[:,:,1] = np.clip((img_to_save[:,:,1] + heatmap1 + heatmap2), 0, 255) - #img_to_save[:,:,2] = np.clip((img_to_save[:,:,2]/4. + heatmap2), 0, 255) - file_name = 'with_heatmap_{}.jpg'.format(save_image_with_heatmap.counter) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img_to_save.astype(np.uint8)) - - predictions = np.array(predictions.tolist()) - #print(predictions.shape) - for ind in range(predictions.shape[0]): - img = predictions[ind] - img = img - img.min() - img *= 255.0/img.max() - file_name = 'heatmap_{}_{}.jpg'.format(save_image_with_heatmap.counter, ind) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img.astype(np.uint8)) - return save_image_with_heatmap.counter - -def gaussian_blur(inputs, inputs_filters, sigma, data_format, name=None): - with tf.name_scope(name, "gaussian_blur", [inputs]): - data_format_ = 'NHWC' if data_format=='channels_last' else 'NCHW' - if data_format_ == 'NHWC': - inputs = tf.transpose(inputs, [0, 2, 3, 1]) - ksize = int(6 * sigma + 1.) - x = tf.expand_dims(tf.range(ksize, delta=1, dtype=tf.float32), axis=1) - y = tf.transpose(x, [1, 0]) - kernel_matrix = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) 
** 2) / (2 * sigma ** 2)) - #print(kernel_matrix) - kernel_filter = tf.reshape(kernel_matrix, [ksize, ksize, 1, 1]) - kernel_filter = tf.tile(kernel_filter, [1, 1, inputs_filters, 1]) - #kernel_filter = tf.transpose(kernel_filter, [1, 0, 2, 3]) - outputs = tf.nn.depthwise_conv2d(inputs, kernel_filter, strides=[1, 1, 1, 1], padding='SAME', data_format=data_format_, name='blur') - if data_format_ == 'NHWC': - outputs = tf.transpose(outputs, [0, 3, 1, 2]) - return outputs - -def get_keypoint(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - # expand_border = 10 - # pad_pred = tf.pad(predictions, tf.constant([[0, 0], [0, 0], [expand_border, expand_border], [expand_border, expand_border]]), - # mode='CONSTANT', name='pred_padding', constant_values=0) - - # blur_pred = gaussian_blur(pad_pred, config.class_num_joints[category], 3.5, 'channels_first', 'pred_blur') - - # predictions = tf.slice(blur_pred, [0, 0, expand_border, expand_border], [1, config.class_num_joints[category], heatmap_size, heatmap_size]) - - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_max_indices = tf.argmax(predictions, axis=-1) - pred_max_x, pred_max_y = tf.cast(tf.floormod(pred_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_max_indices, heatmap_size), tf.float32) - # mask the max elements to zero - mask_predictions = predictions * tf.one_hot(pred_max_indices, heatmap_size*heatmap_size, on_value=0., off_value=1., dtype=tf.float32) - # get the second max prediction - pred_next_max = tf.reduce_max(mask_predictions, axis=-1) - pred_next_max_indices = tf.argmax(mask_predictions, axis=-1) - pred_next_max_x, pred_next_max_y = tf.cast(tf.floormod(pred_next_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_next_max_indices, heatmap_size), tf.float32) - - dist = tf.pow(tf.pow(pred_next_max_x - pred_max_x, 2.) 
+ tf.pow(pred_next_max_y - pred_max_y, 2.), .5) - - pred_x = tf.where(dist < 1e-3, pred_max_x, pred_max_x + (pred_next_max_x - pred_max_x) * 0.25 / dist) - pred_y = tf.where(dist < 1e-3, pred_max_y, pred_max_y + (pred_next_max_y - pred_max_y) * 0.25 / dist) - - pred_indices_ = tf.squeeze(tf.cast(pred_x, tf.int64) + tf.cast(pred_y, tf.int64) * heatmap_size) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - width_ratio, height_ratio = width / tf.cast(heatmap_size, tf.float32), height / tf.cast(heatmap_size, tf.float32) - - pred_x, pred_y = pred_x * width_ratio, pred_y * height_ratio - #pred_x, pred_y = pred_x * width_ratio + width_ratio/2., pred_y * height_ratio + height_ratio/2. - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -def get_keypoint_v0(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_indices = tf.argmax(predictions, axis=-1) - pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - pred_x, pred_y = pred_x * width / tf.cast(heatmap_size, tf.float32), pred_y * height / tf.cast(heatmap_size, tf.float32) - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - pred_indices_ = tf.squeeze(pred_indices) - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -def keypoint_model_fn(features, labels, mode, params): - #print(features) - shape = features['shape'] - classid = features['classid'] - pred_offsets = tf.to_float(features['pred_offsets']) - file_name = features['file_name'] - features = features['images'] - - file_name = tf.identity(file_name, name='current_file') - - if not params['flip_on_test']: - with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - if params['data_format'] == 'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - else: - # test augumentation on the fly - if params['data_format'] == 'channels_last': - double_features = tf.reshape(tf.stack([features, tf.map_fn(tf.image.flip_left_right, features, back_prop=False)], axis = 1), [-1, params['train_image_size'], params['train_image_size'], 3]) - else: - double_features = tf.reshape(tf.stack([features, tf.transpose(tf.map_fn(tf.image.flip_left_right, tf.transpose(features, [0, 2, 3, 1], name='nchw2nhwc'), back_prop=False), [0, 3, 1, 2], name='nhwc2nchw')], axis = 1), [-1, 3, params['train_image_size'], params['train_image_size']]) - - num_joints = config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')] - with tf.variable_scope(params['model_scope'], default_name=None, values=[double_features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(double_features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - - if params['data_format'] == 
'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # [[0, 0, 0, ..], [1, 1, 1, ...], ...] - row_indices = tf.tile(tf.reshape(tf.range(tf.shape(double_features)[0]), [-1, 1]), [1, num_joints]) - # [[0, 1, 2, ...], [1, 0, 2, ...], [0, 1, 2], [1, 0, 2], ...] - col_indices = tf.reshape(tf.tile(tf.reshape(tf.stack([tf.range(num_joints), tf.constant(config.left_right_remap[(params['model_scope'] if 'all' not in params['model_scope'] else '*')])], axis=0), [-1]), [tf.shape(features)[0]]), [-1, num_joints]) - # [[[0, 0], [0, 1], [0, 2], ...], [[1, 1], [1, 0], [1, 2], ...], [[2, 0], [2, 1], [2, 2], ...], ...] - flip_indices=tf.stack([row_indices, col_indices], axis=-1) - - #flip_indices = tf.Print(flip_indices, [flip_indices], summarize=500) - pred_outputs = [tf.gather_nd(pred_outputs[ind], flip_indices, name='gather_nd_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - - def cond_flip(heatmap_ind): - return tf.cond(heatmap_ind[1] < 1, lambda : heatmap_ind[0], lambda : tf.transpose(tf.image.flip_left_right(tf.transpose(heatmap_ind[0], [1, 2, 0], name='pred_nchw2nhwc')), [2, 0, 1], name='pred_nhwc2nchw')) - # all the heatmap of the fliped image should also be fliped back - pred_outputs = [tf.map_fn(cond_flip, [pred_outputs[ind], tf.tile(tf.reshape(tf.range(2), [-1]), [tf.shape(features)[0]])], dtype=tf.float32, parallel_iterations=10, back_prop=True, swap_memory=False, infer_shape=True, name='map_fn_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # average predictions of left_reight_fliped image - segment_indices = tf.reshape(tf.tile(tf.reshape(tf.range(tf.shape(features)[0]), [-1, 1]), [1, 2]), [-1]) - pred_outputs = [tf.segment_mean(pred_outputs[ind], segment_indices, name='segment_mean_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - - pred_x, pred_y = get_keypoint(features, pred_outputs[-1], params['heatmap_size'], 
shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=True, data_format=params['data_format']) - - predictions = {'pred_x': pred_x + pred_offsets[:, 0], 'pred_y': pred_y + pred_offsets[:, 1], 'file_name': file_name} - - if mode == tf.estimator.ModeKeys.PREDICT: - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=predictions, - loss=None, train_op=None) - else: - raise ValueError('Only "PREDICT" mode is supported.') - -def parse_comma_list(args): - return [float(s.strip()) for s in args.split(',')] - -def eval_each(model_fn, model_dir, model_scope, run_config): - fashionAI = tf.estimator.Estimator( - model_fn=model_fn, model_dir=model_dir, config=run_config, - params={ - 'train_image_size': FLAGS.train_image_size, - 'heatmap_size': FLAGS.heatmap_size, - 'data_format': FLAGS.data_format, - 'model_scope': model_scope, - 'flip_on_test': FLAGS.flip_on_test, - }) - #tf.logging.info('params recv: %s', FLAGS.flag_values_dict()) - - tensors_to_log = { - 'cur_file': 'current_file' - } - - logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps, formatter=lambda dicts: ', '.join(['%s=%s' % (k, v) for k, v in dicts.items()])) - tf.logging.info('Starting to predict model {}.'.format(model_scope)) - pred_results = fashionAI.predict(input_fn=lambda : input_pipeline(model_scope), hooks=[logging_hook], checkpoint_path=train_helper.get_latest_checkpoint_for_evaluate_(model_dir, model_dir)) - #tf.logging.info() - return list(pred_results) - -def main(_): - # Using the Winograd non-fused algorithms provides a small performance boost. 
- os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' - - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction) - sess_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = False, intra_op_parallelism_threads = FLAGS.num_cpu_threads, inter_op_parallelism_threads = FLAGS.num_cpu_threads, gpu_options = gpu_options) - - # Set up a RunConfig to only save checkpoints once per training cycle. - run_config = tf.estimator.RunConfig().replace( - save_checkpoints_secs=None).replace( - save_checkpoints_steps=None).replace( - save_summary_steps=FLAGS.save_summary_steps).replace( - keep_checkpoint_max=5).replace( - tf_random_seed=FLAGS.tf_random_seed).replace( - log_step_count_steps=FLAGS.log_every_n_steps).replace( - session_config=sess_config) - - model_to_eval = [s.strip() for s in FLAGS.model_to_eval.split(',')] - for m in model_to_eval: - if m == '': continue - pred_results = eval_each(keypoint_model_fn, os.path.join(FLAGS.model_dir, m), m, run_config) - #print(pred_results) - # collect result - df = pd.DataFrame(columns=['image_id', 'image_category'] + config.all_keys) - cur_record = 0 - gloabl2local_ind = dict(zip(config.class2global_ind_map[m], list(range(len(config.class2global_ind_map[m]))) )) - #print(gloabl2local_ind) - for pred_item in pred_results: - temp_list = [] - index = 0 - x = pred_item['pred_x'].tolist() - y = pred_item['pred_y'].tolist() - filename = pred_item['file_name'].decode('utf8') - for ind in list(range(config.class_num_joints['*'])): - if ind in gloabl2local_ind: - temp_list.append('{}_{}_1'.format(round(x[gloabl2local_ind[ind]]), round(y[gloabl2local_ind[ind]]))) - else: - temp_list.append('-1_-1_-1') - #Images/blouse/ab669925e96490ec698af976586f0b2f.jpg - df.loc[cur_record] = [filename, m] + temp_list - cur_record = cur_record + 1 - df.to_csv('./{}.csv'.format(m), encoding='utf-8', index=False) - - # merge dataframe - df_list = [pd.read_csv('./{}.csv'.format(model_to_eval[0]), 
encoding='utf-8')] - for m in model_to_eval[1:]: - if m == '': continue - df_list.append(pd.read_csv('./{}.csv'.format(m), encoding='utf-8')) - pd.concat(df_list, ignore_index=True).to_csv('./sub.csv', encoding='utf-8', index=False) - - if FLAGS.run_on_cloud: - tf.gfile.Copy('./sub.csv', os.path.join(FLAGS.model_dir, 'sub.csv'), overwrite=True) - -if __name__ == '__main__': - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/eval_detxt_cpn.py b/eval_detxt_cpn.py deleted file mode 100644 index 5b548c66..00000000 --- a/eval_detxt_cpn.py +++ /dev/null @@ -1,445 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import numpy as np -import pandas as pd -#from scipy.misc import imread, imsave, imshow, imresize -import tensorflow as tf - -from net import detxt_cpn as cpn -from utility import train_helper - -from preprocessing import preprocessing -from preprocessing import dataset -import config -#--num_readers=2 --num_preprocessing_threads=2 --data_dir=/media/disk/keypoint/tfrecords --model_to_train=all, blouse -# hardware related configuration -tf.app.flags.DEFINE_integer( - 'num_readers', 16, - 'The number of parallel readers that read data from the dataset.') -tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 48, - 'The number of threads used to create the batches.') -tf.app.flags.DEFINE_integer( - 'num_cpu_threads', 0, - 'The number of cpu cores used to train.') -tf.app.flags.DEFINE_float( - 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') -# scaffold related configuration -tf.app.flags.DEFINE_string( - 'data_dir', '../Datasets/tfrecords_test',#tfrecords_test tfrecords_test_stage1_b - 'The directory where the dataset input data is stored.') -tf.app.flags.DEFINE_string( - 'dataset_name', '{}_*.tfrecord', 'The pattern of the dataset name to load.') -tf.app.flags.DEFINE_string( - 'model_dir', './logs_detxt_cpn/', - 'The parent directory where the model will be stored.') -tf.app.flags.DEFINE_integer( - 'log_every_n_steps', 10, - 'The frequency with which logs are print.') -tf.app.flags.DEFINE_integer( - 'save_summary_steps', 100, - 'The frequency with which summaries are saved, in seconds.') -# model related configuration -tf.app.flags.DEFINE_integer( - 'train_image_size', 384, - 'The size of the input image for the model to use.') -tf.app.flags.DEFINE_integer( - 'heatmap_size', 96, - 'The size of the output heatmap of the model.') 
-tf.app.flags.DEFINE_float( - 'heatmap_sigma', 1., - 'The sigma of Gaussian which generate the target heatmap.') -tf.app.flags.DEFINE_float( - 'bbox_border', 25., - 'The nearest distance of the crop border to al keypoints.') -tf.app.flags.DEFINE_string( - 'data_format', 'channels_last', # 'channels_first' or 'channels_last' - 'A flag to override the data format used in the model. channels_first ' - 'provides a performance boost on GPU but is not always compatible ' - 'with CPU. If left unspecified, the data format will be chosen ' - 'automatically based on whether TensorFlow was built for CPU or GPU.') -tf.app.flags.DEFINE_integer( - 'tf_random_seed', 20180417, 'Random seed for TensorFlow initializers.') -# checkpoint related configuration -tf.app.flags.DEFINE_string( - 'checkpoint_path', None, - 'The path to a checkpoint from which to fine-tune.') -tf.app.flags.DEFINE_string( - 'coarse_pred_path', None, - 'The path to a pred csv file from which to crop the input image for finer prediction.') -tf.app.flags.DEFINE_boolean( - 'flip_on_test', False, - 'Wether we will average predictions of left-right fliped image.') -tf.app.flags.DEFINE_string( - #'blouse', 'dress', 'outwear', 'skirt', 'trousers', 'all' - 'model_scope', 'blouse', - 'Model scope name used to replace the name_scope in checkpoint.') -tf.app.flags.DEFINE_boolean( - 'run_on_cloud', True, - 'Wether we will train on cloud.') -tf.app.flags.DEFINE_string( - 'model_to_eval', 'blouse, dress, outwear, skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', - 'The sub-model to eval (comma-separated list).') - -#--model_scope=blouse --checkpoint_path=./logs/blouse -FLAGS = tf.app.flags.FLAGS - -def preprocessing_fn(org_image, file_name, shape): - pd_df = None - if FLAGS.coarse_pred_path is not None: - if tf.gfile.Exists(FLAGS.coarse_pred_path): - tf.logging.info('Finetuning Prediction From {}.'.format(FLAGS.coarse_pred_path)) - tf.gfile.Copy(FLAGS.coarse_pred_path, 
'./__coarse_pred.csv', overwrite=True) - pd_df = pd.read_csv('./__coarse_pred.csv', encoding='utf-8') - - all_filenames = [] - all_xmin = [] - all_ymin = [] - all_xmax = [] - all_ymax = [] - - all_values = pd_df.values.tolist() - for records in all_values: - all_filenames.append(records[0].encode('utf8')) - xmin = 2000 - ymin = 2000 - xmax = -1 - ymax = -1 - for kp in records[2:]: - keypoint_info = kp.strip().split('_') - if int(keypoint_info[2]) == -1: - continue - xmin = min(xmin, int(keypoint_info[0])) - ymin = min(ymin, int(keypoint_info[1])) - xmax = max(xmax, int(keypoint_info[0])) - ymax = max(ymax, int(keypoint_info[1])) - all_xmin.append(xmin) - all_ymin.append(ymin) - all_xmax.append(xmax) - all_ymax.append(ymax) - #print(all_filenames, all_xmin, all_ymin, all_xmax, all_ymax) - xmin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmin, dtype=tf.int64)), -1) - ymin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymin, dtype=tf.int64)), -1) - xmax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmax, dtype=tf.int64)), -1) - ymax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymax, dtype=tf.int64)), -1) - pd_df = [xmin_table, ymin_table, xmax_table, ymax_table] - #pred_item['file_name'].encode('utf8') - - #lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), tf.constant(config.global_norm_lvalues, dtype=tf.int64)), 0) - return preprocessing.preprocess_for_test(org_image, file_name, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), 
bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size, pred_df=pd_df) - -def input_pipeline(model_scope=FLAGS.model_scope): - # preprocessing_fn = lambda org_image, shape: preprocessing.preprocess_for_test(org_image, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) - - images, shape, file_name, classid, offsets = dataset.slim_test_get_split(FLAGS.data_dir, preprocessing_fn, FLAGS.num_readers, FLAGS.num_preprocessing_threads, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None) - - return {'images': images, 'shape': shape, 'classid': classid, 'file_name': file_name, 'pred_offsets': offsets} - -if config.PRED_DEBUG: - from scipy.misc import imread, imsave, imshow, imresize - def save_image_with_heatmap(image, height, width, heatmap_size, heatmap, predictions, indR, indG, indB): - if not hasattr(save_image_with_heatmap, "counter"): - save_image_with_heatmap.counter = 0 # it doesn't exist yet, so initialize it - save_image_with_heatmap.counter += 1 - - img_to_save = np.array(image.tolist()) + 120 - #print(img_to_save) - - img_to_save = img_to_save.astype(np.uint8) - - heatmap0 = np.sum(heatmap[indR, ...], axis=0).astype(np.uint8) - heatmap1 = np.sum(heatmap[indG, ...], axis=0).astype(np.uint8) - heatmap2 = np.sum(heatmap[indB, ...], axis=0).astype(np.uint8) if len(indB) > 0 else np.zeros((heatmap_size, heatmap_size), dtype=np.float32) - - img_to_save = imresize(img_to_save, (height, width), interp='lanczos') - heatmap0 = imresize(heatmap0, (height, width), interp='lanczos') - heatmap1 = imresize(heatmap1, (height, width), interp='lanczos') - heatmap2 = imresize(heatmap2, (height, width), interp='lanczos') - - img_to_save = img_to_save/2 - img_to_save[:,:,0] = np.clip((img_to_save[:,:,0] + heatmap0 
+ heatmap2), 0, 255) - img_to_save[:,:,1] = np.clip((img_to_save[:,:,1] + heatmap1 + heatmap2), 0, 255) - #img_to_save[:,:,2] = np.clip((img_to_save[:,:,2]/4. + heatmap2), 0, 255) - file_name = 'with_heatmap_{}.jpg'.format(save_image_with_heatmap.counter) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img_to_save.astype(np.uint8)) - - predictions = np.array(predictions.tolist()) - #print(predictions.shape) - for ind in range(predictions.shape[0]): - img = predictions[ind] - img = img - img.min() - img *= 255.0/img.max() - file_name = 'heatmap_{}_{}.jpg'.format(save_image_with_heatmap.counter, ind) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img.astype(np.uint8)) - return save_image_with_heatmap.counter - -def gaussian_blur(inputs, inputs_filters, sigma, data_format, name=None): - with tf.name_scope(name, "gaussian_blur", [inputs]): - data_format_ = 'NHWC' if data_format=='channels_last' else 'NCHW' - if data_format_ == 'NHWC': - inputs = tf.transpose(inputs, [0, 2, 3, 1]) - ksize = int(6 * sigma + 1.) - x = tf.expand_dims(tf.range(ksize, delta=1, dtype=tf.float32), axis=1) - y = tf.transpose(x, [1, 0]) - kernel_matrix = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) 
** 2) / (2 * sigma ** 2)) - #print(kernel_matrix) - kernel_filter = tf.reshape(kernel_matrix, [ksize, ksize, 1, 1]) - kernel_filter = tf.tile(kernel_filter, [1, 1, inputs_filters, 1]) - #kernel_filter = tf.transpose(kernel_filter, [1, 0, 2, 3]) - outputs = tf.nn.depthwise_conv2d(inputs, kernel_filter, strides=[1, 1, 1, 1], padding='SAME', data_format=data_format_, name='blur') - if data_format_ == 'NHWC': - outputs = tf.transpose(outputs, [0, 3, 1, 2]) - return outputs - -def get_keypoint(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - # expand_border = 10 - # pad_pred = tf.pad(predictions, tf.constant([[0, 0], [0, 0], [expand_border, expand_border], [expand_border, expand_border]]), - # mode='CONSTANT', name='pred_padding', constant_values=0) - - # blur_pred = gaussian_blur(pad_pred, config.class_num_joints[category], 3.5, 'channels_first', 'pred_blur') - - # predictions = tf.slice(blur_pred, [0, 0, expand_border, expand_border], [1, config.class_num_joints[category], heatmap_size, heatmap_size]) - - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_max_indices = tf.argmax(predictions, axis=-1) - pred_max_x, pred_max_y = tf.cast(tf.floormod(pred_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_max_indices, heatmap_size), tf.float32) - # mask the max elements to zero - mask_predictions = predictions * tf.one_hot(pred_max_indices, heatmap_size*heatmap_size, on_value=0., off_value=1., dtype=tf.float32) - # get the second max prediction - pred_next_max = tf.reduce_max(mask_predictions, axis=-1) - pred_next_max_indices = tf.argmax(mask_predictions, axis=-1) - pred_next_max_x, pred_next_max_y = tf.cast(tf.floormod(pred_next_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_next_max_indices, heatmap_size), tf.float32) - - dist = tf.pow(tf.pow(pred_next_max_x - pred_max_x, 2.) 
+ tf.pow(pred_next_max_y - pred_max_y, 2.), .5) - - pred_x = tf.where(dist < 1e-3, pred_max_x, pred_max_x + (pred_next_max_x - pred_max_x) * 0.25 / dist) - pred_y = tf.where(dist < 1e-3, pred_max_y, pred_max_y + (pred_next_max_y - pred_max_y) * 0.25 / dist) - - pred_indices_ = tf.squeeze(tf.cast(pred_x, tf.int64) + tf.cast(pred_y, tf.int64) * heatmap_size) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - width_ratio, height_ratio = width / tf.cast(heatmap_size, tf.float32), height / tf.cast(heatmap_size, tf.float32) - - pred_x, pred_y = pred_x * width_ratio, pred_y * height_ratio - #pred_x, pred_y = pred_x * width_ratio + width_ratio/2., pred_y * height_ratio + height_ratio/2. - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -def get_keypoint_v0(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_indices = tf.argmax(predictions, axis=-1) - pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - pred_x, pred_y = pred_x * width / tf.cast(heatmap_size, tf.float32), pred_y * height / tf.cast(heatmap_size, tf.float32) - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - pred_indices_ = tf.squeeze(pred_indices) - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -def keypoint_model_fn(features, labels, mode, params): - #print(features) - shape = features['shape'] - classid = features['classid'] - pred_offsets = tf.to_float(features['pred_offsets']) - file_name = features['file_name'] - features = features['images'] - - file_name = tf.identity(file_name, name='current_file') - - if not params['flip_on_test']: - with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - if params['data_format'] == 'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - else: - # test augumentation on the fly - if params['data_format'] == 'channels_last': - double_features = tf.reshape(tf.stack([features, tf.map_fn(tf.image.flip_left_right, features, back_prop=False)], axis = 1), [-1, params['train_image_size'], params['train_image_size'], 3]) - else: - double_features = tf.reshape(tf.stack([features, tf.transpose(tf.map_fn(tf.image.flip_left_right, tf.transpose(features, [0, 2, 3, 1], name='nchw2nhwc'), back_prop=False), [0, 3, 1, 2], name='nhwc2nchw')], axis = 1), [-1, 3, params['train_image_size'], params['train_image_size']]) - - num_joints = config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')] - with tf.variable_scope(params['model_scope'], default_name=None, values=[double_features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn.cascaded_pyramid_net(double_features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - - if params['data_format'] == 
'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # [[0, 0, 0, ..], [1, 1, 1, ...], ...] - row_indices = tf.tile(tf.reshape(tf.range(tf.shape(double_features)[0]), [-1, 1]), [1, num_joints]) - # [[0, 1, 2, ...], [1, 0, 2, ...], [0, 1, 2], [1, 0, 2], ...] - col_indices = tf.reshape(tf.tile(tf.reshape(tf.stack([tf.range(num_joints), tf.constant(config.left_right_remap[(params['model_scope'] if 'all' not in params['model_scope'] else '*')])], axis=0), [-1]), [tf.shape(features)[0]]), [-1, num_joints]) - # [[[0, 0], [0, 1], [0, 2], ...], [[1, 1], [1, 0], [1, 2], ...], [[2, 0], [2, 1], [2, 2], ...], ...] - flip_indices=tf.stack([row_indices, col_indices], axis=-1) - - #flip_indices = tf.Print(flip_indices, [flip_indices], summarize=500) - pred_outputs = [tf.gather_nd(pred_outputs[ind], flip_indices, name='gather_nd_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - - def cond_flip(heatmap_ind): - return tf.cond(heatmap_ind[1] < 1, lambda : heatmap_ind[0], lambda : tf.transpose(tf.image.flip_left_right(tf.transpose(heatmap_ind[0], [1, 2, 0], name='pred_nchw2nhwc')), [2, 0, 1], name='pred_nhwc2nchw')) - # all the heatmap of the fliped image should also be fliped back - pred_outputs = [tf.map_fn(cond_flip, [pred_outputs[ind], tf.tile(tf.reshape(tf.range(2), [-1]), [tf.shape(features)[0]])], dtype=tf.float32, parallel_iterations=10, back_prop=True, swap_memory=False, infer_shape=True, name='map_fn_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # average predictions of left_reight_fliped image - segment_indices = tf.reshape(tf.tile(tf.reshape(tf.range(tf.shape(features)[0]), [-1, 1]), [1, 2]), [-1]) - pred_outputs = [tf.segment_mean(pred_outputs[ind], segment_indices, name='segment_mean_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - - pred_x, pred_y = get_keypoint(features, pred_outputs[-1], params['heatmap_size'], 
shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=True, data_format=params['data_format']) - - predictions = {'pred_x': pred_x + pred_offsets[:, 0], 'pred_y': pred_y + pred_offsets[:, 1], 'file_name': file_name} - - if mode == tf.estimator.ModeKeys.PREDICT: - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=predictions, - loss=None, train_op=None) - else: - raise ValueError('Only "PREDICT" mode is supported.') - -def parse_comma_list(args): - return [float(s.strip()) for s in args.split(',')] - -def eval_each(model_fn, model_dir, model_scope, run_config): - fashionAI = tf.estimator.Estimator( - model_fn=model_fn, model_dir=model_dir, config=run_config, - params={ - 'train_image_size': FLAGS.train_image_size, - 'heatmap_size': FLAGS.heatmap_size, - 'data_format': FLAGS.data_format, - 'model_scope': model_scope, - 'flip_on_test': FLAGS.flip_on_test, - }) - #tf.logging.info('params recv: %s', FLAGS.flag_values_dict()) - - tensors_to_log = { - 'cur_file': 'current_file' - } - - logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps, formatter=lambda dicts: ', '.join(['%s=%s' % (k, v) for k, v in dicts.items()])) - tf.logging.info('Starting to predict model {}.'.format(model_scope)) - pred_results = fashionAI.predict(input_fn=lambda : input_pipeline(model_scope), hooks=[logging_hook], checkpoint_path=train_helper.get_latest_checkpoint_for_evaluate_(model_dir, model_dir)) - #tf.logging.info() - return list(pred_results) - -def main(_): - # Using the Winograd non-fused algorithms provides a small performance boost. 
- os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' - - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction) - sess_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = False, intra_op_parallelism_threads = FLAGS.num_cpu_threads, inter_op_parallelism_threads = FLAGS.num_cpu_threads, gpu_options = gpu_options) - - # Set up a RunConfig to only save checkpoints once per training cycle. - run_config = tf.estimator.RunConfig().replace( - save_checkpoints_secs=None).replace( - save_checkpoints_steps=None).replace( - save_summary_steps=FLAGS.save_summary_steps).replace( - keep_checkpoint_max=5).replace( - tf_random_seed=FLAGS.tf_random_seed).replace( - log_step_count_steps=FLAGS.log_every_n_steps).replace( - session_config=sess_config) - - model_to_eval = [s.strip() for s in FLAGS.model_to_eval.split(',')] - for m in model_to_eval: - if m == '': continue - pred_results = eval_each(keypoint_model_fn, os.path.join(FLAGS.model_dir, m), m, run_config) - #print(pred_results) - # collect result - df = pd.DataFrame(columns=['image_id', 'image_category'] + config.all_keys) - cur_record = 0 - gloabl2local_ind = dict(zip(config.class2global_ind_map[m], list(range(len(config.class2global_ind_map[m]))) )) - #print(gloabl2local_ind) - for pred_item in pred_results: - temp_list = [] - index = 0 - x = pred_item['pred_x'].tolist() - y = pred_item['pred_y'].tolist() - filename = pred_item['file_name'].decode('utf8') - for ind in list(range(config.class_num_joints['*'])): - if ind in gloabl2local_ind: - temp_list.append('{}_{}_1'.format(round(x[gloabl2local_ind[ind]]), round(y[gloabl2local_ind[ind]]))) - else: - temp_list.append('-1_-1_-1') - #Images/blouse/ab669925e96490ec698af976586f0b2f.jpg - df.loc[cur_record] = [filename, m] + temp_list - cur_record = cur_record + 1 - df.to_csv('./{}.csv'.format(m), encoding='utf-8', index=False) - - # merge dataframe - df_list = [pd.read_csv('./{}.csv'.format(model_to_eval[0]), 
encoding='utf-8')] - for m in model_to_eval[1:]: - if m == '': continue - df_list.append(pd.read_csv('./{}.csv'.format(m), encoding='utf-8')) - pd.concat(df_list, ignore_index=True).to_csv('./sub.csv', encoding='utf-8', index=False) - - if FLAGS.run_on_cloud: - tf.gfile.Copy('./sub.csv', os.path.join(FLAGS.model_dir, 'sub.csv'), overwrite=True) - -if __name__ == '__main__': - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/eval_senet_cpn.py b/eval_senet_cpn.py deleted file mode 100644 index bf812a40..00000000 --- a/eval_senet_cpn.py +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import numpy as np -import pandas as pd -#from scipy.misc import imread, imsave, imshow, imresize -import tensorflow as tf - -from net import seresnet_cpn as cpn -from utility import train_helper - -from preprocessing import preprocessing -from preprocessing import dataset -import config -#--num_readers=2 --num_preprocessing_threads=2 --data_dir=/media/disk/keypoint/tfrecords --model_to_train=all, blouse -# hardware related configuration -tf.app.flags.DEFINE_integer( - 'num_readers', 16, - 'The number of parallel readers that read data from the dataset.') -tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 48, - 'The number of threads used to create the batches.') -tf.app.flags.DEFINE_integer( - 'num_cpu_threads', 0, - 'The number of cpu cores used to train.') -tf.app.flags.DEFINE_float( - 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') -# scaffold related configuration -tf.app.flags.DEFINE_string( - 'data_dir', '../Datasets/tfrecords_test',#tfrecords_test_stage1_b tfrecords_test - 'The directory where the dataset input data is stored.') -tf.app.flags.DEFINE_string( - 'dataset_name', '{}_*.tfrecord', 'The pattern of the dataset name to load.') -tf.app.flags.DEFINE_string( - 'model_dir', './logs_sext_cpn/', - 'The parent directory where the model will be stored.') -tf.app.flags.DEFINE_integer( - 'log_every_n_steps', 10, - 'The frequency with which logs are print.') -tf.app.flags.DEFINE_integer( - 'save_summary_steps', 100, - 'The frequency with which summaries are saved, in seconds.') -# model related configuration -tf.app.flags.DEFINE_integer( - 'train_image_size', 384, - 'The size of the input image for the model to use.') -tf.app.flags.DEFINE_integer( - 'heatmap_size', 96, - 'The size of the output heatmap of the model.') 
-tf.app.flags.DEFINE_string( - 'backbone', 'seresnext50',#or seresnext50 seresnet50 - 'The backbone network to use for feature pyramid.') -tf.app.flags.DEFINE_float( - 'heatmap_sigma', 1., - 'The sigma of Gaussian which generate the target heatmap.') -tf.app.flags.DEFINE_float( - 'bbox_border', 25., - 'The nearest distance of the crop border to al keypoints.') -tf.app.flags.DEFINE_string( - 'data_format', 'channels_last', # 'channels_first' or 'channels_last' - 'A flag to override the data format used in the model. channels_first ' - 'provides a performance boost on GPU but is not always compatible ' - 'with CPU. If left unspecified, the data format will be chosen ' - 'automatically based on whether TensorFlow was built for CPU or GPU.') -tf.app.flags.DEFINE_integer( - 'tf_random_seed', 20180417, 'Random seed for TensorFlow initializers.') -# checkpoint related configuration -tf.app.flags.DEFINE_string( - 'checkpoint_path', None, - 'The path to a checkpoint from which to fine-tune.') -tf.app.flags.DEFINE_string( - 'coarse_pred_path', None, - 'The path to a pred csv file from which to crop the input image for finer prediction.') -tf.app.flags.DEFINE_boolean( - 'flip_on_test', False, - 'Wether we will average predictions of left-right fliped image.') -tf.app.flags.DEFINE_string( - #'blouse', 'dress', 'outwear', 'skirt', 'trousers', 'all' - 'model_scope', 'blouse', - 'Model scope name used to replace the name_scope in checkpoint.') -tf.app.flags.DEFINE_boolean( - 'run_on_cloud', True, - 'Wether we will train on cloud.') -tf.app.flags.DEFINE_string( - 'model_to_eval', 'blouse, dress, outwear, skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', - 'The sub-model to eval (comma-separated list).') - -#--model_scope=blouse --checkpoint_path=./logs/blouse -FLAGS = tf.app.flags.FLAGS - -def preprocessing_fn(org_image, file_name, shape): - pd_df = None - if FLAGS.coarse_pred_path is not None: - tf.logging.info('Finetuning 
Prediction From {}.'.format(FLAGS.coarse_pred_path)) - if tf.gfile.Exists(FLAGS.coarse_pred_path): - tf.gfile.Copy(FLAGS.coarse_pred_path, './__coarse_pred.csv', overwrite=True) - pd_df = pd.read_csv('./__coarse_pred.csv', encoding='utf-8') - - all_filenames = [] - all_xmin = [] - all_ymin = [] - all_xmax = [] - all_ymax = [] - - all_values = pd_df.values.tolist() - for records in all_values: - all_filenames.append(records[0].encode('utf8')) - xmin = 2000 - ymin = 2000 - xmax = -1 - ymax = -1 - for kp in records[2:]: - keypoint_info = kp.strip().split('_') - if int(keypoint_info[2]) == -1: - continue - xmin = min(xmin, int(keypoint_info[0])) - ymin = min(ymin, int(keypoint_info[1])) - xmax = max(xmax, int(keypoint_info[0])) - ymax = max(ymax, int(keypoint_info[1])) - all_xmin.append(xmin) - all_ymin.append(ymin) - all_xmax.append(xmax) - all_ymax.append(ymax) - #print(all_filenames, all_xmin, all_ymin, all_xmax, all_ymax) - xmin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmin, dtype=tf.int64)), -1) - ymin_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymin, dtype=tf.int64)), -1) - xmax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_xmax, dtype=tf.int64)), -1) - ymax_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(all_filenames, dtype=tf.string), tf.constant(all_ymax, dtype=tf.int64)), -1) - pd_df = [xmin_table, ymin_table, xmax_table, ymax_table] - #pred_item['file_name'].encode('utf8') - - #lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), tf.constant(config.global_norm_lvalues, dtype=tf.int64)), 0) - return preprocessing.preprocess_for_test(org_image, 
file_name, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size, pred_df=pd_df) -def input_pipeline(model_scope=FLAGS.model_scope): - #preprocessing_fn = lambda org_image, shape: preprocessing.preprocess_for_test(org_image, shape, FLAGS.train_image_size, FLAGS.train_image_size, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) - - images, shape, file_name, classid, offsets = dataset.slim_test_get_split(FLAGS.data_dir, preprocessing_fn, FLAGS.num_readers, FLAGS.num_preprocessing_threads, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None) - - return {'images': images, 'shape': shape, 'classid': classid, 'file_name': file_name, 'pred_offsets': offsets} - -if config.PRED_DEBUG: - from scipy.misc import imread, imsave, imshow, imresize - def save_image_with_heatmap(image, height, width, heatmap_size, heatmap, predictions, indR, indG, indB): - if not hasattr(save_image_with_heatmap, "counter"): - save_image_with_heatmap.counter = 0 # it doesn't exist yet, so initialize it - save_image_with_heatmap.counter += 1 - - img_to_save = np.array(image.tolist()) + 120 - #print(img_to_save) - - img_to_save = img_to_save.astype(np.uint8) - - heatmap0 = np.sum(heatmap[indR, ...], axis=0).astype(np.uint8) - heatmap1 = np.sum(heatmap[indG, ...], axis=0).astype(np.uint8) - heatmap2 = np.sum(heatmap[indB, ...], axis=0).astype(np.uint8) if len(indB) > 0 else np.zeros((heatmap_size, heatmap_size), dtype=np.float32) - - img_to_save = imresize(img_to_save, (height, width), interp='lanczos') - heatmap0 = imresize(heatmap0, (height, width), interp='lanczos') - heatmap1 = imresize(heatmap1, (height, width), interp='lanczos') - heatmap2 = 
imresize(heatmap2, (height, width), interp='lanczos') - - img_to_save = img_to_save/2 - img_to_save[:,:,0] = np.clip((img_to_save[:,:,0] + heatmap0 + heatmap2), 0, 255) - img_to_save[:,:,1] = np.clip((img_to_save[:,:,1] + heatmap1 + heatmap2), 0, 255) - #img_to_save[:,:,2] = np.clip((img_to_save[:,:,2]/4. + heatmap2), 0, 255) - file_name = 'with_heatmap_{}.jpg'.format(save_image_with_heatmap.counter) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img_to_save.astype(np.uint8)) - - predictions = np.array(predictions.tolist()) - #print(predictions.shape) - for ind in range(predictions.shape[0]): - img = predictions[ind] - img = img - img.min() - img *= 255.0/img.max() - file_name = 'heatmap_{}_{}.jpg'.format(save_image_with_heatmap.counter, ind) - imsave(os.path.join(config.EVAL_DEBUG_DIR, file_name), img.astype(np.uint8)) - return save_image_with_heatmap.counter - -def gaussian_blur(inputs, inputs_filters, sigma, data_format, name=None): - with tf.name_scope(name, "gaussian_blur", [inputs]): - data_format_ = 'NHWC' if data_format=='channels_last' else 'NCHW' - if data_format_ == 'NHWC': - inputs = tf.transpose(inputs, [0, 2, 3, 1]) - ksize = int(6 * sigma + 1.) - x = tf.expand_dims(tf.range(ksize, delta=1, dtype=tf.float32), axis=1) - y = tf.transpose(x, [1, 0]) - kernel_matrix = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) 
** 2) / (2 * sigma ** 2)) - #print(kernel_matrix) - kernel_filter = tf.reshape(kernel_matrix, [ksize, ksize, 1, 1]) - kernel_filter = tf.tile(kernel_filter, [1, 1, inputs_filters, 1]) - #kernel_filter = tf.transpose(kernel_filter, [1, 0, 2, 3]) - outputs = tf.nn.depthwise_conv2d(inputs, kernel_filter, strides=[1, 1, 1, 1], padding='SAME', data_format=data_format_, name='blur') - if data_format_ == 'NHWC': - outputs = tf.transpose(outputs, [0, 3, 1, 2]) - return outputs - -def get_keypoint(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - # expand_border = 10 - - # pad_pred = tf.pad(predictions, tf.constant([[0, 0], [0, 0], [expand_border, expand_border], [expand_border, expand_border]]), - # mode='CONSTANT', name='pred_padding', constant_values=0) - - # blur_pred = gaussian_blur(pad_pred, config.class_num_joints[category], 3.5, 'channels_first', 'pred_blur') - - # predictions = tf.slice(blur_pred, [0, 0, expand_border, expand_border], [1, config.class_num_joints[category], heatmap_size, heatmap_size]) - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_max_indices = tf.argmax(predictions, axis=-1) - pred_max_x, pred_max_y = tf.cast(tf.floormod(pred_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_max_indices, heatmap_size), tf.float32) - # mask the max elements to zero - mask_predictions = predictions * tf.one_hot(pred_max_indices, heatmap_size*heatmap_size, on_value=0., off_value=1., dtype=tf.float32) - # get the second max prediction - pred_next_max = tf.reduce_max(mask_predictions, axis=-1) - pred_next_max_indices = tf.argmax(mask_predictions, axis=-1) - pred_next_max_x, pred_next_max_y = tf.cast(tf.floormod(pred_next_max_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_next_max_indices, heatmap_size), tf.float32) - - dist = tf.pow(tf.pow(pred_next_max_x - pred_max_x, 2.) 
+ tf.pow(pred_next_max_y - pred_max_y, 2.), .5) - - pred_x = tf.where(dist < 1e-3, pred_max_x, pred_max_x + (pred_next_max_x - pred_max_x) * 0.25 / dist) - pred_y = tf.where(dist < 1e-3, pred_max_y, pred_max_y + (pred_next_max_y - pred_max_y) * 0.25 / dist) - - pred_indices_ = tf.squeeze(tf.cast(pred_x, tf.int64) + tf.cast(pred_y, tf.int64) * heatmap_size) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - width_ratio, height_ratio = width / tf.cast(heatmap_size, tf.float32), height / tf.cast(heatmap_size, tf.float32) - - pred_x, pred_y = pred_x * width_ratio, pred_y * height_ratio - #pred_x, pred_y = pred_x * width_ratio + width_ratio/2., pred_y * height_ratio + height_ratio/2. - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -def get_keypoint_v0(image, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): - predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) - - pred_max = tf.reduce_max(predictions, axis=-1) - pred_indices = tf.argmax(predictions, axis=-1) - pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) - - width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) - pred_x, pred_y = pred_x * width / tf.cast(heatmap_size, tf.float32), pred_y * height / tf.cast(heatmap_size, tf.float32) - - if clip_at_zero: - pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) - pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) - pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) - - if config.PRED_DEBUG: - pred_indices_ = tf.squeeze(pred_indices) - image_ = tf.squeeze(image) * 255. - pred_heatmap = tf.one_hot(pred_indices_, heatmap_size*heatmap_size, on_value=255, off_value=0, axis=-1, dtype=tf.int32) - - pred_heatmap = tf.reshape(pred_heatmap, [-1, heatmap_size, heatmap_size]) - if data_format == 'channels_first': - image_ = tf.transpose(image_, perm=(1, 2, 0)) - save_image_op = tf.py_func(save_image_with_heatmap, - [image_, height, width, - heatmap_size, - pred_heatmap, - tf.reshape(predictions, [-1, heatmap_size, heatmap_size]), - config.left_right_group_map[category][0], - config.left_right_group_map[category][1], - config.left_right_group_map[category][2]], - tf.int64, stateful=True) - with tf.control_dependencies([save_image_op]): - pred_x, pred_y = pred_x * 1., pred_y * 1. 
- return pred_x, pred_y - -cpn_backbone = cpn.cascaded_pyramid_net -if 'seresnext50' in FLAGS.backbone: - cpn_backbone = cpn.xt_cascaded_pyramid_net - -def keypoint_model_fn(features, labels, mode, params): - #print(features) - shape = features['shape'] - classid = features['classid'] - pred_offsets = tf.to_float(features['pred_offsets']) - file_name = features['file_name'] - features = features['images'] - - file_name = tf.identity(file_name, name='current_file') - - if not params['flip_on_test']: - with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn_backbone(features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - if params['data_format'] == 'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - else: - # test augumentation on the fly - if params['data_format'] == 'channels_last': - double_features = tf.reshape(tf.stack([features, tf.map_fn(tf.image.flip_left_right, features, back_prop=False)], axis = 1), [-1, params['train_image_size'], params['train_image_size'], 3]) - else: - double_features = tf.reshape(tf.stack([features, tf.transpose(tf.map_fn(tf.image.flip_left_right, tf.transpose(features, [0, 2, 3, 1], name='nchw2nhwc'), back_prop=False), [0, 3, 1, 2], name='nhwc2nchw')], axis = 1), [-1, 3, params['train_image_size'], params['train_image_size']]) - - num_joints = config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')] - with tf.variable_scope(params['model_scope'], default_name=None, values=[double_features], reuse=tf.AUTO_REUSE): - pred_outputs = cpn_backbone(double_features, config.class_num_joints[(params['model_scope'] if 'all' not in params['model_scope'] else '*')], params['heatmap_size'], 
(mode == tf.estimator.ModeKeys.TRAIN), params['data_format']) - - if params['data_format'] == 'channels_last': - pred_outputs = [tf.transpose(pred_outputs[ind], [0, 3, 1, 2], name='outputs_trans_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # [[0, 0, 0, ..], [1, 1, 1, ...], ...] - row_indices = tf.tile(tf.reshape(tf.range(tf.shape(double_features)[0]), [-1, 1]), [1, num_joints]) - # [[0, 1, 2, ...], [1, 0, 2, ...], [0, 1, 2], [1, 0, 2], ...] - col_indices = tf.reshape(tf.tile(tf.reshape(tf.stack([tf.range(num_joints), tf.constant(config.left_right_remap[(params['model_scope'] if 'all' not in params['model_scope'] else '*')])], axis=0), [-1]), [tf.shape(features)[0]]), [-1, num_joints]) - # [[[0, 0], [0, 1], [0, 2], ...], [[1, 1], [1, 0], [1, 2], ...], [[2, 0], [2, 1], [2, 2], ...], ...] - flip_indices=tf.stack([row_indices, col_indices], axis=-1) - - #flip_indices = tf.Print(flip_indices, [flip_indices], summarize=500) - pred_outputs = [tf.gather_nd(pred_outputs[ind], flip_indices, name='gather_nd_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - - def cond_flip(heatmap_ind): - return tf.cond(heatmap_ind[1] < 1, lambda : heatmap_ind[0], lambda : tf.transpose(tf.image.flip_left_right(tf.transpose(heatmap_ind[0], [1, 2, 0], name='pred_nchw2nhwc')), [2, 0, 1], name='pred_nhwc2nchw')) - # all the heatmap of the fliped image should also be fliped back - pred_outputs = [tf.map_fn(cond_flip, [pred_outputs[ind], tf.tile(tf.reshape(tf.range(2), [-1]), [tf.shape(features)[0]])], dtype=tf.float32, parallel_iterations=10, back_prop=True, swap_memory=False, infer_shape=True, name='map_fn_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] - # average predictions of left_reight_fliped image - segment_indices = tf.reshape(tf.tile(tf.reshape(tf.range(tf.shape(features)[0]), [-1, 1]), [1, 2]), [-1]) - pred_outputs = [tf.segment_mean(pred_outputs[ind], segment_indices, name='segment_mean_{}'.format(ind)) for ind in list(range(len(pred_outputs)))] 
- - pred_x, pred_y = get_keypoint(features, pred_outputs[-1], params['heatmap_size'], shape[0][0], shape[0][1], (params['model_scope'] if 'all' not in params['model_scope'] else '*'), clip_at_zero=True, data_format=params['data_format']) - - predictions = {'pred_x': pred_x + pred_offsets[:, 0], 'pred_y': pred_y + pred_offsets[:, 1], 'file_name': file_name} - - if mode == tf.estimator.ModeKeys.PREDICT: - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=predictions, - loss=None, train_op=None) - else: - raise ValueError('Only "PREDICT" mode is supported.') - -def parse_comma_list(args): - return [float(s.strip()) for s in args.split(',')] - -def eval_each(model_fn, model_dir, model_scope, run_config): - fashionAI = tf.estimator.Estimator( - model_fn=model_fn, model_dir=model_dir, config=run_config, - params={ - 'train_image_size': FLAGS.train_image_size, - 'heatmap_size': FLAGS.heatmap_size, - 'data_format': FLAGS.data_format, - 'model_scope': model_scope, - 'flip_on_test': FLAGS.flip_on_test, - }) - #tf.logging.info('params recv: %s', FLAGS.flag_values_dict()) - - tensors_to_log = { - 'cur_file': 'current_file' - } - - logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps, formatter=lambda dicts: ', '.join(['%s=%s' % (k, v) for k, v in dicts.items()])) - tf.logging.info('Starting to predict model {}.'.format(model_scope)) - pred_results = fashionAI.predict(input_fn=lambda : input_pipeline(model_scope), hooks=[logging_hook], checkpoint_path=train_helper.get_latest_checkpoint_for_evaluate_(model_dir, model_dir)) - #tf.logging.info() - return list(pred_results) - -def main(_): - # Using the Winograd non-fused algorithms provides a small performance boost. 
- os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' - - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction) - sess_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = False, intra_op_parallelism_threads = FLAGS.num_cpu_threads, inter_op_parallelism_threads = FLAGS.num_cpu_threads, gpu_options = gpu_options) - - # Set up a RunConfig to only save checkpoints once per training cycle. - run_config = tf.estimator.RunConfig().replace( - save_checkpoints_secs=None).replace( - save_checkpoints_steps=None).replace( - save_summary_steps=FLAGS.save_summary_steps).replace( - keep_checkpoint_max=5).replace( - tf_random_seed=FLAGS.tf_random_seed).replace( - log_step_count_steps=FLAGS.log_every_n_steps).replace( - session_config=sess_config) - - model_to_eval = [s.strip() for s in FLAGS.model_to_eval.split(',')] - for m in model_to_eval: - if m == '': continue - pred_results = eval_each(keypoint_model_fn, os.path.join(FLAGS.model_dir, m), m, run_config) - #print(pred_results) - # collect result - df = pd.DataFrame(columns=['image_id', 'image_category'] + config.all_keys) - cur_record = 0 - gloabl2local_ind = dict(zip(config.class2global_ind_map[m], list(range(len(config.class2global_ind_map[m]))) )) - #print(gloabl2local_ind) - for pred_item in pred_results: - temp_list = [] - index = 0 - x = pred_item['pred_x'].tolist() - y = pred_item['pred_y'].tolist() - filename = pred_item['file_name'].decode('utf8') - for ind in list(range(config.class_num_joints['*'])): - if ind in gloabl2local_ind: - temp_list.append('{}_{}_1'.format(round(x[gloabl2local_ind[ind]]), round(y[gloabl2local_ind[ind]]))) - else: - temp_list.append('-1_-1_-1') - #Images/blouse/ab669925e96490ec698af976586f0b2f.jpg - df.loc[cur_record] = [filename, m] + temp_list - cur_record = cur_record + 1 - df.to_csv('./{}.csv'.format(m), encoding='utf-8', index=False) - - # merge dataframe - df_list = [pd.read_csv('./{}.csv'.format(model_to_eval[0]), 
encoding='utf-8')] - for m in model_to_eval[1:]: - if m == '': continue - df_list.append(pd.read_csv('./{}.csv'.format(m), encoding='utf-8')) - pd.concat(df_list, ignore_index=True).to_csv('./sub.csv', encoding='utf-8', index=False) - - if FLAGS.run_on_cloud: - tf.gfile.Copy('./sub.csv', os.path.join(FLAGS.model_dir, 'sub.csv'), overwrite=True) - -if __name__ == '__main__': - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/net/hourglass_old.py b/net/hourglass_old.py deleted file mode 100644 index 68b69c2b..00000000 --- a/net/hourglass_old.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -_BATCH_NORM_DECAY = 0.9 -_BATCH_NORM_EPSILON = 1e-5 -_USE_FUSED_BN = True -#initializer_to_use = tf.glorot_uniform_initializer -initializer_to_use = tf.glorot_normal_initializer -conv_bn_initializer_to_use = tf.glorot_normal_initializer#lambda : tf.truncated_normal_initializer(mean=0.0, stddev=0.005) - -def batch_norm_relu(inputs, is_training, data_format, name=None): - """Performs a batch normalization followed by a ReLU.""" - # We set fused=True for a significant performance boost. 
See - # https://www.tensorflow.org/performance/performance_guide#common_fused_ops - inputs = tf.layers.batch_normalization( - inputs=inputs, axis=1 if data_format == 'channels_first' else 3, - momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, - scale=True, training=is_training, fused=_USE_FUSED_BN, name=name) - inputs = tf.nn.relu(inputs, name=name + '/relu' if name is not None else None) - return inputs - -def batch_norm(inputs, is_training, data_format, name=None): - """Performs a batch normalization followed by a ReLU.""" - # We set fused=True for a significant performance boost. See - # https://www.tensorflow.org/performance/performance_guide#common_fused_ops - inputs = tf.layers.batch_normalization( - inputs=inputs, axis=1 if data_format == 'channels_first' else 3, - momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, - scale=True, training=is_training, fused=_USE_FUSED_BN, name=name) - return inputs - -def fixed_padding(inputs, kernel_size, data_format): - pad_total = kernel_size - 1 - pad_beg = pad_total // 2 - pad_end = pad_total - pad_beg - - if data_format == 'channels_first': - padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], - [pad_beg, pad_end], [pad_beg, pad_end]]) - else: - padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], - [pad_beg, pad_end], [0, 0]]) - return padded_inputs - -# this is only can be used before BN -def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format, kernel_initializer=conv_bn_initializer_to_use, name=None): - """Strided 2-D convolution with explicit padding.""" - # The padding is consistent and is based only on `kernel_size`, not on the - # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). 
- if strides > 1: - inputs = fixed_padding(inputs, kernel_size, data_format) - - return tf.layers.conv2d( - inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, - padding=('same' if strides == 1 else 'valid'), use_bias=False, - kernel_initializer=kernel_initializer(), - data_format=data_format, name=name) - - -def bottleneck_block(inputs, in_filters, out_filters, is_training, data_format, name=None): - shortcut = inputs - inputs = batch_norm_relu(inputs, is_training, data_format, name=None if name is None else name+'_bn1') - - # The projection shortcut should come after the first batch norm and ReLU - # since it performs a 1x1 convolution. - # different from original hourglass - if in_filters != out_filters: - shortcut = conv2d_fixed_padding( - inputs=inputs, filters=out_filters, kernel_size=1, strides=1, - data_format=data_format, name=None if name is None else name+'_skip') - - inputs = conv2d_fixed_padding( - inputs=inputs, filters=out_filters//2, kernel_size=1, strides=1, - data_format=data_format, name=None if name is None else name+'_1x1_down') - inputs = batch_norm_relu(inputs, is_training, data_format, name=None if name is None else name+'_bn2') - - inputs = conv2d_fixed_padding( - inputs=inputs, filters=out_filters//2, kernel_size=3, strides=1, - data_format=data_format, name=None if name is None else name+'_3x3_conv') - inputs = batch_norm_relu(inputs, is_training, data_format, name=None if name is None else name+'_bn3') - - inputs = conv2d_fixed_padding( - inputs=inputs, filters=out_filters, kernel_size=1, strides=1, - data_format=data_format, name=None if name is None else name+'_1x1_up') - - return tf.add(inputs, shortcut, name=None if name is None else name+'_elem_add') - -def dozen_bottleneck_blocks(inputs, in_filters, out_filters, num_modules, is_training, data_format, name=None): - for m in range(num_modules): - inputs = bottleneck_block(inputs, in_filters, out_filters, is_training, data_format, name=None if name is None else 
name.format(m)) - - return inputs - -def hourglass(inputs, filters, is_training, data_format, deep_index=1, num_modules=1, name=None): - upchannal1 = dozen_bottleneck_blocks(inputs, filters, filters, num_modules, is_training, data_format, name=None if name is None else name+'_up_{}') - # upchannal1 = inputs - # for m in range(num_modules): - # upchannal1 = bottleneck_block(upchannal1, filters, filters, is_training, data_format, name=None if name is None else name+'_up_{}'.format(m)) - - downchannal1 = tf.layers.max_pooling2d(inputs=inputs, pool_size=2, strides=2, padding='valid', - data_format=data_format, name=None if name is None else name+'_down_pool') - - downchannal1 = dozen_bottleneck_blocks(downchannal1, filters, filters, num_modules, is_training, data_format, name=None if name is None else name+'_down1_{}') - # for m in range(num_modules): - # downchannal1 = bottleneck_block(downchannal1, filters, filters, is_training, data_format, name=None if name is None else name+'_down1_{}'.format(m)) - - if deep_index > 1: - downchannal2 = hourglass(downchannal1, filters, is_training, data_format, deep_index=deep_index-1, num_modules=num_modules, name=None if name is None else name+'_inner_{}'.format(deep_index)) - else: - downchannal2 = dozen_bottleneck_blocks(downchannal1, filters, filters, num_modules, is_training, data_format, name=None if name is None else name+'_down2_{}') - # downchannal2 = downchannal1 - # for m in range(num_modules): - # downchannal2 = bottleneck_block(downchannal2, filters, filters, is_training, data_format, name=None if name is None else name+'_down2_{}'.format(m)) - - downchannal3 = dozen_bottleneck_blocks(downchannal2, filters, filters, num_modules, is_training, data_format, name=None if name is None else name+'_down3_{}') - # downchannal3 = downchannal2 - # for m in range(num_modules): - # downchannal3 = bottleneck_block(downchannal3, filters, filters, is_training, data_format, name=None if name is None else name+'_down3_{}'.format(m)) - 
- if data_format == 'channels_first': - downchannal3 = tf.transpose(downchannal3, [0, 2, 3, 1], name=None if name is None else name+'_trans') - # for visualise - with tf.name_scope(name+'_get_shape', "get_shape", [downchannal3]) as scope: - input_shape = tf.shape(downchannal3)[-3:-1] * 2 - upchannal2 = tf.image.resize_bilinear(downchannal3, input_shape, name=None if name is None else name+'_resize') - if data_format == 'channels_first': - upchannal2 = tf.transpose(upchannal2, [0, 3, 1, 2], name=None if name is None else name+'_trans_inv') - - return tf.add(upchannal1, upchannal2, name=None if name is None else name+'_elem_add') - -def create_model(inputs, num_stack, feat_channals, output_channals, num_modules, is_training, data_format): - inputs = conv2d_fixed_padding(inputs=inputs, filters=64, kernel_size=7, strides=2, - data_format=data_format, kernel_initializer=conv_bn_initializer_to_use, name='precede/conv_7x7') - inputs = batch_norm_relu(inputs, is_training, data_format, name='precede/inputs_bn') - - inputs = bottleneck_block(inputs, 64, 128, is_training, data_format, name='precede/residual1') - inputs = tf.layers.max_pooling2d(inputs=inputs, pool_size=2, strides=2, padding='valid', - data_format=data_format, name='precede/pool') - - inputs = bottleneck_block(inputs, 128, 128, is_training, data_format, name='precede/residual2') - inputs = bottleneck_block(inputs, 128, feat_channals, is_training, data_format, name='precede/residual3') - - hg_inputs = inputs - outputs_list = [] - for stack_index in range(num_stack): - hg = hourglass(hg_inputs, feat_channals, is_training, data_format, deep_index=4, num_modules=num_modules, name='stack_{}/hg'.format(stack_index)) - - hg = dozen_bottleneck_blocks(hg, feat_channals, feat_channals, num_modules, is_training, data_format, name='stack_{}/'.format(stack_index) + 'output_{}') - # for m in range(num_modules): - # hg = bottleneck_block(hg, feat_channals, feat_channals, is_training, data_format, 
name='stack_{}/output_{}'.format(stack_index, m)) - - # produce prediction - output_scores = conv2d_fixed_padding(inputs=hg, filters=feat_channals, kernel_size=1, strides=1, data_format=data_format, name='stack_{}/output_1x1'.format(stack_index)) - output_scores = batch_norm_relu(output_scores, is_training, data_format, name='stack_{}/output_bn'.format(stack_index)) - - # produce heatmap from prediction - # use variable_scope to help model resotre name filter - heatmap = tf.layers.conv2d(inputs=output_scores, filters=output_channals, kernel_size=1, - strides=1, padding='same', use_bias=True, activation=None, - kernel_initializer=initializer_to_use(), - bias_initializer=tf.zeros_initializer(), - data_format=data_format, - name='hg_heatmap/stack_{}/heatmap_1x1'.format(stack_index)) - - - outputs_list.append(heatmap) - # no remap conv for the last hourglass - if stack_index < num_stack - 1: - output_scores_ = tf.layers.conv2d(inputs=output_scores, filters=feat_channals, kernel_size=1, - strides=1, padding='same', use_bias=True, activation=None, - kernel_initializer=initializer_to_use(), - bias_initializer=tf.zeros_initializer(), - data_format=data_format, - name='stack_{}/remap_outputs'.format(stack_index)) - # use variable_scope to help model resotre name filter - heatmap_ = tf.layers.conv2d(inputs=heatmap, filters=feat_channals, kernel_size=1, - strides=1, padding='same', use_bias=True, activation=None, - kernel_initializer=initializer_to_use(), - bias_initializer=tf.zeros_initializer(), - data_format=data_format, - name='hg_heatmap/stack_{}/remap_heatmap'.format(stack_index)) - - # next hourglass inputs - fused_heatmap = tf.add(output_scores_, heatmap_, 'stack_{}/fused_heatmap'.format(stack_index)) - hg_inputs = tf.add(hg_inputs, fused_heatmap, 'stack_{}/next_inputs'.format(stack_index)) - #hg_inputs = hg_inputs + output_scores_ + heatmap_ - - return outputs_list - - - - diff --git a/net/simple_xt.py b/net/simple_xt.py new file mode 100644 index 00000000..e39f2dbe 
--- /dev/null +++ b/net/simple_xt.py @@ -0,0 +1,392 @@ +# Copyright 2018 Changan Wang + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +import math + +_BATCH_NORM_DECAY = 0.9 +_BATCH_NORM_EPSILON = 1e-5 +_USE_FUSED_BN = True + +################################################################################ +# Convenience functions for building the ResNet model. +################################################################################ +def batch_norm(inputs, training, data_format, name=None): + """Performs a batch normalization using a standard set of parameters.""" + # We set fused=True for a significant performance boost. See + # https://www.tensorflow.org/performance/performance_guide#common_fused_ops + return tf.layers.batch_normalization( + inputs=inputs, axis=1 if data_format == 'channels_first' else 3, + momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, + scale=True, training=training, name=name, fused=_USE_FUSED_BN) + + +def fixed_padding(inputs, kernel_size, data_format): + """Pads the input along the spatial dimensions independently of input size. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. 
+ kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + Should be a positive integer. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + A tensor with the same format as the input with the data either intact + (if kernel_size == 1) or padded (if kernel_size > 1). + """ + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + + if data_format == 'channels_first': + padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], + [pad_beg, pad_end], [pad_beg, pad_end]]) + else: + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], + [pad_beg, pad_end], [0, 0]]) + return padded_inputs + + +def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format, kernel_initializer=tf.glorot_uniform_initializer, name=None): + """Strided 2-D convolution with explicit padding.""" + # The padding is consistent and is based only on `kernel_size`, not on the + # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). + if strides > 1: + inputs = fixed_padding(inputs, kernel_size, data_format) + + return tf.layers.conv2d( + inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, + padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, + kernel_initializer=kernel_initializer(), + data_format=data_format, name=name) + +# input image order: BGR, range [0-255] +# mean_value: 104, 117, 123 +# only subtract mean is used +def constant_xavier_initializer(shape, group, dtype=tf.float32, uniform=True): + """Initializer function.""" + if not dtype.is_floating: + raise TypeError('Cannot create initializer for non-floating point type.') + # Estimating fan_in and fan_out is not possible to do perfectly, but we try. + # This is the right thing for matrix multiply and convolutions. 
+ if shape: + fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1]) + fan_out = float(shape[-1])/group + else: + fan_in = 1.0 + fan_out = 1.0 + for dim in shape[:-2]: + fan_in *= float(dim) + fan_out *= float(dim) + + # Average number of inputs and output connections. + n = (fan_in + fan_out) / 2.0 + if uniform: + # To get stddev = math.sqrt(factor / n) need to adjust for uniform. + limit = math.sqrt(3.0 * 1.0 / n) + return tf.random_uniform(shape, -limit, limit, dtype, seed=None) + else: + # To get stddev = math.sqrt(factor / n) need to adjust for truncated. + trunc_stddev = math.sqrt(1.3 * 1.0 / n) + return tf.truncated_normal(shape, 0.0, trunc_stddev, dtype, seed=None) + +def wrapper_initlizer(shape, dtype=None, partition_info=None): + return constant_xavier_initializer(shape, 32, dtype) +# for root block, use dummy input_filters, e.g. 128 rather than 64 for the first block +def se_next_bottleneck_block(inputs, input_filters, name_prefix, is_training, group, data_format='channels_last', need_reduce=True, is_root=False, reduced_scale=16): + bn_axis = -1 if data_format == 'channels_last' else 1 + strides_to_use = 1 + residuals = inputs + if need_reduce: + strides_to_use = 1 if is_root else 2 + #print(strides_to_use) + proj_mapping = tf.layers.conv2d(inputs, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_proj', strides=(strides_to_use, strides_to_use), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + # print(proj_mapping) + residuals = tf.layers.batch_normalization(proj_mapping, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_proj/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + #print(strides_to_use) + reduced_inputs = tf.layers.conv2d(inputs, input_filters // 2, (1, 1), use_bias=False, + name=name_prefix + '_1x1_reduce', strides=(1, 1), + 
padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + reduced_inputs_bn = tf.layers.batch_normalization(reduced_inputs, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_reduce/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + reduced_inputs_relu = tf.nn.relu(reduced_inputs_bn, name=name_prefix + '_1x1_reduce/relu') + + if data_format == 'channels_first': + reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [0, 0], [1, 1], [1, 1]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[1]//group, input_filters // 2] + if is_training: + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + else: + weight_ = tf.get_variable(name_prefix + '_3x3/kernel', shape=weight_shape, initializer=wrapper_initlizer, trainable=is_training) + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=1, name=name_prefix + '_inputs_split') + else: + reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[-1]//group, input_filters // 2] + if is_training: + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + else: + weight_ = tf.get_variable(name_prefix + '_3x3/kernel', shape=weight_shape, initializer=wrapper_initlizer, trainable=is_training) + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=-1, name=name_prefix + '_inputs_split') + + convolved = 
[tf.nn.convolution(x, weight, padding='VALID', strides=[strides_to_use, strides_to_use], name=name_prefix + '_group_conv', + data_format=('NCHW' if data_format == 'channels_first' else 'NHWC')) for (x, weight) in zip(xs, weight_groups)] + + if data_format == 'channels_first': + conv3_inputs = tf.concat(convolved, axis=1, name=name_prefix + '_concat') + else: + conv3_inputs = tf.concat(convolved, axis=-1, name=name_prefix + '_concat') + + conv3_inputs_bn = tf.layers.batch_normalization(conv3_inputs, momentum=_BATCH_NORM_DECAY, name=name_prefix + '_3x3/bn', + axis=bn_axis, epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + conv3_inputs_relu = tf.nn.relu(conv3_inputs_bn, name=name_prefix + '_3x3/relu') + + + increase_inputs = tf.layers.conv2d(conv3_inputs_relu, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_increase', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + increase_inputs_bn = tf.layers.batch_normalization(increase_inputs, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_increase/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + + if data_format == 'channels_first': + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [2, 3], name=name_prefix + '_global_pool', keep_dims=True) + else: + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [1, 2], name=name_prefix + '_global_pool', keep_dims=True) + + down_inputs = tf.layers.conv2d(pooled_inputs, input_filters // reduced_scale, (1, 1), use_bias=True, + name=name_prefix + '_1x1_down', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + down_inputs_relu = tf.nn.relu(down_inputs, name=name_prefix + '_1x1_down/relu') + + + up_inputs = 
tf.layers.conv2d(down_inputs_relu, input_filters, (1, 1), use_bias=True, + name=name_prefix + '_1x1_up', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + prob_outputs = tf.nn.sigmoid(up_inputs, name=name_prefix + '_prob') + + rescaled_feat = tf.multiply(prob_outputs, increase_inputs_bn, name=name_prefix + '_mul') + pre_act = tf.add(residuals, rescaled_feat, name=name_prefix + '_add') + return tf.nn.relu(pre_act, name=name_prefix + '/relu') + +def dilated_se_next_bottleneck_block(inputs, input_filters, name_prefix, is_training, group, data_format='channels_last', need_reduce=True, reduced_scale=16): + bn_axis = -1 if data_format == 'channels_last' else 1 + residuals = inputs + if need_reduce: + proj_mapping = tf.layers.conv2d(inputs, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_proj', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + # print(proj_mapping) + residuals = tf.layers.batch_normalization(proj_mapping, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_proj/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + #print(strides_to_use) + reduced_inputs = tf.layers.conv2d(inputs, input_filters // 2, (1, 1), use_bias=False, + name=name_prefix + '_1x1_reduce', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + reduced_inputs_bn = tf.layers.batch_normalization(reduced_inputs, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_reduce/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + reduced_inputs_relu = tf.nn.relu(reduced_inputs_bn, 
name=name_prefix + '_1x1_reduce/relu') + + if data_format == 'channels_first': + #reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [0, 0], [1, 1], [1, 1]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[1]//group, input_filters // 2] + if is_training: + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + else: + weight_ = tf.get_variable(name_prefix + '_3x3/kernel', shape=weight_shape, initializer=wrapper_initlizer, trainable=is_training) + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=1, name=name_prefix + '_inputs_split') + else: + #reduced_inputs_relu = tf.pad(reduced_inputs_relu, paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]) + weight_shape = [3, 3, reduced_inputs_relu.get_shape().as_list()[-1]//group, input_filters // 2] + if is_training: + weight_ = tf.Variable(constant_xavier_initializer(weight_shape, group=group, dtype=tf.float32), trainable=is_training, name=name_prefix + '_3x3/kernel') + else: + weight_ = tf.get_variable(name_prefix + '_3x3/kernel', shape=weight_shape, initializer=wrapper_initlizer, trainable=is_training) + weight_groups = tf.split(weight_, num_or_size_splits=group, axis=-1, name=name_prefix + '_weight_split') + xs = tf.split(reduced_inputs_relu, num_or_size_splits=group, axis=-1, name=name_prefix + '_inputs_split') + + # !!! before is VALID !!! 
+ convolved = [tf.nn.convolution(x, weight, padding='SAME', strides=[1, 1], dilation_rate=[2, 2], name=name_prefix + '_group_conv', + data_format=('NCHW' if data_format == 'channels_first' else 'NHWC')) for (x, weight) in zip(xs, weight_groups)] + + if data_format == 'channels_first': + conv3_inputs = tf.concat(convolved, axis=1, name=name_prefix + '_concat') + else: + conv3_inputs = tf.concat(convolved, axis=-1, name=name_prefix + '_concat') + + conv3_inputs_bn = tf.layers.batch_normalization(conv3_inputs, momentum=_BATCH_NORM_DECAY, name=name_prefix + '_3x3/bn', + axis=bn_axis, epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + conv3_inputs_relu = tf.nn.relu(conv3_inputs_bn, name=name_prefix + '_3x3/relu') + + + increase_inputs = tf.layers.conv2d(conv3_inputs_relu, input_filters, (1, 1), use_bias=False, + name=name_prefix + '_1x1_increase', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + increase_inputs_bn = tf.layers.batch_normalization(increase_inputs, momentum=_BATCH_NORM_DECAY, + name=name_prefix + '_1x1_increase/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=is_training, reuse=None, fused=_USE_FUSED_BN) + + if data_format == 'channels_first': + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [2, 3], name=name_prefix + '_global_pool', keep_dims=True) + else: + pooled_inputs = tf.reduce_mean(increase_inputs_bn, [1, 2], name=name_prefix + '_global_pool', keep_dims=True) + + down_inputs = tf.layers.conv2d(pooled_inputs, input_filters // reduced_scale, (1, 1), use_bias=True, + name=name_prefix + '_1x1_down', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + down_inputs_relu = tf.nn.relu(down_inputs, name=name_prefix + '_1x1_down/relu') + + + up_inputs 
= tf.layers.conv2d(down_inputs_relu, input_filters, (1, 1), use_bias=True, + name=name_prefix + '_1x1_up', strides=(1, 1), + padding='valid', data_format=data_format, activation=None, + kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + prob_outputs = tf.nn.sigmoid(up_inputs, name=name_prefix + '_prob') + + rescaled_feat = tf.multiply(prob_outputs, increase_inputs_bn, name=name_prefix + '_mul') + pre_act = tf.add(residuals, rescaled_feat, name=name_prefix + '_add') + return tf.nn.relu(pre_act, name=name_prefix + '/relu') + +# the input image should in BGR order, note that this is not the common case in Tensorflow +def sext_backbone(input_image, istraining, data_format, net_depth=101, group=32): + bn_axis = -1 if data_format == 'channels_last' else 1 + + if data_format == 'channels_last': + image_channels = tf.unstack(input_image, axis=-1) + swaped_input_image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=-1) + else: + image_channels = tf.unstack(input_image, axis=1) + swaped_input_image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=1) + #swaped_input_image = input_image + + if net_depth not in [50, 101]: + raise TypeError('Only ResNeXt50 or ResNeXt101 is supprted now.') + + input_depth = [256, 512, 1024] # the input depth of the the first block is dummy input + num_units = [3, 4, 6] if net_depth==50 else [3, 4, 23] + block_name_prefix = ['conv2_{}', 'conv3_{}', 'conv4_{}'] + + if data_format == 'channels_first': + swaped_input_image = tf.pad(swaped_input_image, paddings = [[0, 0], [0, 0], [3, 3], [3, 3]]) + else: + swaped_input_image = tf.pad(swaped_input_image, paddings = [[0, 0], [3, 3], [3, 3], [0, 0]]) + + inputs_features = tf.layers.conv2d(swaped_input_image, input_depth[0]//4, (7, 7), use_bias=False, + name='conv1/7x7_s2', strides=(2, 2), + padding='valid', data_format=data_format, activation=None, + 
kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer()) + #print(ee) + + inputs_features = tf.layers.batch_normalization(inputs_features, momentum=_BATCH_NORM_DECAY, + name='conv1/7x7_s2/bn', axis=bn_axis, + epsilon=_BATCH_NORM_EPSILON, training=istraining, reuse=None, fused=_USE_FUSED_BN) + inputs_features = tf.nn.relu(inputs_features, name='conv1/relu_7x7_s2') + + inputs_features = tf.layers.max_pooling2d(inputs_features, [3, 3], [2, 2], padding='same', data_format=data_format, name='pool1/3x3_s2') + + end_points = [] + is_root = True + for ind, num_unit in enumerate(num_units): + need_reduce = True + for unit_index in range(1, num_unit+1): + inputs_features = se_next_bottleneck_block(inputs_features, input_depth[ind], block_name_prefix[ind].format(unit_index), is_training=istraining, group=group, data_format=data_format, need_reduce=need_reduce, is_root=is_root) + need_reduce = False + is_root = False + end_points.append(inputs_features) + + # conv5 + need_reduce = True + for unit_index in range(1, 4): + inputs_features = dilated_se_next_bottleneck_block(inputs_features, 2048, 'conv5_{}'.format(unit_index), is_training=istraining, group=group, data_format=data_format, need_reduce=need_reduce) + need_reduce = False + end_points.append(inputs_features) + + #print(inputs) + return end_points + + +def simple_net(inputs, output_channals, heatmap_size, istraining, data_format, net_depth=101): + end_points = sext_backbone(inputs, istraining, data_format, net_depth=net_depth) + bn_axis = -1 if data_format == 'channels_last' else 1 + with tf.variable_scope('additional_layer', 'additional_layer', values=end_points, reuse=None): + inputs_features = tf.layers.conv2d_transpose(end_points[-1], 256, 4, strides=(2, 2), padding='same', + data_format=data_format, activation=None, use_bias=False, kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=None, + kernel_regularizer=None, bias_regularizer=None, 
activity_regularizer=None, + kernel_constraint=None, bias_constraint=None, + trainable=istraining, name='deconv_1', reuse=None) + inputs_features = tf.layers.batch_normalization(inputs_features, momentum=_BATCH_NORM_DECAY, name='deconv_1_bn', + axis=bn_axis, epsilon=_BATCH_NORM_EPSILON, training=istraining, reuse=None, fused=_USE_FUSED_BN) + inputs_features = tf.nn.relu(inputs_features, name='deconv_1_relu') + + inputs_features = tf.layers.conv2d_transpose(inputs_features, 256, 4, strides=(2, 2), padding='same', + data_format=data_format, activation=None, use_bias=False, kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=None, + kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, + kernel_constraint=None, bias_constraint=None, + trainable=istraining, name='deconv_2', reuse=None) + inputs_features = tf.layers.batch_normalization(inputs_features, momentum=_BATCH_NORM_DECAY, name='deconv_2_bn', + axis=bn_axis, epsilon=_BATCH_NORM_EPSILON, training=istraining, reuse=None, fused=_USE_FUSED_BN) + inputs_features = tf.nn.relu(inputs_features, name='deconv_2_relu') + + inputs_features = tf.layers.conv2d_transpose(inputs_features, 256, 4, strides=(2, 2), padding='same', + data_format=data_format, activation=None, use_bias=False, kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=None, + kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, + kernel_constraint=None, bias_constraint=None, + trainable=istraining, name='deconv_3', reuse=None) + inputs_features = tf.layers.batch_normalization(inputs_features, momentum=_BATCH_NORM_DECAY, name='deconv_3_bn', + axis=bn_axis, epsilon=_BATCH_NORM_EPSILON, training=istraining, reuse=None, fused=_USE_FUSED_BN) + inputs_features = tf.nn.relu(inputs_features, name='deconv_3_relu') + + heatmap = tf.layers.conv2d(inputs=inputs_features, filters=output_channals, kernel_size=1, + strides=1, padding='same', use_bias=True, activation=None, + 
kernel_initializer=tf.contrib.layers.xavier_initializer(), + bias_initializer=tf.zeros_initializer(), + data_format=data_format, + name='heatmap_1x1') + + return [heatmap] diff --git a/test.py b/test.py deleted file mode 100644 index d1e3deb6..00000000 --- a/test.py +++ /dev/null @@ -1,181 +0,0 @@ - -# import requests - -# def download_file_from_google_drive(id, destination): -# def get_confirm_token(response): -# for key, value in response.cookies.items(): -# if key.startswith('download_warning'): -# return value - -# return None - -# def save_response_content(response, destination): -# CHUNK_SIZE = 32768 - -# with open(destination, "wb") as f: -# for chunk in response.iter_content(CHUNK_SIZE): -# if chunk: # filter out keep-alive new chunks -# f.write(chunk) - -# URL = "https://docs.google.com/uc?export=download" - -# session = requests.Session() - -# response = session.get(URL, params = { 'id' : id }, stream = True) -# token = get_confirm_token(response) - -# if token: -# params = { 'id' : id, 'confirm' : token } -# response = session.get(URL, params = params, stream = True) - -# save_response_content(response, destination) - -# #?id= -# if __name__ == "__main__": - -# # TAKE ID FROM SHAREABLE LINK -# file_id = '1AwG0nWFUrikd17xQpTmAj2LcwK-MbNqJ' -# # DESTINATION FILE ON YOUR DISK -# destination = './dd.txt' -# download_file_from_google_drive(file_id, destination) - -import torch.nn.functional as F -import torch -print(F.smooth_l1_loss(torch.Tensor([[21]]), torch.Tensor([[0]]), size_average=False).data[0]) -print(F.smooth_l1_loss(torch.Tensor([[21,22,23,24]]), torch.Tensor([[0,0,0,0]]), size_average=False).data[0]) -print(F.smooth_l1_loss(torch.Tensor([[11,12,13,14]]), torch.Tensor([[0,0,0,0]]), size_average=False).data[0]) -print(F.smooth_l1_loss(torch.Tensor([[21,22,23,24], [11,12,13,14]]), torch.Tensor([[0,0,0,0],[0,0,0,0]]), size_average=False).data[0]) - - -import pandas as pd - -df = pd.read_csv("G:/preds.csv", header=0) - -df['real_class'] = 
df['real_class'].astype('int') -df['pred_class'] = df['pred_class'].astype('int') - -df['equal'] = df['real_class'] == df['pred_class'] - -acc_by_cls = df.groupby(['real_class'])['equal'].mean().reset_index() -acc_by_cls.columns=['class', 'acc'] - -print(acc_by_cls) -import tensorflow as tf -import numpy as np - -mask = np.array([[True, False, True], [False, True, True]]) - - - -targets = tf.constant([[[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[115, 116], [117, 118]]], - [[[11, 12], [13, 14]], [[15, 16], [17, 18]], [[25, 26], [27, 28]]]]) - -a = tf.boolean_mask(targets, mask) # [[1, 2], [5, 6]] - -sess = tf.Session() - -with sess.as_default(): - print(a.eval()) - - -pred_outputs = tf.zeros_like(targets) - -sss = tf.losses.mean_squared_error(targets, pred_outputs, weights=1.0, - loss_collection=None, - reduction=tf.losses.Reduction.NONE) -num_topk = 2 -sss = tf.reduce_mean(tf.reshape(sss, [2, 2, -1]), axis=-1) -gather_col = tf.nn.top_k(sss, k=num_topk, sorted=True)[1] - -gather_row = tf.reshape(tf.tile(tf.reshape(tf.range(2), [-1, 1]), [1, num_topk]), [-1, 1]) - -gather_indcies = tf.stack([gather_row, tf.reshape(gather_col, [-1, 1])], axis=-1) - -select_heatmap = tf.gather_nd(targets, gather_indcies) - -sess = tf.Session() -table = tf.contrib.lookup.HashTable( - tf.contrib.lookup.KeyValueTensorInitializer(tf.constant([0,1,2], dtype=tf.int64), tf.constant([1,2,-1], dtype=tf.int64)), 0) -out = table.lookup(tf.constant([0,1,2,3,4], dtype=tf.int64)) -sess.run(tf.group([tf.local_variables_initializer(), tf.local_variables_initializer(), tf.tables_initializer()])) -with sess.as_default(): - #table.init.run() - print(sss.eval()) - print(gather_col.eval()) - print(gather_row.eval()) - print(gather_indcies.eval()) - print(select_heatmap.eval()) - - - -heatmap_sigma = 1. -ksize = 6 * heatmap_sigma + 1. - -x = tf.expand_dims(tf.range(tf.round(ksize), delta=1, dtype=tf.float32), axis=1) -y = tf.transpose(x, [1, 0]) -hhh = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) 
** 2) / (2 * heatmap_sigma ** 2)) - - - -hhh_filter = tf.reshape(hhh, [tf.round(ksize), tf.round(ksize), 1, 1]) -hhh_filter = tf.transpose(hhh_filter, [1, 0, 2, 3]) - - -filtered_x = tf.nn.conv2d(image_resized, sobel_x_filter, - strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC', - dilations=[1, 1, 1, 1], - name=None) - - - - - - - -pred = tf.constant([[[[1.11,1.12],[1.13,1.14]], [[1.21,1.22],[1.23,1.24]]], [[[2.11,2.12],[2.13,2.14]], [[2.21,2.22],[2.23,2.24]]]]) -pred = tf.segment_mean(pred, [0,0]) - -im = tf.constant([[[[1.11,1.12,1.],[1.13,1.14,1.]], [[1.21,1.22,1.],[1.23,1.24,1.]]], [[[2.11,2.12,1.],[2.13,2.14,1.]], [[2.21,2.22,1.],[2.23,2.24,1.]]]]) -#pred = tf.reshape(tf.stack([im, im], axis=1), [-1, 2, 2, 3]) -aaaaaaa1 = tf.tile(tf.reshape(tf.range(tf.shape(pred)[0]), [-1, 1]), [1, 2]) -aaaaaaa2=tf.stack([aaaaaaa1,tf.constant([[1,0],[0,1]])], axis=-1) -indcesss = tf.constant([[[0,1],[0,0]],[[1,1],[1,0]]]) - -aaaaaaa = tf.gather_nd(pred, aaaaaaa2) - -d = {'d':12, 'f':0.12344, 'c':555.5767} -items = d.items() -print(', '.join(['%s=%.6f' % (k, v) for k,v in items])) - -heatmap_size=64 -pred_heatmap = tf.one_hot([34*heatmap_size+23, 1*heatmap_size+60, 32*heatmap_size+1], heatmap_size*heatmap_size, on_value=1., off_value=0., axis=-1, dtype=tf.float32) -pred_max = tf.reduce_max(pred_heatmap, axis=-1) -pred_indices = tf.argmax(pred_heatmap, axis=-1) -pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) - - -a = tf.losses.mean_squared_error(tf.constant([[0,1],[1,2],[2,3],[3,4],[4,5]], dtype=tf.int64),tf.constant([[1,2],[2,3],[3,4],[4,5],[5,6]], dtype=tf.int64), weights=1.0/5., loss_collection=None, reduction=tf.losses.Reduction.MEAN) - - -aa = tf.reduce_sum(tf.squared_difference(tf.constant([[0,1],[1,2],[2,3],[3,4],[4,5]], dtype=tf.int64),tf.constant([[1,2],[2,3],[3,4],[4,5],[5,6]], dtype=tf.int64)), axis=-1) -b = tf.metrics.mean_absolute_error(aa, tf.zeros_like(aa)) 
-#tf.metrics.mean_squared_error(, - #weights=1.0*2, - #name='last_pred_mse') -sess = tf.Session() -table = tf.contrib.lookup.HashTable( - tf.contrib.lookup.KeyValueTensorInitializer(tf.constant([0,1,2], dtype=tf.int64), tf.constant([1,2,-1], dtype=tf.int64)), 0) -out = table.lookup(tf.constant([0,1,2,3,4], dtype=tf.int64)) -sess.run(tf.group([tf.local_variables_initializer(), tf.local_variables_initializer(), tf.tables_initializer()])) -with sess.as_default(): - #table.init.run() - print(pred.eval()) - print(aaaaaaa1.eval()) - print(aaaaaaa2.eval()) - #print(b[0].eval()) - # print(a.eval()) - # print(b[1].eval()) - # print(pred_x.eval()) - - # print(pred_y.eval()) - diff --git a/train_simplenet_onebyone.py b/train_simplenet_onebyone.py new file mode 100644 index 00000000..b0e26b09 --- /dev/null +++ b/train_simplenet_onebyone.py @@ -0,0 +1,553 @@ +# Copyright 2018 Changan Wang + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import numpy as np +#from scipy.misc import imread, imsave, imshow, imresize +import tensorflow as tf + +import tf_replicate_model_fn + +from net import simple_xt + +from utility import train_helper +from utility import mertric + +from preprocessing import preprocessing +from preprocessing import dataset +import config + +# hardware related configuration +tf.app.flags.DEFINE_integer( + 'num_readers', 16,#16 + 'The number of parallel readers that read data from the dataset.') +tf.app.flags.DEFINE_integer( + 'num_preprocessing_threads', 48,#48 + 'The number of threads used to create the batches.') +tf.app.flags.DEFINE_integer( + 'num_cpu_threads', 0, + 'The number of cpu cores used to train.') +tf.app.flags.DEFINE_float( + 'gpu_memory_fraction', 1., 'GPU memory fraction to use.') +# scaffold related configuration +tf.app.flags.DEFINE_string( + 'data_dir', '../Datasets/tfrecords',#'/media/rs/0E06CD1706CD0127/Kapok/Chi/Datasets/tfrecords', + 'The directory where the dataset input data is stored.') +tf.app.flags.DEFINE_string( + 'dataset_name', '{}_????', 'The pattern of the dataset name to load.') +tf.app.flags.DEFINE_string( + 'model_dir', './logs_simple_net/', + 'The parent directory where the model will be stored.') +tf.app.flags.DEFINE_integer( + 'log_every_n_steps', 10, + 'The frequency with which logs are print.') +tf.app.flags.DEFINE_integer( + 'save_summary_steps', 100, + 'The frequency with which summaries are saved, in seconds.') +tf.app.flags.DEFINE_integer( + 'save_checkpoints_steps', 8000, + 'The frequency with which the model is saved, in steps.') +# model related configuration +tf.app.flags.DEFINE_integer( + 'train_image_size', 384, + 'The size of the input image for the model to use.') +tf.app.flags.DEFINE_integer( + 'heatmap_size', 192, 
+ 'The size of the output heatmap of the model.') +tf.app.flags.DEFINE_float( + 'heatmap_sigma', 1., + 'The sigma of Gaussian which generate the target heatmap.') +tf.app.flags.DEFINE_float( + 'bbox_border', 25., + 'The nearest distance of the crop border to al keypoints.') +tf.app.flags.DEFINE_integer( + 'batch_size', 8, + 'Batch size for training and evaluation.') +tf.app.flags.DEFINE_boolean( + 'use_ohkm', True, + 'Wether we will use the ohkm for hard keypoints.') +tf.app.flags.DEFINE_string( + 'data_format', 'channels_first', # 'channels_first' or 'channels_last' + 'A flag to override the data format used in the model. channels_first ' + 'provides a performance boost on GPU but is not always compatible ' + 'with CPU. If left unspecified, the data format will be chosen ' + 'automatically based on whether TensorFlow was built for CPU or GPU.') +# optimizer related configuration +tf.app.flags.DEFINE_integer( + 'tf_random_seed', 20180417, 'Random seed for TensorFlow initializers.') +tf.app.flags.DEFINE_float( + 'weight_decay', 1e-5, 'The weight decay on the model weights.') +tf.app.flags.DEFINE_float( + 'mse_weight', 1., 'The weight decay on the model weights.') +tf.app.flags.DEFINE_float( + 'momentum', 0.9, + 'The momentum for the MomentumOptimizer and RMSPropOptimizer.') +tf.app.flags.DEFINE_float('learning_rate', 8e-3, 'Initial learning rate.')#1e-3 +tf.app.flags.DEFINE_float( + 'end_learning_rate', 0.0000001, + 'The minimal end learning rate used by a polynomial decay learning rate.') +tf.app.flags.DEFINE_float( + 'warmup_learning_rate', 0.0002, + 'The start warm-up learning rate to avoid NAN.') +tf.app.flags.DEFINE_integer( + 'warmup_steps', 100, + 'The total steps to warm-up.') +# for learning rate piecewise_constant decay +tf.app.flags.DEFINE_string( + 'decay_boundaries', '2, 3', + 'Learning rate decay boundaries by global_step (comma-separated list).') +tf.app.flags.DEFINE_string( + 'lr_decay_factors', '1, 0.5, 0.1', + 'The values of learning_rate decay 
factor for each segment between boundaries (comma-separated list).') +# checkpoint related configuration +tf.app.flags.DEFINE_string( + 'checkpoint_path', './model/seresnext101', + 'The path to a checkpoint from which to fine-tune.') +tf.app.flags.DEFINE_string( + 'checkpoint_model_scope', '', + 'Model scope in the checkpoint. None if the same as the trained model.') +tf.app.flags.DEFINE_string( + #'blouse', 'dress', 'outwear', 'skirt', 'trousers', 'all' + 'model_scope', None, + 'Model scope name used to replace the name_scope in checkpoint.') +tf.app.flags.DEFINE_string( + 'checkpoint_exclude_scopes', None, + 'Comma-separated list of scopes of variables to exclude when restoring from a checkpoint.') +tf.app.flags.DEFINE_boolean( + 'ignore_missing_vars', True, + 'When restoring a checkpoint would ignore missing variables.') +tf.app.flags.DEFINE_boolean( + 'run_on_cloud', True, + 'Wether we will train on cloud.') +tf.app.flags.DEFINE_boolean( + 'multi_gpu', True, + 'Wether we will use multi-GPUs to train.') +tf.app.flags.DEFINE_string( + 'cloud_checkpoint_path', 'seresnext101', + 'The path to a checkpoint from which to fine-tune.') +tf.app.flags.DEFINE_string( + 'model_to_train', 'blouse, dress, outwear, skirt, trousers', #'all, blouse, dress, outwear, skirt, trousers', 'skirt, dress, outwear, trousers', + 'The sub-model to train (comma-separated list).') + +FLAGS = tf.app.flags.FLAGS +#--model_scope=blouse --checkpoint_path=./logs/all --data_format=channels_last --batch_size=1 + +def validate_batch_size_for_multi_gpu(batch_size): + """For multi-gpu, batch-size must be a multiple of the number of + available GPUs. + + Note that this should eventually be handled by replicate_model_fn + directly. Multi-GPU support is currently experimental, however, + so doing the work here until that feature is in place. 
+ """ + if not FLAGS.multi_gpu: + return 0 + + from tensorflow.python.client import device_lib + + local_device_protos = device_lib.list_local_devices() + num_gpus = sum([1 for d in local_device_protos if d.device_type == 'GPU']) + if not num_gpus: + raise ValueError('Multi-GPU mode was specified, but no GPUs ' + 'were found. To use CPU, run without --multi_gpu=False.') + + remainder = batch_size % num_gpus + if remainder: + err = ('When running with multiple GPUs, batch size ' + 'must be a multiple of the number of available GPUs. ' + 'Found {} GPUs with a batch size of {}; try --batch_size={} instead.' + ).format(num_gpus, batch_size, batch_size - remainder) + raise ValueError(err) + return num_gpus + +def input_pipeline(is_training=True, model_scope=FLAGS.model_scope, num_epochs=None): + if 'all' in model_scope: + lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), + tf.constant(config.global_norm_lvalues, dtype=tf.int64)), 0) + rnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.global_norm_key, dtype=tf.int64), + tf.constant(config.global_norm_rvalues, dtype=tf.int64)), 1) + else: + lnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.local_norm_key, dtype=tf.int64), + tf.constant(config.local_norm_lvalues, dtype=tf.int64)), 0) + rnorm_table = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(tf.constant(config.local_norm_key, dtype=tf.int64), + tf.constant(config.local_norm_rvalues, dtype=tf.int64)), 1) + + preprocessing_fn = lambda org_image, classid, shape, key_x, key_y, key_v: preprocessing.preprocess_image(org_image, classid, shape, FLAGS.train_image_size, FLAGS.train_image_size, key_x, key_y, key_v, (lnorm_table, rnorm_table), is_training=is_training, data_format=('NCHW' if FLAGS.data_format=='channels_first' else 'NHWC'), category=(model_scope 
if 'all' not in model_scope else '*'), bbox_border=FLAGS.bbox_border, heatmap_sigma=FLAGS.heatmap_sigma, heatmap_size=FLAGS.heatmap_size) + + images, shape, classid, targets, key_v, isvalid, norm_value = dataset.slim_get_split(FLAGS.data_dir, preprocessing_fn, FLAGS.batch_size, FLAGS.num_readers, FLAGS.num_preprocessing_threads, num_epochs=num_epochs, is_training=is_training, file_pattern=FLAGS.dataset_name, category=(model_scope if 'all' not in model_scope else '*'), reader=None) + + return images, {'targets': targets, 'key_v': key_v, 'shape': shape, 'classid': classid, 'isvalid': isvalid, 'norm_value': norm_value} + +if config.PRED_DEBUG: + from scipy.misc import imread, imsave, imshow, imresize + def save_image_with_heatmap(image, height, width, heatmap_size, targets, pred_heatmap, indR, indG, indB): + if not hasattr(save_image_with_heatmap, "counter"): + save_image_with_heatmap.counter = 0 # it doesn't exist yet, so initialize it + save_image_with_heatmap.counter += 1 + + img_to_save = np.array(image.tolist()) + 128 + #print(img_to_save.shape) + + img_to_save = img_to_save.astype(np.uint8) + + heatmap0 = np.sum(targets[indR, ...], axis=0).astype(np.uint8) + heatmap1 = np.sum(targets[indG, ...], axis=0).astype(np.uint8) + heatmap2 = np.sum(targets[indB, ...], axis=0).astype(np.uint8) if len(indB) > 0 else np.zeros((heatmap_size, heatmap_size), dtype=np.float32) + + img_to_save = imresize(img_to_save, (height, width), interp='lanczos') + heatmap0 = imresize(heatmap0, (height, width), interp='lanczos') + heatmap1 = imresize(heatmap1, (height, width), interp='lanczos') + heatmap2 = imresize(heatmap2, (height, width), interp='lanczos') + + img_to_save = img_to_save/2 + img_to_save[:,:,0] = np.clip((img_to_save[:,:,0] + heatmap0 + heatmap2), 0, 255) + img_to_save[:,:,1] = np.clip((img_to_save[:,:,1] + heatmap1 + heatmap2), 0, 255) + #img_to_save[:,:,2] = np.clip((img_to_save[:,:,2]/4. 
+ heatmap2), 0, 255) + file_name = 'targets_{}.jpg'.format(save_image_with_heatmap.counter) + imsave(os.path.join(config.DEBUG_DIR, file_name), img_to_save.astype(np.uint8)) + + pred_heatmap = np.array(pred_heatmap.tolist()) + #print(pred_heatmap.shape) + for ind in range(pred_heatmap.shape[0]): + img = pred_heatmap[ind] + img = img - img.min() + img *= 255.0/img.max() + file_name = 'heatmap_{}_{}.jpg'.format(save_image_with_heatmap.counter, ind) + imsave(os.path.join(config.DEBUG_DIR, file_name), img.astype(np.uint8)) + return save_image_with_heatmap.counter + +def get_keypoint(image, targets, predictions, heatmap_size, height, width, category, clip_at_zero=True, data_format='channels_last', name=None): + predictions = tf.reshape(predictions, [1, -1, heatmap_size*heatmap_size]) + + pred_max = tf.reduce_max(predictions, axis=-1) + pred_indices = tf.argmax(predictions, axis=-1) + pred_x, pred_y = tf.cast(tf.floormod(pred_indices, heatmap_size), tf.float32), tf.cast(tf.floordiv(pred_indices, heatmap_size), tf.float32) + + width, height = tf.cast(width, tf.float32), tf.cast(height, tf.float32) + pred_x, pred_y = pred_x * width / tf.cast(heatmap_size, tf.float32), pred_y * height / tf.cast(heatmap_size, tf.float32) + + if clip_at_zero: + pred_x, pred_y = pred_x * tf.cast(pred_max>0, tf.float32), pred_y * tf.cast(pred_max>0, tf.float32) + pred_x = pred_x * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (width / 2.) + pred_y = pred_y * tf.cast(pred_max>0, tf.float32) + tf.cast(pred_max<=0, tf.float32) * (height / 2.) + + if config.PRED_DEBUG: + pred_indices_ = tf.squeeze(pred_indices) + image_ = tf.squeeze(image) * 255. 
# Apparel categories that have a dedicated training configuration in main().
_CATEGORIES = ('blouse', 'dress', 'outwear', 'skirt', 'trousers')


def _class_key(model_scope):
    """Return the key used to index per-category tables in `config`.

    Any scope name containing 'all' maps to the wildcard key '*'; every
    other scope name is used unchanged.
    """
    return '*' if 'all' in model_scope else model_scope


def _category_train_params(category, model_root, checkpoint_path):
    """Build the training settings for one apparel category.

    Args:
        category: category name; also used as the model scope and as the
            sub-directory of `model_root` that holds its checkpoints.
        model_root: directory under which per-category model dirs live.
        checkpoint_path: path of the pre-trained weights to initialise from.

    Returns:
        A dict with the keys consumed by main() when it calls sub_loop().
    """
    return {
        'model_dir': os.path.join(model_root, category),
        'train_epochs': 30,
        'epochs_per_eval': 30,
        'lr_decay_factors': '1, 0.5, 0.1, 0.01',
        'decay_boundaries': '15, 20, 28',
        'model_scope': category,
        'checkpoint_path': checkpoint_path,
        'checkpoint_model_scope': '',
        # The category-specific head is excluded when restoring weights.
        'checkpoint_exclude_scopes': '{}/additional_layer'.format(category),
        'ignore_missing_vars': True,
    }


def gaussian_blur(inputs, inputs_filters, sigma, data_format, name=None):
    """Blur every channel of `inputs` with the same 2-D Gaussian-shaped kernel.

    The kernel is `ksize x ksize` with `ksize = int(6 * sigma + 1)`, is tiled
    over `inputs_filters` channels and applied with a stride-1, SAME-padded
    depthwise convolution.

    Args:
        inputs: 4-D tensor to blur.
        inputs_filters: number of channels the kernel is tiled over.
        sigma: standard deviation of the kernel; it is passed through int(),
            so it is expected to be a Python number rather than a tensor.
        data_format: 'channels_last' selects an NHWC convolution, anything
            else an NCHW convolution.
        name: optional name scope.

    Returns:
        The blurred tensor, permuted back to the layout it arrived in.
    """
    with tf.name_scope(name, "gaussian_blur", [inputs]):
        data_format_ = 'NHWC' if data_format == 'channels_last' else 'NCHW'
        if data_format_ == 'NHWC':
            # NOTE(review): the [0, 2, 3, 1] permutation implies the caller
            # always passes channels-first data, even when the convolution
            # runs in NHWC - confirm against callers.
            inputs = tf.transpose(inputs, [0, 2, 3, 1])
        ksize = int(6 * sigma + 1.)
        x = tf.expand_dims(tf.range(ksize, delta=1, dtype=tf.float32), axis=1)
        y = tf.transpose(x, [1, 0])
        # NOTE(review): the kernel is centred on ksize/2 rather than
        # (ksize - 1)/2 and is not normalised to sum to one. Behaviour is kept
        # as-is; confirm whether the resulting offset is intended.
        kernel_matrix = tf.exp(- ((x - ksize/2.) ** 2 + (y - ksize/2.) ** 2) / (2 * sigma ** 2))
        kernel_filter = tf.reshape(kernel_matrix, [ksize, ksize, 1, 1])
        kernel_filter = tf.tile(kernel_filter, [1, 1, inputs_filters, 1])
        outputs = tf.nn.depthwise_conv2d(inputs, kernel_filter, strides=[1, 1, 1, 1],
                                         padding='SAME', data_format=data_format_, name='blur')
        if data_format_ == 'NHWC':
            outputs = tf.transpose(outputs, [0, 3, 1, 2])
        return outputs


def keypoint_model_fn(features, labels, mode, params):
    """Estimator `model_fn` for the keypoint heatmap network.

    Builds `simple_xt.simple_net` under the `params['model_scope']` variable
    scope, computes the normalized-error metric and a masked heatmap MSE loss
    (optionally restricted to the hardest keypoints), adds L2 weight decay and,
    in TRAIN mode, a momentum optimizer with a piecewise-constant schedule.

    Args:
        features: input image batch.
        labels: dict providing 'targets', 'key_v', 'isvalid' and 'norm_value'.
            It is indexed unconditionally, so it must not be None.
        mode: a `tf.estimator.ModeKeys` value.
        params: dict of hyper-parameters, see `sub_loop` for the full set.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    targets = labels['targets']
    key_v = labels['key_v']
    isvalid = labels['isvalid']
    norm_value = labels['norm_value']

    cur_batch_size = tf.shape(features)[0]
    class_key = _class_key(params['model_scope'])
    num_joints = config.class_num_joints[class_key]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE):
        pred_outputs = simple_xt.simple_net(features, num_joints, params['heatmap_size'],
                                            is_training, params['data_format'])[0]

    # Everything below works on channels-first heatmaps.
    if params['data_format'] == 'channels_last':
        pred_outputs = tf.transpose(pred_outputs, [0, 3, 1, 2], name='outputs_trans')

    score_map = pred_outputs

    # The decoded coordinates are not consumed by anything below; the call is
    # kept so the graph is built exactly as before.
    get_keypoint(features, targets, score_map, params['heatmap_size'],
                 params['train_image_size'], params['train_image_size'],
                 class_key, clip_at_zero=True, data_format=params['data_format'])

    # Flagged as important by the original author: targets are scaled by 255
    # before both the metric and the loss are computed.
    targets = 255. * targets

    # The metric is computed from the unmasked prediction and targets.
    ne_metric_ops = mertric.normalized_error(targets, score_map, norm_value, key_v, isvalid,
                                             cur_batch_size,
                                             num_joints,
                                             params['heatmap_size'],
                                             params['train_image_size'])

    # Zero out both target and prediction for keypoints that are invisible or
    # invalid, so they contribute nothing to the loss.
    visible = tf.cast(tf.logical_and(key_v > 0, isvalid > 0), tf.float32)
    all_visible = tf.expand_dims(tf.expand_dims(visible, axis=-1), axis=-1)
    targets = targets * all_visible
    pred_outputs = pred_outputs * all_visible

    sq_diff = tf.reduce_sum(tf.squared_difference(targets, pred_outputs), axis=-1)
    last_pred_mse = tf.metrics.mean_absolute_error(sq_diff, tf.zeros_like(sq_diff), name='last_pred_mse')

    metrics = {'normalized_error': ne_metric_ops, 'last_pred_mse': last_pred_mse}
    predictions = {'normalized_error': ne_metric_ops[1]}
    # Named so that the logging hook in sub_loop can look it up.
    tf.identity(ne_metric_ops[1], name='ne_mertric')

    base_learning_rate = params['learning_rate']

    if params['use_ohkm']:
        # Online hard keypoint mining: keep only the half of the keypoints
        # with the largest per-keypoint mean squared error in each sample.
        per_element_loss = tf.losses.mean_squared_error(targets, pred_outputs, weights=1.0,
                                                        loss_collection=None,
                                                        reduction=tf.losses.Reduction.NONE)
        temp_loss = tf.reduce_mean(tf.reshape(per_element_loss, [cur_batch_size, num_joints, -1]), axis=-1)

        num_topk = num_joints // 2
        gather_col = tf.nn.top_k(temp_loss, k=num_topk, sorted=True)[1]
        gather_row = tf.reshape(tf.tile(tf.reshape(tf.range(cur_batch_size), [-1, 1]), [1, num_topk]), [-1, 1])
        gather_indices = tf.stop_gradient(tf.stack([gather_row, tf.reshape(gather_col, [-1, 1])], axis=-1))

        loss_targets = tf.gather_nd(targets, gather_indices)
        loss_preds = tf.gather_nd(pred_outputs, gather_indices)
    else:
        loss_targets, loss_preds = targets, pred_outputs

    # NOTE(review): with Reduction.MEAN a scalar weight may cancel out of the
    # result - confirm against the tf.losses documentation.
    heatmap_mse = tf.losses.mean_squared_error(loss_targets, loss_preds,
                                               weights=1.0 / tf.cast(cur_batch_size, tf.float32),
                                               scope='loss',
                                               loss_collection=None,
                                               reduction=tf.losses.Reduction.MEAN)

    mse_loss = tf.multiply(params['mse_weight'], heatmap_mse, name='mse_loss')
    tf.summary.scalar('mse', mse_loss)
    tf.losses.add_loss(mse_loss)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    l2_terms = [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'batch_normalization' not in v.name]
    loss = mse_loss + params['weight_decay'] * tf.add_n(l2_terms)
    total_loss = tf.identity(loss, name='total_loss')
    tf.summary.scalar('loss', total_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, predictions=predictions, eval_metric_ops=metrics)

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        # One warm-up value followed by one value per decay factor; the
        # boundaries are the warm-up length followed by epochs converted to steps.
        lr_values = [params['warmup_learning_rate']] + [base_learning_rate * decay for decay in params['lr_decay_factors']]
        lr_boundaries = [params['warmup_steps']] + [int(float(ep) * params['steps_per_epoch']) for ep in params['decay_boundaries']]
        learning_rate = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), lr_boundaries, lr_values)
        # The schedule never drops below end_learning_rate.
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(params['end_learning_rate'], dtype=learning_rate.dtype),
                                             name='learning_rate')
        tf.summary.scalar('lr', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(learning_rate=truncated_learning_rate,
                                               momentum=params['momentum'])
        optimizer = tf_replicate_model_fn.TowerOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    init_fn = train_helper.get_init_fn_for_scaffold_(params['checkpoint_path'], params['model_dir'],
                                                     params['checkpoint_exclude_scopes'], params['model_scope'],
                                                     params['checkpoint_model_scope'], params['ignore_missing_vars'])
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(init_fn=init_fn))


def parse_comma_list(args):
    """Parse a comma-separated string of numbers into a list of floats.

    Whitespace around each item is ignored. An empty item (for example from
    an empty string or a trailing comma) raises ValueError.
    """
    return [float(s.strip()) for s in args.split(',')]


def sub_loop(model_fn, model_scope, model_dir, run_config, train_epochs, epochs_per_eval, lr_decay_factors, decay_boundaries, checkpoint_path=None, checkpoint_exclude_scopes='', checkpoint_model_scope='', ignore_missing_vars=True):
    """Train and evaluate one model with a replicated Estimator.

    Runs `train_epochs // epochs_per_eval` cycles; each cycle trains on
    `epochs_per_eval` epochs of input (capped at `train_epochs` epochs of
    steps overall) and then evaluates once.

    Args:
        model_fn: Estimator model function, wrapped with `replicate_model_fn`.
        model_scope: model / category name; selects the dataset split size.
        model_dir: directory for checkpoints and summaries.
        run_config: base `tf.estimator.RunConfig`; its checkpoint interval is
            overridden to two epochs of steps.
        train_epochs: total number of epochs to train.
        epochs_per_eval: epochs of input fed per training cycle.
        lr_decay_factors: comma-separated learning-rate multipliers.
        decay_boundaries: comma-separated epoch boundaries of the schedule.
        checkpoint_path: pre-trained weights to initialise from.
        checkpoint_exclude_scopes: scopes not restored from the checkpoint.
        checkpoint_model_scope: scope name used inside the checkpoint.
        ignore_missing_vars: forwarded to the checkpoint init function.
    """
    steps_per_epoch = config.split_size[_class_key(model_scope)]['train'] // FLAGS.batch_size

    _replicate_model_fn = tf_replicate_model_fn.replicate_model_fn(model_fn, loss_reduction=tf.losses.Reduction.MEAN)

    fashionAI = tf.estimator.Estimator(
        model_fn=_replicate_model_fn, model_dir=model_dir,
        config=run_config.replace(save_checkpoints_steps=2*steps_per_epoch),
        params={
            'checkpoint_path': checkpoint_path,
            'model_dir': model_dir,
            'checkpoint_exclude_scopes': checkpoint_exclude_scopes,
            'model_scope': model_scope,
            'checkpoint_model_scope': checkpoint_model_scope,
            'ignore_missing_vars': ignore_missing_vars,
            'train_image_size': FLAGS.train_image_size,
            'heatmap_size': FLAGS.heatmap_size,
            'data_format': FLAGS.data_format,
            'steps_per_epoch': steps_per_epoch,
            'use_ohkm': FLAGS.use_ohkm,
            'batch_size': FLAGS.batch_size,
            'weight_decay': FLAGS.weight_decay,
            'mse_weight': FLAGS.mse_weight,
            'momentum': FLAGS.momentum,
            'learning_rate': FLAGS.learning_rate,
            'end_learning_rate': FLAGS.end_learning_rate,
            'warmup_learning_rate': FLAGS.warmup_learning_rate,
            'warmup_steps': FLAGS.warmup_steps,
            'decay_boundaries': parse_comma_list(decay_boundaries),
            'lr_decay_factors': parse_comma_list(lr_decay_factors),
        })

    # Display name -> name of the graph tensor created in the model function.
    tensors_to_log = {
        'lr': 'learning_rate',
        'loss': 'total_loss',
        'mse': 'mse_loss',
        'ne': 'ne_mertric',
    }

    tf.gfile.MakeDirs(model_dir)
    tf.logging.info('Starting to train model {}.'.format(model_scope))
    for _ in range(train_epochs // epochs_per_eval):
        logging_hook = tf.train.LoggingTensorHook(
            tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps,
            formatter=lambda dicts: '{}:'.format(model_scope) + (', '.join(['%s=%.6f' % (k, v) for k, v in dicts.items()])))

        tf.logging.info('Starting a training cycle.')
        fashionAI.train(input_fn=lambda: input_pipeline(True, model_scope, epochs_per_eval),
                        hooks=[logging_hook], max_steps=(steps_per_epoch*train_epochs))

        tf.logging.info('Starting to evaluate.')
        eval_results = fashionAI.evaluate(input_fn=lambda: input_pipeline(False, model_scope, 1))
        tf.logging.info(eval_results)
    tf.logging.info('Finished model {}.'.format(model_scope))


def main(_):
    """Train every model named in `FLAGS.model_to_train`, one after another.

    Raises:
        ValueError: if a requested model has no training configuration. The
            check runs before any training starts.
    """
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False,
                                 intra_op_parallelism_threads=FLAGS.num_cpu_threads,
                                 inter_op_parallelism_threads=FLAGS.num_cpu_threads,
                                 gpu_options=gpu_options)

    # Base RunConfig with step-based checkpointing; sub_loop overrides the
    # checkpoint interval per model.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=None).replace(
        save_checkpoints_steps=FLAGS.save_checkpoints_steps).replace(
        save_summary_steps=FLAGS.save_summary_steps).replace(
        keep_checkpoint_max=5).replace(
        tf_random_seed=FLAGS.tf_random_seed).replace(
        log_step_count_steps=FLAGS.log_every_n_steps).replace(
        session_config=sess_config)

    # Called for its validation of the batch size; the result is not needed.
    validate_batch_size_for_multi_gpu(FLAGS.batch_size)

    if FLAGS.run_on_cloud:
        checkpoint_path = os.path.join(FLAGS.data_dir, FLAGS.cloud_checkpoint_path)
    else:
        checkpoint_path = FLAGS.checkpoint_path

    detail_params = {
        category: _category_train_params(category, FLAGS.model_dir, checkpoint_path)
        for category in _CATEGORIES
    }

    model_to_train = [s.strip() for s in FLAGS.model_to_train.split(',')]

    # Fail before any training starts instead of part-way through the list.
    unknown = [m for m in model_to_train if m not in detail_params]
    if unknown:
        raise ValueError('No training configuration for model(s) {}; expected one of {}.'.format(
            unknown, sorted(detail_params)))

    for m in model_to_train:
        model_params = detail_params[m]
        sub_loop(keypoint_model_fn, m, model_params['model_dir'], run_config,
                 model_params['train_epochs'], model_params['epochs_per_eval'],
                 model_params['lr_decay_factors'], model_params['decay_boundaries'],
                 model_params['checkpoint_path'], model_params['checkpoint_exclude_scopes'],
                 model_params['checkpoint_model_scope'], model_params['ignore_missing_vars'])


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run()