From 17d86046be202f46f6e6604f0ca47652770315ab Mon Sep 17 00:00:00 2001
From: Ryan Mukherjee
Date: Fri, 17 Nov 2017 17:20:51 -0500
Subject: [PATCH] updated baseline with improved performance, added initial reference to arXiv paper

---
 README.md                               |  22 +++-
 code/data_ml_functions/dataFunctions.py | 110 ++++++++++++-----
 code/data_ml_functions/mlFunctions.py   | 151 ++++++++++++++----------
 code/fmowBaseline.py                    | 122 ++++++++++---------
 code/params.py                          |  11 +-
 5 files changed, 258 insertions(+), 158 deletions(-)

diff --git a/README.md b/README.md
index 5c5f213..42454e7 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,18 @@
 # fMoW: Functional Map of the World
 
-This code was developed by [JHU/APL](http://www.jhuapl.edu).
+This code was developed by [JHU/APL](https://jhuapl.edu).
 
 ## Dependencies
 
 The following libraries were used for training/testing the deep learning models:
 
-Keras 2.0.5
+Keras 2.0.8
 
-Tensorflow 1.2.1
+Tensorflow 1.3.0
+
+DenseNet from [titu1994/DenseNet](https://github.com/titu1994/DenseNet)
 
-tqdm
 
 ## Dataset
 
@@ -89,6 +90,19 @@ Once this is complete, the following arguments can be passed in to run different
 
 Our best performing model is the CNN-only approach, which sums predictions over each temporal view and then takes an argmax. However, we provide code for using an LSTM, which performs slightly worse, so that modifications can be made.
 
+## References
+
+If you would like to cite this baseline, please reference:
+```
+@article{fmow2017,
+  title={Functional Map of the World},
+  author={Christie, Gordon and Fendley, Neil and Wilson, James and Mukherjee, Ryan},
+  journal={arXiv:TBD},
+  year={2017}
+}
+```
+**Note: This will be updated on Tuesday when published.**
+
 ## License
 
 The license is Apache 2.0. See LICENSE.
 
diff --git a/code/data_ml_functions/dataFunctions.py b/code/data_ml_functions/dataFunctions.py
index fbd4e6c..dd60a02 100644
--- a/code/data_ml_functions/dataFunctions.py
+++ b/code/data_ml_functions/dataFunctions.py
@@ -32,8 +32,6 @@
 from tqdm import tqdm
 import warnings
 
-import code
-
 def prepare_data(params):
     """
     Saves sub images, converts metadata to feature vectors and saves in JSON files,
@@ -61,7 +59,7 @@
         else:
             outDir = params.directories['test_data']
 
-        print('Looping through sequences in: ' + currDir)
+        print('Queuing sequences in: ' + currDir)
         for root, dirs, files in tqdm(os.walk(os.path.join(params.directories['dataset'], currDir))):
             if len(files) > 0:
                 slashes = [i for i,ltr in enumerate(root) if ltr == '/']
@@ -71,7 +69,7 @@
                     task = partial(_process_file, file, slashes, root, isTrain, outDir, paramsDict)
                     futures.append(executor.submit(task))
 
-    print('Preprocessing all files...')
+    print('Waiting for all preprocessing tasks to complete...')
     results = []
     [results.extend(future.result()) for future in tqdm(futures)]
     allTrainFeatures = [np.array(r[0]) for r in results if r[0] is not None]
@@ -84,6 +82,9 @@
         trainCount = len(trainingData)
 
     testData = [r[2] for r in results if r[2] is not None]
 
+    # Shut down the executor and free its resources
+    executor.shutdown()
+
    metadataMean = metadataTrainSum / trainCount
    metadataMax = np.zeros(params.metadata_length)
    for currFeat in allTrainFeatures:
@@ -161,13 +162,43 @@
                 continue
 
            # train with context around box
-            widthBuffer = int((box[2] * 0.5) / 2.0)
-            heightBuffer = int((box[2] * 0.5) / 2.0)
+
+            contextMultWidth = 0.15
+            contextMultHeight = 0.15
+
+            wRatio = float(box[2]) / img.shape[0]
+            
hRatio = float(box[3]) / img.shape[1] + + if wRatio < 0.5 and wRatio >= 0.4: + contextMultWidth = 0.2 + if wRatio < 0.4 and wRatio >= 0.3: + contextMultWidth = 0.3 + if wRatio < 0.3 and wRatio >= 0.2: + contextMultWidth = 0.5 + if wRatio < 0.2 and wRatio >= 0.1: + contextMultWidth = 1 + if wRatio < 0.1: + contextMultWidth = 2 + + if hRatio < 0.5 and hRatio >= 0.4: + contextMultHeight = 0.2 + if hRatio < 0.4 and hRatio >= 0.3: + contextMultHeight = 0.3 + if hRatio < 0.3 and hRatio >= 0.2: + contextMultHeight = 0.5 + if hRatio < 0.2 and hRatio >= 0.1: + contextMultHeight = 1 + if hRatio < 0.1: + contextMultHeight = 2 + + + widthBuffer = int((box[2] * contextMultWidth) / 2.0) + heightBuffer = int((box[3] * contextMultHeight) / 2.0) r1 = box[1] - heightBuffer - r2 = r1 + box[3] + heightBuffer + r2 = box[1] + box[3] + heightBuffer c1 = box[0] - widthBuffer - c2 = c1 + box[2] + widthBuffer + c2 = box[0] + box[2] + widthBuffer if r1 < 0: r1 = 0 @@ -186,10 +217,11 @@ def _process_file(file, slashes, root, isTrain, outDir, params): subImg = subImg.resize(params['target_img_size']) subImg.save(imgPath) - features = json_to_feature_vector(params['metadata_length'], jsonData) + features = json_to_feature_vector(params, jsonData, bb) features = features.tolist() json.dump(features, open(featuresPath, 'w')) + if isTrain: allResults.append((features, {"features_path": featuresPath, "img_path": imgPath, "category": params['category_names'].index(category)}, None)) @@ -198,14 +230,9 @@ def _process_file(file, slashes, root, isTrain, outDir, params): return allResults -def json_to_feature_vector(metadata_length, jsonData): - """ - Generates feature vector for CNN fusion from metadata - :param metadata_length: total number of metadata parameters being used - :param jsonData: metadata from a JSON file - :return features: numpy feature vector representation of the metadata - """ - features = np.zeros(metadata_length, dtype=float) + +def json_to_feature_vector(params, jsonData, bb): + features = np.zeros(params['metadata_length'], dtype=float) features[0] = float(jsonData['gsd']) x,y = utm_to_xy(jsonData['utm']) features[1] = x @@ -222,17 +249,44 @@ def json_to_feature_vector(metadata_length, jsonData): else: features[8] = 1.0 features[9] = float(jsonData['pan_resolution_dbl']) - features[10] = float(jsonData['multi_resolution_dbl']) - features[11] = float(jsonData['target_azimuth_dbl']) / 360.0 - features[12] = float(jsonData['sun_azimuth_dbl']) / 360.0 - features[13] = float(jsonData['sun_elevation_dbl']) / 90.0 - features[14] = float(jsonData['off_nadir_angle_dbl']) / 90.0 - features[15] = float(jsonData['bounding_boxes'][0]['box'][2]) - features[16] = float(jsonData['bounding_boxes'][0]['box'][3]) - features[17] = float(jsonData['img_width']) - features[18] = float(jsonData['img_height']) - features[19] = float(len(jsonData['approximate_wavelengths'])) - features[20] = float(date.weekday()) + features[10] = float(jsonData['pan_resolution_start_dbl']) + features[11] = float(jsonData['pan_resolution_end_dbl']) + features[12] = float(jsonData['pan_resolution_min_dbl']) + features[13] = float(jsonData['pan_resolution_max_dbl']) + features[14] = float(jsonData['multi_resolution_dbl']) + features[15] = float(jsonData['multi_resolution_min_dbl']) + features[16] = float(jsonData['multi_resolution_max_dbl']) + features[17] = float(jsonData['multi_resolution_start_dbl']) + features[18] = float(jsonData['multi_resolution_end_dbl']) + features[19] = float(jsonData['target_azimuth_dbl']) / 360.0 + features[20] = 
float(jsonData['target_azimuth_min_dbl']) / 360.0 + features[21] = float(jsonData['target_azimuth_max_dbl']) / 360.0 + features[22] = float(jsonData['target_azimuth_start_dbl']) / 360.0 + features[23] = float(jsonData['target_azimuth_end_dbl']) / 360.0 + features[24] = float(jsonData['sun_azimuth_dbl']) / 360.0 + features[25] = float(jsonData['sun_azimuth_min_dbl']) / 360.0 + features[26] = float(jsonData['sun_azimuth_max_dbl']) / 360.0 + features[27] = float(jsonData['sun_elevation_min_dbl']) / 90.0 + features[28] = float(jsonData['sun_elevation_dbl']) / 90.0 + features[29] = float(jsonData['sun_elevation_max_dbl']) / 90.0 + features[30] = float(jsonData['off_nadir_angle_dbl']) / 90.0 + features[31] = float(jsonData['off_nadir_angle_min_dbl']) / 90.0 + features[32] = float(jsonData['off_nadir_angle_max_dbl']) / 90.0 + features[33] = float(jsonData['off_nadir_angle_start_dbl']) / 90.0 + features[34] = float(jsonData['off_nadir_angle_end_dbl']) / 90.0 + features[35] = float(bb['box'][2]) + features[36] = float(bb['box'][3]) + features[37] = float(jsonData['img_width']) + features[38] = float(jsonData['img_height']) + features[39] = float(date.weekday()) + features[40] = min([features[35], features[36]]) / max([features[37], features[38]]) + features[41] = features[35] / features[37] + features[42] = features[36] / features[38] + features[43] = date.second + if len(jsonData['bounding_boxes']) == 1: + features[44] = 1.0 + else: + features[44] = 0.0 return features diff --git a/code/data_ml_functions/mlFunctions.py b/code/data_ml_functions/mlFunctions.py index 90a215f..85cfb0d 100644 --- a/code/data_ml_functions/mlFunctions.py +++ b/code/data_ml_functions/mlFunctions.py @@ -20,7 +20,8 @@ __version__ = 0.1 import json -from keras.applications import VGG16,imagenet_utils +from keras import backend as K +from keras.applications import imagenet_utils from keras.layers import Dense,Input,merge,Flatten,Dropout,LSTM from keras.models import Sequential,Model from keras.preprocessing import image @@ -28,12 +29,12 @@ import numpy as np +from .DenseNet import densenet from data_ml_functions.dataFunctions import get_batch_inds -from glob import glob + from concurrent.futures import ProcessPoolExecutor from functools import partial - def get_cnn_model(params): """ Load base CNN model and add metadata fusion layers if 'use_metadata' is set in params.py @@ -42,14 +43,13 @@ def get_cnn_model(params): """ input_tensor = Input(shape=(params.target_img_size[0],params.target_img_size[1],params.num_channels)) - baseModel = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor) + baseModel = densenet.DenseNetImageNet161(input_shape=(params.target_img_size[0], params.target_img_size[1], params.num_channels), include_top=False, input_tensor=input_tensor) - modelStruct = baseModel.output - modelStruct = Flatten(input_shape=baseModel.output_shape[1:])(modelStruct) + modelStruct = baseModel.layers[-1].output if params.use_metadata: auxiliary_input = Input(shape=(params.metadata_length,), name='aux_input') - modelStruct = merge([modelStruct,auxiliary_input],mode='concat') + modelStruct = merge([modelStruct,auxiliary_input],'concat') modelStruct = Dense(params.cnn_last_layer_length, activation='relu', name='fc1')(modelStruct) modelStruct = Dropout(0.5)(modelStruct) @@ -75,28 +75,30 @@ def get_lstm_model(params, codesStats): :return model: LSTM model """ - model = Sequential() if params.use_metadata: - layerLength = params.cnn_last_layer_length + params.metadata_length + layerLength = 
params.cnn_lstm_layer_length + params.metadata_length else: - layerLength = params.cnn_last_layer_length - model.add(LSTM(layerLength, return_sequences=True, input_shape=(codesStats['max_temporal'], layerLength), dropout=0.5)) + layerLength = params.cnn_lstm_layer_length + + model = Sequential() + model.add(LSTM(4096, return_sequences=True, input_shape=(codesStats['max_temporal'], layerLength), dropout=0.5)) model.add(Flatten()) model.add(Dense(512, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(params.num_labels, activation='softmax')) return model - + + def img_metadata_generator(params, data, metadataStats): """ - Custom generator that yields images or (image,metadata) batches and their - category labels (categorical format). + Custom generator that yields images or (image,metadata) batches and their + category labels (categorical format). :param params: global parameters, used to find location of the dataset and json file - :param data: list of objects containing the category labels and paths to images and metadata features + :param data: list of objects containing the category labels and paths to images and metadata features :param metadataStats: metadata stats used to normalize metadata features - :yield (imgdata,labels) or (imgdata,metadata,labels): image data, metadata (if params set to use), and labels (categorical form) + :yield (imgdata,labels) or (imgdata,metadata,labels): image data, metadata (if params set to use), and labels (categorical form) """ - + N = len(data) idx = np.random.permutation(N) @@ -108,26 +110,27 @@ def img_metadata_generator(params, data, metadataStats): while True: for inds in batchInds: batchData = [data[ind] for ind in inds] - imgdata,metadata,labels = load_cnn_batch(params, batchData, metadataStats, executor) + imgdata, metadata, labels = load_cnn_batch(params, batchData, metadataStats, executor) if params.use_metadata: - yield([imgdata,metadata],labels) + yield ([imgdata, metadata], labels) else: - yield(imgdata,labels) - + yield (imgdata, labels) + def load_cnn_batch(params, batchData, metadataStats, executor): """ Load batch of images and metadata and preprocess the data before returning. 
    :param params: global parameters, used to find location of the dataset and json file
-    :param batchData: list of objects in the current batch containing the category labels and paths to CNN codes and images 
+    :param batchData: list of objects in the current batch containing the category labels and paths to CNN codes and images
     :param metadataStats: metadata stats used to normalize metadata features
     :return imgdata,metadata,labels: numpy arrays containing the image data, metadata, and labels (categorical form)
     """
+
     futures = []
-    imgdata = np.zeros((params.batch_size_cnn,params.target_img_size[0],params.target_img_size[1],params.num_channels))
-    metadata = np.zeros((params.batch_size_cnn,params.metadata_length))
+    imgdata = np.zeros((params.batch_size_cnn, params.target_img_size[0],
+                        params.target_img_size[1], params.num_channels))
+    metadata = np.zeros((params.batch_size_cnn, params.metadata_length))
     labels = np.zeros(params.batch_size_cnn)
-    inputs = []
-    for i in range(0,len(batchData)):
+    for i in range(0, len(batchData)):
         currInput = {}
         currInput['data'] = batchData[i]
         currInput['metadataStats'] = metadataStats
@@ -136,18 +139,17 @@
     results = [future.result() for future in futures]
 
-    for i,result in enumerate(results):
-        metadata[i,:] = result['metadata']
-        imgdata[i,:,:,:] = result['img']
+    for i, result in enumerate(results):
+        metadata[i, :] = result['metadata']
+        imgdata[i, :, :, :] = result['img']
         labels[i] = result['labels']
-    
+
     imgdata = imagenet_utils.preprocess_input(imgdata)
     imgdata = imgdata / 255.0
-    
+
     labels = to_categorical(labels, params.num_labels)
-    
-    return imgdata,metadata,labels
 
+    return imgdata, metadata, labels
 
 def _load_batch_helper(inputDict):
     """
@@ -155,6 +157,7 @@
     :param inputDict: dict containing the data and metadataStats that will be used to load imagery
     :return currOutput: dict with image data, metadata, and the associated label
     """
+
     data = inputDict['data']
     metadataStats = inputDict['metadataStats']
     metadata = np.divide(json.load(open(data['features_path'])) - np.array(metadataStats['metadata_mean']), metadataStats['metadata_max'])
@@ -169,7 +172,7 @@
 
 def codes_metadata_generator(params, data, metadataStats, codesStats):
     """
-    Custom generator that yields a vector containign the 4096-d CNN codes output by VGG16 and metadata features (if params set to use).
+    Custom generator that yields a vector containing the 2208-d CNN codes output by DenseNet and metadata features (if params set to use).
    :param params: global parameters, used to find location of the dataset and json file
     :param data: list of objects containing the category labels and paths to CNN codes and images
     :param metadataStats: metadata stats used to normalize metadata features
@@ -182,14 +185,16 @@
     batchInds = get_batch_inds(params.batch_size_lstm, idx, N)
     trainKeys = list(data.keys())
+
+    executor = ProcessPoolExecutor(max_workers=params.num_workers)
 
     while True:
         for inds in batchInds:
             batchKeys = [trainKeys[ind] for ind in inds]
-            codesMetadata,labels = load_lstm_batch(params, data, batchKeys, metadataStats, codesStats)
+            codesMetadata,labels = load_lstm_batch(params, data, batchKeys, metadataStats, codesStats, executor)
             yield(codesMetadata,labels)
 
-def load_lstm_batch(params, data, batchKeys, metadataStats, codesStats):
+def load_lstm_batch(params, data, batchKeys, metadataStats, codesStats, executor):
     """
     Load batch of CNN codes + metadata and preprocess the data before returning.
     :param params: global parameters, used to find location of the dataset and json file
@@ -199,37 +204,59 @@
     :param codesStats: CNN codes stats used to normalize CNN codes and define the maximum number of temporal views
     :return codesMetadata,labels: 2208-d CNN codes + metadata (if set) and labels (categorical form)
     """
-    
+
     if params.use_metadata:
-        codesMetadata = np.zeros((params.batch_size_lstm, codesStats['max_temporal'], params.cnn_last_layer_length+params.metadata_length))
+        codesMetadata = np.zeros((params.batch_size_lstm, codesStats['max_temporal'], params.cnn_lstm_layer_length+params.metadata_length))
     else:
-        codesMetadata = np.zeros((params.batch_size_lstm, codesStats['max_temporal'], params.cnn_last_layer_length))
-    
+        codesMetadata = np.zeros((params.batch_size_lstm, codesStats['max_temporal'], params.cnn_lstm_layer_length))
+
     labels = np.zeros(params.batch_size_lstm)
+
+    futures = []
     for i,key in enumerate(batchKeys):
-        currData = data[key]
-        labels[i] = currData['category']
-        if params.use_metadata:
-            inds = []
-            for file in currData['metadata_paths']:
-                underscores = [ind for ind,ltr in enumerate(file) if ltr == '_']
-                inds.append(int(file[underscores[-3]+1:underscores[-2]]))
-            inds = np.argsort(np.array(inds)).tolist()
-        else:
-            inds = range(len(currData['cnn_codes_paths']))
-
-        for codesIndex in inds:
-            cnnCodes = json.load(open(currData['cnn_codes_paths'][codesIndex])) - np.array(codesStats['codes_mean'])
-            if params.use_metadata:
-                metadata = np.divide(json.load(open(currData['metadata_paths'][codesIndex])) - np.array(metadataStats['metadata_mean']), metadataStats['metadata_max'])
-                codesMetadata[i,codesIndex,0:params.metadata_length] = metadata
-                codesMetadata[i,codesIndex,params.metadata_length:] = cnnCodes
-            else:
-                codesMetadata[i,codesIndex,:] = cnnCodes
-
-    labels = to_categorical(labels, params.num_labels)
+        currInput = {}
+        currInput['currData'] = data[key]
+        currInput['lastLayerLength'] = codesMetadata.shape[2]
+        currInput['codesStats'] = codesStats
+        currInput['use_metadata'] = params.use_metadata
+        currInput['metadataStats'] = metadataStats
+        labels[i] = data[key]['category']
+
+        task = partial(_load_lstm_batch_helper, currInput)
+        futures.append(executor.submit(task))
+
+    results = [future.result() for future in futures]
+
+    for i,result in enumerate(results):
+        codesMetadata[i,:,:] = result['codesMetadata']
 
+    labels = to_categorical(labels, params.num_labels)
    return 
codesMetadata,labels - +def _load_lstm_batch_helper(inputDict): + + currData = inputDict['currData'] + codesStats = inputDict['codesStats'] + currOutput = {} + + codesMetadata = np.zeros((codesStats['max_temporal'], inputDict['lastLayerLength'])) + + timestamps = [] + for codesIndex in range(len(currData['cnn_codes_paths'])): + cnnCodes = json.load(open(currData['cnn_codes_paths'][codesIndex])) + # compute a timestamp for temporally sorting + timestamp = (cnnCodes[4]-1970)*525600 + cnnCodes[5]*12*43800 + cnnCodes[6]*31*1440 + cnnCodes[7]*60 + timestamps.append(timestamp) + + cnnCodes = np.divide(cnnCodes - np.array(codesStats['codes_mean']), np.array(codesStats['codes_max'])) + codesMetadata[codesIndex,:] = cnnCodes + + sortedInds = sorted(range(len(timestamps)), key=lambda k:timestamps[k]) + codesMetadata[range(len(sortedInds)),:] = codesMetadata[sortedInds,:] + + currOutput['codesMetadata'] = codesMetadata + return currOutput + + + diff --git a/code/fmowBaseline.py b/code/fmowBaseline.py index 78db116..bf19712 100644 --- a/code/fmowBaseline.py +++ b/code/fmowBaseline.py @@ -23,16 +23,17 @@ from keras.callbacks import ModelCheckpoint from keras.preprocessing import image from keras.models import Model, load_model -from keras.applications import VGG16,imagenet_utils +from keras.applications import imagenet_utils from data_ml_functions.mlFunctions import get_cnn_model,img_metadata_generator,get_lstm_model,codes_metadata_generator from data_ml_functions.dataFunctions import prepare_data,calculate_class_weights import numpy as np import os -from data_ml_functions.mlFunctions import load_cnn_batch -from data_ml_functions.dataFunctions import get_batch_inds #from data_ml_functions.multi_gpu import make_parallel +from concurrent.futures import ProcessPoolExecutor +from tqdm import tqdm + import time class FMOWBaseline: @@ -71,11 +72,13 @@ def __init__(self, params=None, argv=None): self.params.files['lstm_model'] = os.path.join(self.params.directories['lstm_models'], 'lstm_model_with_metadata.model') self.params.files['cnn_codes_stats'] = os.path.join(self.params.directories['working'], 'cnn_codes_stats_with_metadata.json') self.params.files['lstm_training_struct'] = os.path.join(self.params.directories['working'], 'lstm_training_struct_with_metadata.json') + self.params.files['lstm_test_struct'] = os.path.join(self.params.directories['working'], 'lstm_test_struct_with_metadata.json') else: self.params.files['cnn_model'] = os.path.join(self.params.directories['cnn_models'], 'cnn_model_no_metadata.model') self.params.files['lstm_model'] = os.path.join(self.params.directories['lstm_models'], 'lstm_model_no_metadata.model') self.params.files['cnn_codes_stats'] = os.path.join(self.params.directories['working'], 'cnn_codes_stats_no_metadata.json') self.params.files['lstm_training_struct'] = os.path.join(self.params.directories['working'], 'lstm_training_struct_no_metadata.json') + self.params.files['lstm_test_struct'] = os.path.join(self.params.directories['working'], 'lstm_test_struct_no_metadata.json') def train_cnn(self): """ @@ -85,26 +88,23 @@ def train_cnn(self): """ trainData = json.load(open(self.params.files['training_struct'])) - metadataStats = json.load(open(self.params.files['dataset_stats'])) model = get_cnn_model(self.params) #model = make_parallel(model, 4) model.compile(optimizer=Adam(lr=self.params.cnn_adam_learning_rate), loss='categorical_crossentropy', metrics=['accuracy']) - -# classWeights = np.array(json.load(open(self.params.files['class_weight']))) train_datagen = 
img_metadata_generator(self.params, trainData, metadataStats) - + print("training") filePath = os.path.join(self.params.directories['cnn_checkpoint_weights'], 'weights.{epoch:02d}.hdf5') - checkpoint = ModelCheckpoint(filepath=filePath, monitor='loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=5) + checkpoint = ModelCheckpoint(filepath=filePath, monitor='loss', verbose=0, save_best_only=False, + save_weights_only=False, mode='auto', period=5) callbacks_list = [checkpoint] model.fit_generator(train_datagen, - steps_per_epoch=(len(trainData) / self.params.batch_size_cnn + 1), - epochs=self.params.cnn_epochs, callbacks=callbacks_list) -# epochs=self.params.cnn_epochs, class_weight=classWeights, callbacks=callbacks_list) + steps_per_epoch=(len(trainData) / self.params.batch_size_cnn + 1), + epochs=self.params.cnn_epochs, callbacks=callbacks_list) model.save(self.params.files['cnn_model']) @@ -114,6 +114,7 @@ def train_lstm(self): :param: :return: """ + codesTrainData = json.load(open(self.params.files['lstm_training_struct'])) codesStats = json.load(open(self.params.files['cnn_codes_stats'])) metadataStats = json.load(open(self.params.files['dataset_stats'])) @@ -122,8 +123,6 @@ def train_lstm(self): #model = make_parallel(model, 4) model.compile(optimizer=Adam(lr=self.params.lstm_adam_learning_rate), loss='categorical_crossentropy', metrics=['accuracy']) -# classWeights = np.array(json.load(open(self.params.files['class_weight']))) - train_datagen = codes_metadata_generator(self.params, codesTrainData, metadataStats, codesStats) print("training") @@ -132,9 +131,9 @@ def train_lstm(self): callbacks_list = [checkpoint] model.fit_generator(train_datagen, - steps_per_epoch=(len(codesTrainData) / self.params.batch_size_lstm + 1), - epochs=self.params.lstm_epochs, callbacks=callbacks_list) -# epochs=self.params.lstm_epochs, class_weight=classWeights, callbacks=callbacks_list) + steps_per_epoch=(len(codesTrainData) / self.params.batch_size_lstm + 1), + epochs=self.params.lstm_epochs, callbacks=callbacks_list, + max_queue_size=20) model.save(self.params.files['lstm_model']) @@ -149,12 +148,10 @@ def generate_cnn_codes(self): metadataStats = json.load(open(self.params.files['dataset_stats'])) trainData = json.load(open(self.params.files['training_struct'])) testData = json.load(open(self.params.files['test_struct'])) - model = load_model(self.params.files['cnn_model']) -# model = get_cnn_model(self.params) -# model.load_weights('../data/working/cnn_checkpoint_weights/weights.14.hdf5') + #cnnModel = load_model(self.params.files['cnn_model']) + cnnModel = get_cnn_model(self.params) + featuresModel = Model(cnnModel.inputs, cnnModel.layers[-6].output) - featuresModel = Model(model.input, model.layers[-3].output) - allTrainCodes = [] featureDirs = ['train', 'test'] @@ -177,7 +174,7 @@ def generate_cnn_codes(self): N = len(data) initBatch = True - for i,currData in enumerate(data): + for i,currData in enumerate(tqdm(data)): if initBatch: if N-i < self.params.batch_size_eval: batchSize = 1 @@ -209,8 +206,6 @@ def generate_cnn_codes(self): batchIndex += 1 - print(i) - if batchIndex == batchSize: imgdata = imagenet_utils.preprocess_input(imgdata) imgdata = imgdata / 255.0 @@ -219,6 +214,7 @@ def generate_cnn_codes(self): cnnCodes = featuresModel.predict([imgdata,metadataFeatures], batch_size=batchSize) else: cnnCodes = featuresModel.predict(imgdata, batch_size=batchSize) + for codeIndex,currCodes in enumerate(cnnCodes): currBasePath = tmpBasePaths[codeIndex] outFile = 
os.path.join(outDir, '%07d.json' % index) @@ -242,7 +238,6 @@ def generate_cnn_codes(self): codesTrainData = codesData else: codesTestData = codesData - N = len(allTrainCodes[0]) sumCodes = np.zeros(N) @@ -251,14 +246,15 @@ def generate_cnn_codes(self): avgCodes = sumCodes / len(allTrainCodes) maxCodes = np.zeros(N) for currCodes in allTrainCodes: - maxCodes = np.maximum(maxCodes, currCodes-avgCodes) - + maxCodes = np.maximum(maxCodes, np.abs(currCodes-avgCodes)) + maxCodes[maxCodes == 0] = 1 + maxTemporal = 0 for key in codesTrainData.keys(): currTemporal = len(codesTrainData[key]['cnn_codes_paths']) if currTemporal > maxTemporal: maxTemporal = currTemporal - + codesStats = {} codesStats['codes_mean'] = avgCodes.tolist() codesStats['codes_max'] = maxCodes.tolist() @@ -266,45 +262,48 @@ def generate_cnn_codes(self): json.dump(codesTrainData, open(self.params.files['lstm_training_struct'], 'w')) json.dump(codesStats, open(self.params.files['cnn_codes_stats'], 'w')) - - + json.dump(codesTestData, open(self.params.files['lstm_test_struct'], 'w')) + def test_models(self): + + codesTestData = json.load(open(self.params.files['lstm_test_struct'])) metadataStats = json.load(open(self.params.files['dataset_stats'])) metadataMean = np.array(metadataStats['metadata_mean']) metadataMax = np.array(metadataStats['metadata_max']) - cnnModel = load_model(self.params.files['cnn_model']) - #cnnModel = get_cnn_model(self.params) - #cnnModel = make_parallel(cnnModel, 4) - #cnnModel.load_weights('../data/working/cnn_checkpoint_weights/weights.14.hdf5') - #cnnModel = cnnModel.layers[-2] +# cnnModel = load_model(self.params.files['cnn_model']) + cnnModel = get_cnn_model(self.params) if self.params.test_lstm: codesStats = json.load(open(self.params.files['cnn_codes_stats'])) - featuresModel = Model(cnnModel.input, cnnModel.layers[-3].output) - lstmModel = load_model(self.params.files['lstm_model']) -# lstmModel = get_lstm_model(self.params, codesStats) -# lstmModel.load_weights('../data/working/lstm_checkpoint_weights/weights.14.hdf5') - + #lstmModel = load_model(self.params.files['lstm_model']) + lstmModel = get_lstm_model(self.params, codesStats) index = 0 - timestr = time.strftime("%Y%m%d-%H%M%S") if self.params.test_cnn: fidCNN = open(os.path.join(self.params.directories['predictions'], 'predictions-cnn-%s.txt' % timestr), 'w') if self.params.test_lstm: fidLSTM = open(os.path.join(self.params.directories['predictions'], 'predictions-lstm-%s.txt' % timestr), 'w') + + def walkdir(folder): + for root, dirs, files in os.walk(folder): + if len(files) > 0: + yield (root, dirs, files) - for root, dirs, files in os.walk(self.params.directories['test_data']): + num_sequences = 0 + for _ in walkdir(self.params.directories['test_data']): + num_sequences += 1 + + for root, dirs, files in tqdm(walkdir(self.params.directories['test_data']), total=num_sequences): if len(files) > 0: imgPaths = [] metadataPaths = [] slashes = [i for i,ltr in enumerate(root) if ltr == '/'] bbID = int(root[slashes[-1]+1:]) - - + for file in files: if file.endswith('.jpg'): imgPaths.append(os.path.join(root,file)) @@ -320,7 +319,10 @@ def test_models(self): currBatchSize = len(inds) imgdata = np.zeros((currBatchSize, self.params.target_img_size[0], self.params.target_img_size[1], self.params.num_channels)) metadataFeatures = np.zeros((currBatchSize, self.params.metadata_length)) - + + codesIndex = 0 + codesPaths = codesTestData[root[24:]] + codesFeatures = [] for ind in inds: img = image.load_img(imgPaths[ind]) img = image.img_to_array(img) @@ 
-331,6 +333,9 @@ def test_models(self):
                     features = np.divide(features - metadataMean, metadataMax)
                     metadataFeatures[ind,:] = features
 
+                    codesFeatures.append(json.load(open(codesPaths['cnn_codes_paths'][codesIndex])))
+                    codesIndex += 1
+
             imgdata = imagenet_utils.preprocess_input(imgdata)
             imgdata = imgdata / 255.0
 
@@ -341,22 +346,22 @@
                 predictionsCNN = np.sum(cnnModel.predict(imgdata, batch_size=currBatchSize), axis=0)
 
             if self.params.test_lstm:
                 if self.params.use_metadata:
-                    codesMetadata = np.zeros((1, codesStats['max_temporal'], self.params.cnn_last_layer_length+self.params.metadata_length))
-                    currFeatures = featuresModel.predict([imgdata, metadataFeatures], batch_size=currBatchSize)
+                    codesMetadata = np.zeros((1, codesStats['max_temporal'], self.params.cnn_lstm_layer_length+self.params.metadata_length))
                 else:
-                    codesMetadata = np.zeros((1, codesStats['max_temporal'], self.params.cnn_last_layer_length))
-                    currFeatures = featuresModel.predict(imgdata, batch_size=currBatchSize)
-                
+                    codesMetadata = np.zeros((1, codesStats['max_temporal'], self.params.cnn_lstm_layer_length))
+
+                timestamps = []
                 for codesIndex in range(currBatchSize):
-                    metadata = metadataFeatures[codesIndex,:]
-                    cnnCodes = currFeatures[codesIndex,:]
-                    if self.params.use_metadata:
-                        codesMetadata[0,codesIndex,0:self.params.metadata_length] = metadata
-                        codesMetadata[0,codesIndex,self.params.metadata_length:] = cnnCodes
-                    else:
-                        codesMetadata[0,codesIndex,:] = cnnCodes
+                    cnnCodes = codesFeatures[codesIndex]
+                    timestamp = (cnnCodes[4]-1970)*525600 + cnnCodes[5]*12*43800 + cnnCodes[6]*31*1440 + cnnCodes[7]*60
+                    timestamps.append(timestamp)
+                    cnnCodes = np.divide(cnnCodes - np.array(codesStats['codes_mean']), np.array(codesStats['codes_max']))
+                    codesMetadata[0,codesIndex,:] = cnnCodes
+
+                sortedInds = sorted(range(len(timestamps)), key=lambda k:timestamps[k])
+                codesMetadata[0,range(len(sortedInds)),:] = codesMetadata[0,sortedInds,:]
+
                 predictionsLSTM = lstmModel.predict(codesMetadata, batch_size=1)
 
         if len(files) > 0:
@@ -369,7 +374,6 @@
                     oursLSTMStr = self.params.category_names[predLSTM]
                     fidLSTM.write('%d,%s\n' % (bbID,oursLSTMStr))
                 index += 1
-                print(index)
 
         if self.params.test_cnn:
             fidCNN.close()
 
diff --git a/code/params.py b/code/params.py
index e59be5a..21b2ca7 100644
--- a/code/params.py
+++ b/code/params.py
@@ -25,12 +25,13 @@
 
 use_metadata = True
 
-batch_size_cnn = 64
-batch_size_lstm = 64
+batch_size_cnn = 128
+batch_size_lstm = 512
 batch_size_eval = 128
-metadata_length = 21
+metadata_length = 45
 num_channels = 3
 cnn_last_layer_length = 4096
+cnn_lstm_layer_length = 2208
 
 target_img_size = (224,224)
 
@@ -45,10 +46,10 @@
 #LEARNING PARAMS
 cnn_adam_learning_rate = 1e-4
 cnn_adam_loss = 'categorical_crossentropy'
-cnn_epochs = 15
+cnn_epochs = 50
 
 lstm_adam_learning_rate = 1e-4
-lstm_epochs = 5
+lstm_epochs = 100
 lstm_loss = 'categorical_crossentropy'
 
 #DIRECTORIES AND FILES
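
Note on the CNN-only decision rule: the README section above describes it as summing predictions over each temporal view and then taking an argmax, and `test_models()` implements it via `np.sum(cnnModel.predict(...), axis=0)`. The following is a minimal, self-contained NumPy sketch of just that fusion step, included for reference only; `view_probs` is a hypothetical stand-in for the per-view softmax outputs that the baseline obtains from `cnnModel.predict`.

```python
import numpy as np

def fuse_temporal_predictions(view_probs):
    """Fuse per-view softmax outputs for one temporal sequence.

    view_probs: (num_views, num_classes) array, one softmax vector per
    temporal view of the same bounding box.
    Returns the index of the winning category.
    """
    summed = view_probs.sum(axis=0)   # accumulate class evidence across views
    return int(np.argmax(summed))     # highest-scoring class wins

# Example: three views of one sequence, four categories.
view_probs = np.array([[0.1, 0.6, 0.2, 0.1],
                       [0.2, 0.5, 0.2, 0.1],
                       [0.4, 0.3, 0.2, 0.1]])
print(fuse_temporal_predictions(view_probs))  # -> 1
```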