Skip to content

Commit

Permalink
Sync internal code changes of tflite/tfhub.
Browse files Browse the repository at this point in the history
Mostly contributed by Yiqi Li (yuqili@google.com)
  • Loading branch information
mingxingtan committed Dec 23, 2020
1 parent 890eadd commit e419394
Show file tree
Hide file tree
Showing 19 changed files with 587 additions and 228 deletions.
14 changes: 7 additions & 7 deletions efficientdet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ You can run inference for a video and show the results online:
// Run eval.
!python main.py --mode=eval \
--model_name=${MODEL} --model_dir=${CKPT_PATH} \
--validation_file_pattern=tfrecord/val* \
--val_file_pattern=tfrecord/val* \
--val_json_file=annotations/instances_val2017.json

You can also run eval on test-dev set with the following command:
Expand All @@ -259,7 +259,7 @@ You can also run eval on test-dev set with the following command:
# Also, test-dev has 20288 images rather than val 5000 images.
!python main.py --mode=eval \
--model_name=${MODEL} --model_dir=${CKPT_PATH} \
--validation_file_pattern=tfrecord/testdev* \
--val_file_pattern=tfrecord/testdev* \
--testdev_dir='testdev_output' --eval_samples=20288
# Now you can submit testdev_output/detections_test-dev2017_test_results.json to
# coco server: https://competitions.codalab.org/competitions/20794#participate
Expand Down Expand Up @@ -288,8 +288,8 @@ Create a config file for the PASCAL VOC dataset called voc_config.yaml and put t
Finetune needs to use --ckpt rather than --backbone_ckpt.

!python main.py --mode=train_and_eval \
--training_file_pattern=tfrecord/pascal*.tfrecord \
--validation_file_pattern=tfrecord/pascal*.tfrecord \
--train_file_pattern=tfrecord/pascal*.tfrecord \
--val_file_pattern=tfrecord/pascal*.tfrecord \
--model_name=efficientdet-d0 \
--model_dir=/tmp/efficientdet-d0-finetune \
--ckpt=efficientdet-d0 \
Expand Down Expand Up @@ -326,8 +326,8 @@ Download efficientdet coco checkpoint.
Finetune needs to use --ckpt rather than --backbone_ckpt.

python main.py --mode=train \
--training_file_pattern=tfrecord/pascal*.tfrecord \
--validation_file_pattern=tfrecord/pascal*.tfrecord \
--train_file_pattern=tfrecord/pascal*.tfrecord \
--val_file_pattern=tfrecord/pascal*.tfrecord \
--model_name=efficientdet-d0 \
--model_dir=/tmp/efficientdet-d0-finetune \
--ckpt=efficientdet-d0 \
Expand Down Expand Up @@ -358,7 +358,7 @@ To train this model on Cloud TPU, you will need:
Then train the model:

!export PYTHONPATH="$PYTHONPATH:/path/to/models"
!python main.py --tpu=TPU_NAME --training_file_pattern=DATA_DIR/*.tfrecord --model_dir=MODEL_DIR --strategy=tpu
!python main.py --tpu=TPU_NAME --train_file_pattern=DATA_DIR/*.tfrecord --model_dir=MODEL_DIR --strategy=tpu

# TPU_NAME is the name of the TPU node, the same name that appears when you run gcloud compute tpus list, or ctpu ls.
# MODEL_DIR is a GCS location (a URL starting with gs://) where both the GCE VM and the associated Cloud TPU have write access.
Expand Down
52 changes: 4 additions & 48 deletions efficientdet/dataloader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,63 +13,19 @@
# limitations under the License.
# ==============================================================================
"""Data loader and processing test cases."""
import os
import tempfile

import tensorflow as tf

import dataloader
import hparams_config
from dataset import tfrecord_util
from brain_automl.efficientdet import test_util

from keras import anchors
from object_detection import tf_example_decoder


class DataloaderTest(tf.test.TestCase):

def _make_fake_tfrecord(self):
  """Write one synthetic detection example to a temporary TFRecord file.

  The example holds an all-ones 512x512x3 image encoded as JPEG and a single
  box with class text 'test' and class label 1.

  Returns:
    Path of the TFRecord file that was written.
  """
  tfrecord_path = os.path.join(tempfile.mkdtemp(), 'test.tfrecords')
  encoded_jpg = tf.io.encode_jpeg(tf.ones([512, 512, 3], dtype=tf.uint8))
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/height':
                  tfrecord_util.int64_feature(512),
              'image/width':
                  tfrecord_util.int64_feature(512),
              'image/filename':
                  tfrecord_util.bytes_feature('test_file_name.jpg'.encode(
                      'utf8')),
              'image/source_id':
                  tfrecord_util.bytes_feature('123456'.encode('utf8')),
              'image/key/sha256':
                  tfrecord_util.bytes_feature('qwdqwfw12345'.encode('utf8')),
              'image/encoded':
                  tfrecord_util.bytes_feature(encoded_jpg.numpy()),
              'image/format':
                  tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
              'image/object/bbox/xmin':
                  tfrecord_util.float_list_feature([0.1]),
              'image/object/bbox/xmax':
                  tfrecord_util.float_list_feature([0.1]),
              'image/object/bbox/ymin':
                  tfrecord_util.float_list_feature([0.2]),
              'image/object/bbox/ymax':
                  tfrecord_util.float_list_feature([0.2]),
              'image/object/class/text':
                  tfrecord_util.bytes_list_feature(['test'.encode('utf8')]),
              'image/object/class/label':
                  tfrecord_util.int64_list_feature([1]),
              'image/object/difficult':
                  tfrecord_util.int64_list_feature([]),
              'image/object/truncated':
                  tfrecord_util.int64_list_feature([]),
              'image/object/view':
                  tfrecord_util.bytes_list_feature([]),
          }))
  # Close the writer explicitly (via the context manager) so the record is
  # flushed to disk before the path is handed back to the caller, instead of
  # relying on garbage collection to do it.
  with tf.io.TFRecordWriter(tfrecord_path) as writer:
    writer.write(example.SerializeToString())
  return tfrecord_path

def test_parser(self):
tf.random.set_seed(111111)
params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
Expand All @@ -81,7 +37,7 @@ def test_parser(self):
anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])
example_decoder = tf_example_decoder.TfExampleDecoder(
regenerate_source_id=params['regenerate_source_id'])
tfrecord_path = self._make_fake_tfrecord()
tfrecord_path = test_util.make_fake_tfrecord(self.get_temp_dir())
dataset = tf.data.TFRecordDataset([tfrecord_path])
value = next(iter(dataset))
reader = dataloader.InputReader(tfrecord_path, True)
Expand Down
45 changes: 24 additions & 21 deletions efficientdet/dataset/create_pascal_tfrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,6 @@

from dataset import tfrecord_util

# Command-line flags for the PASCAL VOC -> TFRecord conversion.
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
                    'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
                    '(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord and json.')
flags.DEFINE_string('label_map_json_path', None,
                    'Path to label map json file with a dictionary.')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
                     'difficult instances')
flags.DEFINE_integer('num_shards', 100, 'Number of shards for output file.')
flags.DEFINE_integer('num_images', None, 'Max number of images to process.')
FLAGS = flags.FLAGS

SETS = ['train', 'val', 'trainval', 'test']
Expand Down Expand Up @@ -79,6 +66,24 @@
GLOBAL_ANN_ID = 0 # global annotation id.


def define_flags():
  """Register the command-line flags used by this conversion script.

  The flags are defined when this function is called rather than at import
  time. Flags registered: data_dir, set, annotations_dir, year, output_path,
  label_map_json_path, ignore_difficult_instances, num_shards, num_images.
  """
  flags.DEFINE_string('data_dir', '',
                      'Root directory to raw PASCAL VOC dataset.')
  flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
                      'merged set.')
  flags.DEFINE_string('annotations_dir', 'Annotations',
                      '(Relative) path to annotations directory.')
  flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
  flags.DEFINE_string('output_path', '', 'Path to output TFRecord and json.')
  flags.DEFINE_string('label_map_json_path', None,
                      'Path to label map json file with a dictionary.')
  flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
                       'difficult instances')
  flags.DEFINE_integer('num_shards', 100, 'Number of shards for output file.')
  # Help text typo fixed: "imags" -> "images".
  flags.DEFINE_integer('num_images', None, 'Max number of images to process.')


def get_image_id(filename):
"""Convert a string to a integer."""
# Warning: this function is highly specific to pascal filename!!
Expand All @@ -101,10 +106,9 @@ def get_ann_id():


def dict_to_tf_example(data,
dataset_directory,
images_dir,
label_map_dict,
ignore_difficult_instances=False,
image_subdirectory='JPEGImages',
ann_json_dict=None):
"""Convert XML derived dict to tf.Example proto.
Expand All @@ -114,12 +118,10 @@ def dict_to_tf_example(data,
Args:
data: dict holding PASCAL XML fields for a single image (obtained by running
tfrecord_util.recursive_parse_xml_to_dict)
dataset_directory: Path to root directory holding PASCAL dataset
images_dir: Path to the directory holding raw images.
label_map_dict: A map from string label names to integers ids.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
image_subdirectory: String specifying subdirectory within the PASCAL dataset
directory holding the actual image data.
ann_json_dict: annotation json dictionary.
Returns:
Expand All @@ -128,8 +130,7 @@ def dict_to_tf_example(data,
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)
full_path = os.path.join(images_dir, data['filename'])
with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
Expand Down Expand Up @@ -297,9 +298,10 @@ def main(_):
xml = etree.fromstring(xml_str)
data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']

img_dir = os.path.join(FLAGS.data_dir, data['folder'], 'JPEGImages')
tf_example = dict_to_tf_example(
data,
FLAGS.data_dir,
img_dir,
label_map_dict,
FLAGS.ignore_difficult_instances,
ann_json_dict=ann_json_dict)
Expand All @@ -316,4 +318,5 @@ def main(_):


# Script entry point: register the command-line flags first, then hand
# control to absl, which parses them and invokes main().
if __name__ == '__main__':
define_flags()
app.run(main)
5 changes: 3 additions & 2 deletions efficientdet/dataset/create_pascal_tfrecord_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ def test_dict_to_tf_example(self):
'notperson': 2,
}

example = create_pascal_tfrecord.dict_to_tf_example(
data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
example = create_pascal_tfrecord.dict_to_tf_example(data,
self.get_temp_dir(),
label_map_dict)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
Expand Down
2 changes: 1 addition & 1 deletion efficientdet/dataset/tfrecord_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def recursive_parse_xml_to_dict(xml):
Python dictionary holding XML contents.
"""
if not xml:
return {xml.tag: xml.text}
return {xml.tag: xml.text if xml.text else ''}
result = {}
for child in xml:
child_result = recursive_parse_xml_to_dict(child)
Expand Down
2 changes: 1 addition & 1 deletion efficientdet/hparams_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def default_detection_configs():
h.max_level = 7
h.num_scales = 3
# ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset.
h.aspect_ratios = [1.0, 2.0, 0.5] #[[0.7, 1.4], [1.0, 1.0], [1.4, 0.7]]
h.aspect_ratios = [1.0, 2.0, 0.5] # [[0.7, 1.4], [1.0, 1.0], [1.4, 0.7]]
h.anchor_scale = 4.0
# is batchnorm training mode
h.is_training_bn = True
Expand Down
4 changes: 2 additions & 2 deletions efficientdet/keras/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ Create a config file for the PASCAL VOC dataset called voc_config.yaml and put t
Finetune needs to use --pretrained_ckpt.

!python train.py \
--training_file_pattern=tfrecord/pascal*.tfrecord \
--train_file_pattern=tfrecord/pascal*.tfrecord \
--val_file_pattern=tfrecord/pascal*.tfrecord \
--val_file_pattern=tfrecord/*.json \
--model_name=efficientdet-d0 \
Expand Down Expand Up @@ -273,7 +273,7 @@ To train this model on Cloud TPU, you will need:
Then train the model:

!export PYTHONPATH="$PYTHONPATH:/path/to/models"
!python train.py --tpu=TPU_NAME --training_file_pattern=DATA_DIR/*.tfrecord --model_dir=MODEL_DIR --strategy=tpu
!python train.py --tpu=TPU_NAME --train_file_pattern=DATA_DIR/*.tfrecord --model_dir=MODEL_DIR --strategy=tpu

# TPU_NAME is the name of the TPU node, the same name that appears when you run gcloud compute tpus list, or ctpu ls.
# MODEL_DIR is a GCS location (a URL starting with gs://) where both the GCE VM and the associated Cloud TPU have write access.
Expand Down
Loading

0 comments on commit e419394

Please sign in to comment.