代码拉取完成,页面将自动刷新
同步操作将从 AmazingU/ImageNetPreprocess 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Helper functions to generate the Visual WakeWords dataset.
It filters raw COCO annotations file to Visual WakeWords Dataset
annotations. The resulting annotations and COCO images are then converted
to TF records.
See download_and_convert_visualwakewords.py for the sample usage.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import hashlib
import io
import json
import os
import contextlib2
import PIL.Image
import six
import tensorflow.compat.v1 as tf
from datasets import dataset_utils
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.flags.DEFINE_string(
'coco_train_url',
'http://images.cocodataset.org/zips/train2014.zip',
'Link to zip file containing coco training data')
tf.app.flags.DEFINE_string(
'coco_validation_url',
'http://images.cocodataset.org/zips/val2014.zip',
'Link to zip file containing coco validation data')
tf.app.flags.DEFINE_string(
'coco_annotations_url',
'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
'Link to zip file containing coco annotation data')
FLAGS = tf.app.flags.FLAGS
def download_coco_dataset(dataset_dir):
"""Download the coco dataset.
Args:
dataset_dir: Path where coco dataset should be downloaded.
"""
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_train_url,
dataset_dir)
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_validation_url,
dataset_dir)
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_annotations_url,
dataset_dir)
def create_labels_file(foreground_class_of_interest,
visualwakewords_labels_file):
"""Generate visualwakewords labels file.
Args:
foreground_class_of_interest: category from COCO dataset that is filtered by
the visualwakewords dataset
visualwakewords_labels_file: output visualwakewords label file
"""
labels_to_class_names = {0: 'background', 1: foreground_class_of_interest}
with open(visualwakewords_labels_file, 'w') as fp:
for label in labels_to_class_names:
fp.write(str(label) + ':' + str(labels_to_class_names[label]) + '\n')
def create_visual_wakeword_annotations(annotations_file,
visualwakewords_annotations_file,
small_object_area_threshold,
foreground_class_of_interest):
"""Generate visual wakewords annotations file.
Loads COCO annotation json files to generate visualwakewords annotations file.
Args:
annotations_file: JSON file containing COCO bounding box annotations
visualwakewords_annotations_file: path to output annotations file
small_object_area_threshold: threshold on fraction of image area below which
small object bounding boxes are filtered
foreground_class_of_interest: category from COCO dataset that is filtered by
the visual wakewords dataset
"""
# default object of interest is person
foreground_class_of_interest_id = 1
with tf.gfile.GFile(annotations_file, 'r') as fid:
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
# Create category index
category_index = {}
for category in groundtruth_data['categories']:
if category['name'] == foreground_class_of_interest:
foreground_class_of_interest_id = category['id']
category_index[category['id']] = category
# Create annotations index, a map of image_id to it's annotations
tf.logging.info('Building annotations index...')
annotations_index = collections.defaultdict(
lambda: collections.defaultdict(list))
# structure is { "image_id": {"objects" : [list of the image annotations]}}
for annotation in groundtruth_data['annotations']:
annotations_index[annotation['image_id']]['objects'].append(annotation)
missing_annotation_count = len(images) - len(annotations_index)
tf.logging.info('%d images are missing annotations.',
missing_annotation_count)
# Create filtered annotations index
annotations_index_filtered = {}
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations = annotations_index[image['id']]
annotations_filtered = _filter_annotations(
annotations, image, small_object_area_threshold,
foreground_class_of_interest_id)
annotations_index_filtered[image['id']] = annotations_filtered
with open(visualwakewords_annotations_file, 'w') as fp:
json.dump(
{
'images': images,
'annotations': annotations_index_filtered,
'categories': category_index
}, fp)
def _filter_annotations(annotations, image, small_object_area_threshold,
foreground_class_of_interest_id):
"""Filters COCO annotations to visual wakewords annotations.
Args:
annotations: dicts with keys: {
u'objects': [{u'id', u'image_id', u'category_id', u'segmentation',
u'area', u'bbox' : [x,y,width,height], u'iscrowd'}] } Notice
that bounding box coordinates in the official COCO dataset
are given as [x, y, width, height] tuples using absolute
coordinates where x, y represent the top-left (0-indexed)
corner.
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
small_object_area_threshold: threshold on fraction of image area below which
small objects are filtered
foreground_class_of_interest_id: category of COCO dataset which visual
wakewords filters
Returns:
annotations_filtered: dict with keys: {
u'objects': [{"area", "bbox" : [x,y,width,height]}],
u'label',
}
"""
objects = []
image_area = image['height'] * image['width']
for annotation in annotations['objects']:
normalized_object_area = annotation['area'] / image_area
category_id = int(annotation['category_id'])
# Filter valid bounding boxes
if category_id == foreground_class_of_interest_id and \
normalized_object_area > small_object_area_threshold:
objects.append({
u'area': annotation['area'],
u'bbox': annotation['bbox'],
})
label = 1 if objects else 0
return {
'objects': objects,
'label': label,
}
def create_tf_record_for_visualwakewords_dataset(annotations_file, image_dir,
output_path, num_shards):
"""Loads Visual WakeWords annotations/images and converts to tf.Record format.
Args:
annotations_file: JSON file containing bounding box annotations.
image_dir: Directory containing the image files.
output_path: Path to output tf.Record file.
num_shards: number of output file shards.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
output_tfrecords = dataset_utils.open_sharded_output_tfrecords(
tf_record_close_stack, output_path, num_shards)
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
annotations_index = groundtruth_data['annotations']
annotations_index = {int(k): v for k, v in six.iteritems(annotations_index)}
# convert 'unicode' key to 'int' key after we parse the json file
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations = annotations_index[image['id']]
tf_example = _create_tf_example(image, annotations, image_dir)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
def _create_tf_example(image, annotations, image_dir):
"""Converts image and annotations to a tf.Example proto.
Args:
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
annotations: dict with objects (a list of image annotations) and a label.
{u'objects':[{"area", "bbox" : [x,y,width,height}], u'label'}. Notice
that bounding box coordinates in the COCO dataset are given as[x, y,
width, height] tuples using absolute coordinates where x, y represent
the top-left (0-indexed) corner. This function also converts to the format
that can be used by the Tensorflow Object Detection API (which is [ymin,
xmin, ymax, xmax] with coordinates normalized relative to image size).
image_dir: directory containing the image files.
Returns:
tf_example: The converted tf.Example
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
full_path = os.path.join(image_dir, filename)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
key = hashlib.sha256(encoded_jpg).hexdigest()
xmin, xmax, ymin, ymax, area = [], [], [], [], []
for obj in annotations['objects']:
(x, y, width, height) = tuple(obj['bbox'])
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
area.append(obj['area'])
feature_dict = {
'image/height':
dataset_utils.int64_feature(image_height),
'image/width':
dataset_utils.int64_feature(image_width),
'image/filename':
dataset_utils.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_utils.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_utils.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_utils.bytes_feature(encoded_jpg),
'image/format':
dataset_utils.bytes_feature('jpeg'.encode('utf8')),
'image/class/label':
dataset_utils.int64_feature(annotations['label']),
'image/object/bbox/xmin':
dataset_utils.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_utils.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_utils.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_utils.float_list_feature(ymax),
'image/object/area':
dataset_utils.float_list_feature(area),
}
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return example
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。