1 Star 1 Fork 1

织女的牛郎/tfrecord_generator

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
dataset_util.py 4.49 KB
一键复制 编辑 原始数据 按行查看 历史
peiyuan.luo 提交于 2018-04-16 14:51 . init
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for creating TFRecord data sets."""
import tensorflow as tf
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def read_examples_list(path):
"""Read list of training or validation examples.
The file is assumed to contain a single example per line where the first
token in the line is an identifier that allows us to find the image and
annotation xml for that example.
For example, the line:
xyz 3
would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
Args:
path: absolute path to examples list file.
Returns:
list of example identifiers (strings).
"""
with tf.gfile.GFile(path) as fid:
lines = fid.readlines()
return [line.strip().split(' ')[0] for line in lines]
def recursive_parse_xml_to_dict(xml):
"""Recursively parses XML contents to python dict.
We assume that `object` tags are the only ones that can appear
multiple times at the same level of a tree.
Args:
xml: xml tree obtained by parsing XML file contents using lxml.etree
Returns:
Python dictionary holding XML contents.
"""
if not xml:
return {xml.tag: xml.text}
result = {}
for child in xml:
child_result = recursive_parse_xml_to_dict(child)
if child.tag != 'object':
result[child.tag] = child_result[child.tag]
else:
if child.tag not in result:
result[child.tag] = []
result[child.tag].append(child_result[child.tag])
return {xml.tag: result}
def make_initializable_iterator(dataset):
"""Creates an iterator, and initializes tables.
This is useful in cases where make_one_shot_iterator wouldn't work because
the graph contains a hash table that needs to be initialized.
Args:
dataset: A `tf.data.Dataset` object.
Returns:
A `tf.data.Iterator`.
"""
iterator = dataset.make_initializable_iterator()
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
return iterator
def read_dataset(file_read_func, decode_func, input_files, config):
"""Reads a dataset, and handles repetition and shuffling.
Args:
file_read_func: Function to use in tf.data.Dataset.interleave, to read
every individual file into a tf.data.Dataset.
decode_func: Function to apply to all records.
input_files: A list of file paths to read.
config: A input_reader_builder.InputReader object.
Returns:
A tf.data.Dataset based on config.
"""
# Shard, shuffle, and read files.
filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files],
0)
filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
if config.shuffle:
filename_dataset = filename_dataset.shuffle(
config.filenames_shuffle_buffer_size)
elif config.num_readers > 1:
tf.logging.warning('`shuffle` is false, but the input data stream is '
'still slightly shuffled since `num_readers` > 1.')
filename_dataset = filename_dataset.repeat(config.num_epochs or None)
records_dataset = filename_dataset.apply(
tf.contrib.data.parallel_interleave(
file_read_func, cycle_length=config.num_readers,
block_length=config.read_block_length, sloppy=True))
if config.shuffle:
records_dataset.shuffle(config.shuffle_buffer_size)
tensor_dataset = records_dataset.map(
decode_func, num_parallel_calls=config.num_parallel_map_calls)
return tensor_dataset.prefetch(config.prefetch_size)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/leoluopy/tfrecord_generator.git
git@gitee.com:leoluopy/tfrecord_generator.git
leoluopy
tfrecord_generator
tfrecord_generator
master

搜索帮助