Cannot read input data using the TensorFlow method tf.train.string_input_producer

I use the code below to transform the data from .libfm to .tfrecords.

def generate_tfrecords(input_filename, split_term, output_filename):
    """Convert a .libfm-style text file into a TFRecord file.

    Each input line is expected to look like:
        label <field> <field> id:value id:value ...
    where fields are separated by *split_term* and the first three fields
    are the label plus two skipped columns (matching the original
    ``data[3:]`` slice -- confirm against the actual data format).

    Args:
        input_filename: path to the source .libfm file.
        split_term: field separator (e.g. "\t").
        output_filename: path of the .tfrecords file to write.
    """
    writer = tf.python_io.TFRecordWriter(output_filename)
    num = 0
    for line in open(input_filename, "r"):
        data = line.strip().split(split_term)
        label = float(data[0])
        ids = []
        values = []
        values2 = []
        for feat in data[3:]:
            # NOTE(review): the original used `id`, shadowing the builtin.
            feat_id, value = feat.split(":")
            ids.append(int(feat_id))
            values.append(float(value))
            # presumably a second copy of the value stream (the reader
            # parses a separate "values2" feature) -- TODO confirm semantics
            values2.append(float(value))

        # Write each example one by one; the feature keys/types here must
        # match the parse spec used on the reading side.
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(
                float_list=tf.train.FloatList(value=[label])),
            "ids": tf.train.Feature(
                int64_list=tf.train.Int64List(value=ids)),
            "values": tf.train.Feature(
                float_list=tf.train.FloatList(value=values)),
            "values2": tf.train.Feature(
                float_list=tf.train.FloatList(value=values2)),
        }))
        writer.write(example.SerializeToString())
        num += 1
    writer.close()

def main():
    """CLI entry point: convert sys.argv[1] (.libfm) to sys.argv[2] (.tfrecords)."""
    src, dst = sys.argv[1], sys.argv[2]
    generate_tfrecords(src, "\t", dst)

As a test I made three files and used tf.train.string_input_producer to read and decode their contents. However, the script does not work: it gets stuck at the last line, after which it consumes no CPU and cannot be terminated with Ctrl+C.
I suspect the problem is in my string_input_producer step. Can anybody help me solve it?

# Input-pipeline configuration for the TFRecord reader below.
train_file = ["./train1.tfrecords","./train2.tfrecords","./train3.tfrecords"]
train_batch_size = 50
test_batch_size = 50
# Minimum elements kept in the shuffle queue after a dequeue (shuffle quality).
min_after_dequeue = 5
feature_num = 3
# Standard TF1 sizing rule: min_after_dequeue + (num_threads + margin) * batch_size.
capacity = min_after_dequeue + 3 * train_batch_size

def file_queue_read(filename_queue):
    """Read one serialized tf.train.Example record from *filename_queue*.

    Args:
        filename_queue: queue of TFRecord filenames, as produced by
            tf.train.string_input_producer.

    Returns:
        A scalar string tensor holding one serialized Example.
    """
    reader = tf.TFRecordReader()
    # FIX: the original line was truncated ("_, serialized_example =");
    # reader.read dequeues a file and yields (key, serialized record).
    _, serialized_example = reader.read(filename_queue)
    return serialized_example

def decode(batch_serialized_example):
    """Parse a batch of serialized Examples into feature tensors.

    Args:
        batch_serialized_example: 1-D string tensor of serialized Examples.

    Returns:
        Tuple (labels, ids, values, values2); ids/values/values2 are
        SparseTensors because they are variable-length features.
    """
    # FIX: the original call was truncated -- tf.parse_example needs the
    # serialized batch as its first argument and the spec under features=.
    features = tf.parse_example(
        batch_serialized_example,
        features={
            "label": tf.FixedLenFeature([], tf.float32),
            "ids": tf.VarLenFeature(tf.int64),
            "values": tf.VarLenFeature(tf.float32),
            "values2": tf.VarLenFeature(tf.float32),
        })
    batch_labels = features["label"]
    batch_ids = features["ids"]
    batch_values = features["values"]
    batch_values2 = features["values2"]
    return batch_labels, batch_ids, batch_values, batch_values2

def get_data(filename, batch_size, min_after_dequeue, capacity):
    """Build the input pipeline: files -> records -> shuffled, parsed batches.

    Args:
        filename: list of TFRecord file paths.
        batch_size: number of examples per batch.
        min_after_dequeue: minimum queue fill after a dequeue (shuffling).
        capacity: total shuffle-queue capacity.

    Returns:
        (serialized_example, labels, ids, values, values2) tensors.
    """
    filename_queue = tf.train.string_input_producer(filename)
    serialized_example = file_queue_read(filename_queue)
    # FIX: the original call was truncated -- shuffle_batch requires the
    # list of tensors to batch as its first argument.
    batch_serialized_example = tf.train.shuffle_batch(
        [serialized_example],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        allow_smaller_final_batch=True)
    batch_labels, batch_ids, batch_values, batch_values2 = decode(batch_serialized_example)
    return serialized_example, batch_labels, batch_ids, batch_values, batch_values2

# Driver. ROOT CAUSE of the reported hang: the original called
# tf.train.start_queue_runners() BEFORE get_data() built the queues, so no
# runner thread ever fed the filename/shuffle queues and sess.run blocked
# forever (and Ctrl+C could not interrupt the blocked dequeue op).
# Correct order: build the graph, run init (string_input_producer keeps its
# epoch counter in a LOCAL variable, so local init is required), then start
# the queue runners, then run the fetch ops.
with tf.Session() as sess:
    serialized_example, batch_labels, batch_ids, batch_values, batch_values2 = get_data(
        train_file, train_batch_size, min_after_dequeue, capacity)
    # Initializers must be created/run AFTER the pipeline ops exist.
    init = [tf.global_variables_initializer(), tf.local_variables_initializer()]
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # FIX: the original fetch line was truncated ("... =[batch_ids, ...]").
        ids, values, values2 = sess.run([batch_ids, batch_values, batch_values2])
    finally:
        # Always stop and join the runner threads so the process can exit.
        coord.request_stop()
        coord.join(threads)

Leave a Reply

Your email address will not be published.