I am training the "Show and Tell" model in TensorFlow, in which the model automatically generates captions for images. However, I am getting this error: TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed.

Here is the traceback:

TypeError         Traceback (most recent call last) 
<ipython-input-15-b6da0a27b701> in <module>() 
    1 try: 
    2  #train(.001,False,False) #train from scratch 
----> 3  train(.001,True,True) #continue training from pretrained weights @epoch500 
    4  #train(.001) #train from previously saved weights 
    5 except KeyboardInterrupt: 

<ipython-input-14-39693d0edd0a> in train(learning_rate, continue_training, transfer) 
23  n_words = len(wordtoix) 
24  maxlen = np.max([x for x in map(lambda x: len(x.split(' ')), captions) ]) 
---> 25  caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b) 
26 
27  loss, image, sentence, mask = caption_generator.build_model() 

<ipython-input-12-7ef491a16183> in __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b) 
11   # declare the variables to be used for our word embeddings 
12   with tf.device("/cpu:0"): 
---> 13    self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1)) 
14 
15    self.embedding_bias = tf.get_variable("embedding_bias", tf.zeros([dim_embed])) 

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter) 
1063  collections=collections, caching_device=caching_device, 
1064  partitioner=partitioner, validate_shape=validate_shape, 
-> 1065  use_resource=use_resource, custom_getter=custom_getter) 
1066 get_variable_or_local_docstring = (
1067  """%s 

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter) 
960   collections=collections, caching_device=caching_device, 
961   partitioner=partitioner, validate_shape=validate_shape, 
--> 962   use_resource=use_resource, custom_getter=custom_getter) 
963 
964 def _get_partitioned_variable(self, 

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter) 
365   reuse=reuse, trainable=trainable, collections=collections, 
366   caching_device=caching_device, partitioner=partitioner, 
--> 367   validate_shape=validate_shape, use_resource=use_resource) 
368 
369 def _get_partitioned_variable(

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource) 
301      trainable=True, collections=None, caching_device=None, 
302      partitioner=None, validate_shape=True, use_resource=None): 
--> 303  is_scalar = shape is not None and not shape 
304  # Partitioned variable case 
305  if partitioner is not None and not is_scalar: 

/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in __nonzero__(self) 
511  `TypeError`. 
512  """ 
--> 513  raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. " 
514      "Use `if t is not None:` instead of `if t:` to test if a " 
515      "tensor is defined, and use TensorFlow ops such as " 

TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
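For reference, this TypeError is raised whenever a graph-mode tensor is evaluated in a boolean context, independent of the model code; a minimal sketch:

import tensorflow as tf

t = tf.zeros([3])  # a symbolic tensor, not a concrete value
if t:              # raises the TypeError above: a graph tensor has no truth value
    pass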

Here is the code:

def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andrej Karpathy's NeuralTalk
    print('preprocessing %d word vocab' % (word_count_threshold,))
    word_counts = {}
    nsents = 0
    for sent in sentence_iterator:
        nsents += 1
        for w in sent.lower().split(' '):
            word_counts[w] = word_counts.get(w, 0) + 1
    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '.'
    wordtoix = {}
    wordtoix['#START#'] = 0
    ix = 1
    for w in vocab:
        wordtoix[w] = ix
        ixtoword[ix] = w
        ix += 1

    word_counts['.'] = nsents
    bias_init_vector = np.array([1.0 * word_counts[ixtoword[i]] for i in ixtoword])
    bias_init_vector /= np.sum(bias_init_vector)
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector)
    return wordtoix, ixtoword, bias_init_vector.astype(np.float32)

class Caption_Generator():
    def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):

        self.dim_in = dim_in
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_lstm_steps = n_lstm_steps
        self.n_words = n_words

        # declare the variables to be used for our word embeddings
        with tf.device("/cpu:0"):
            self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))

            self.embedding_bias = tf.get_variable("embedding_bias", tf.zeros([dim_embed]))

            # declare the LSTM itself
            self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)

            # declare the variables used to embed the image features into the word embedding space
            self.img_embedding = tf.get_variable("img_embedding", tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1))
            self.img_embedding_bias = tf.get_variable("img_embedding_bias", tf.zeros([dim_hidden]))

            # declare the variables to go from an LSTM output to a word encoding output
            self.word_encoding = tf.get_variable("word_encoding", tf.random_uniform([dim_hidden, n_words], -0.1, 0.1))
            # initialize this bias variable from the preProBuildWordVocab output
            self.word_encoding_bias = tf.get_variable("word_encoding_bias", init_b)

    def build_model(self):
        # declare the placeholders for our extracted image feature vectors, our caption, and our mask
        # (the mask describes how long each caption is, as an array of 0/1 values of length `maxlen`)
        img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
        caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
        mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        # get an initial LSTM embedding from our image embedding
        image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias

        # set the initial state of our LSTM
        state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)

        total_loss = 0.0
        with tf.variable_scope("RNN"):
            for i in range(self.n_lstm_steps):
                if i > 0:
                    # if this isn't the first iteration of our LSTM we need to get the
                    # word_embedding corresponding to the (i-1)th word in our caption
                    with tf.device("/cpu:0"):
                        current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:, i-1]) + self.embedding_bias
                else:
                    # if this is the first iteration of our LSTM we use the embedded image as our input
                    current_embedding = image_embedding
                if i > 0:
                    # allows us to reuse the LSTM tensor variable on each iteration
                    tf.get_variable_scope().reuse_variables()

                out, state = self.lstm(current_embedding, state)
                # out, state = self.tf.nn.dynamic_rnn(current_embedding, state)

                if i > 0:
                    # get the one-hot representation of the next word in our caption
                    labels = tf.expand_dims(caption_placeholder[:, i], 1)
                    ix_range = tf.range(0, self.batch_size, 1)
                    ixs = tf.expand_dims(ix_range, 1)
                    concat = tf.concat([ixs, labels], 1)
                    onehot = tf.sparse_to_dense(
                        concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

                    # perform a softmax classification to generate the next word in the caption
                    logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
                    xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
                    xentropy = xentropy * mask[:, i]

                    loss = tf.reduce_sum(xentropy)
                    total_loss += loss

            total_loss = total_loss / tf.reduce_sum(mask[:, 1:])
            return total_loss, img, caption_placeholder, mask

### Parameters ### 
dim_embed = 256 
dim_hidden = 256 
dim_in = 4096 
batch_size = 128 
momentum = 0.9 
n_epochs = 150 

def train(learning_rate=0.001, continue_training=False, transfer=True):

    tf.reset_default_graph()

    feats, captions = get_data(annotation_path, feature_path)
    wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)

    np.save('data/ixtoword', ixtoword)

    index = (np.arange(len(feats)).astype(int))
    np.random.shuffle(index)

    sess = tf.InteractiveSession()
    n_words = len(wordtoix)
    maxlen = np.max([x for x in map(lambda x: len(x.split(' ')), captions)])
    caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)

    loss, image, sentence, mask = caption_generator.build_model()

    saver = tf.train.Saver(max_to_keep=100)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate, global_step,
                                               int(len(index)/batch_size), 0.95)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    tf.global_variables_initializer().run()

    if continue_training:
        if not transfer:
            saver.restore(sess, tf.train.latest_checkpoint(model_path))
        else:
            saver.restore(sess, tf.train.latest_checkpoint(model_path_transfer))
    losses = []
    for epoch in range(n_epochs):
        for start, end in zip(range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):

            current_feats = feats[index[start:end]]
            current_captions = captions[index[start:end]]
            current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
            current_caption_matrix = np.hstack([np.full((len(current_caption_matrix), 1), 0), current_caption_matrix])

            current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array([x for x in map(lambda x: (x != 0).sum() + 2, current_caption_matrix)])

            for ind, row in enumerate(current_mask_matrix):
                row[:nonzeros[ind]] = 1

            _, loss_value = sess.run([train_op, loss], feed_dict={
                image: current_feats.astype(np.float32),
                sentence: current_caption_matrix.astype(np.int32),
                mask: current_mask_matrix.astype(np.float32)
                })

            print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start, len(feats)))
        print("Saving the model from epoch: ", epoch)
        saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

Answer:

The problem comes from passing a tf.Tensor as the shape argument of tf.get_variable(name, shape=None, ...) on this line:

self.word_embedding = tf.get_variable("word_embedding", tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1)) 

I assume you meant to pass the random tensor as the initializer argument. The best way to fix this is to pass it by keyword:

self.word_embedding = tf.get_variable(
    "word_embedding", 
    initializer=tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1)) 

It looks like all of your tf.get_variable() calls will need a similar fix.
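If it helps, here is a sketch of the constructor block with every call converted the same way (same names, shapes, and init ranges as in your question; untested against the rest of the model):

with tf.device("/cpu:0"):
    # initializer= carries the initial value; the variable's shape is inferred from it
    self.word_embedding = tf.get_variable(
        "word_embedding",
        initializer=tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1))
    self.embedding_bias = tf.get_variable(
        "embedding_bias", initializer=tf.zeros([dim_embed]))

    self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)

    self.img_embedding = tf.get_variable(
        "img_embedding",
        initializer=tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1))
    self.img_embedding_bias = tf.get_variable(
        "img_embedding_bias", initializer=tf.zeros([dim_hidden]))

    self.word_encoding = tf.get_variable(
        "word_encoding",
        initializer=tf.random_uniform([dim_hidden, n_words], -0.1, 0.1))
    # init_b from preProBuildWordVocab is already a concrete np.float32 array,
    # so it can be passed as the initializer directly
    self.word_encoding_bias = tf.get_variable(
        "word_encoding_bias", initializer=init_b)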