0

Je forme le modèle "Show and tell" en utilisant tensorflow dans lequel le modèle génère automatiquement les légendes des images. Comment je reçois cette erreur.ValueError: Aucun dégradé fourni pour une variable, vérifiez votre graphique pour les ops qui ne supportent pas les dégradés, entre les variables

C'est le retraçage:

------------------------------------------------------------------------ 
--- 
ValueError        Traceback (most recent call 
last) 
<ipython-input-36-b6da0a27b701> in <module>() 
    1 try: 
    2  #train(.001,False,False) #train from scratch 
----> 3  train(.001,True,True) #continue training from pretrained weights @epoch500 
    4  #train(.001) #train from previously saved weights 
    5 except KeyboardInterrupt: 
    ipython-input-35-39693d0edd0a> in train(learning_rate, continue_training, transfer) 
31  learning_rate = tf.train.exponential_decay(learning_rate, global_step, 
32          int(len(index)/batch_size), 0.95) 
---> 33  train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss) 
34  tf.global_variables_initializer().run() 
35 
/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/optimizer.pyc in minimize(self, loss, global_step, var_list, gate_gradients, aggregation_method, colocate_gradients_with_ops, name, grad_loss) 
320   "No gradients provided for any variable, check your graph for ops" 
321   " that do not support gradients, between variables %s 
and loss %s." % 
--> 322   ([str(v) for _, v in grads_and_vars], loss)) 
323 
324  return self.apply_gradients(grads_and_vars, 
global_step=global_step, 

ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["tf.Variable 'word_embedding:0' shape=(2943, 256) dtype=float32_ref>", "tf.Variable 'embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'img_embedding:0' shape=(4096, 256) dtype=float32_ref>", "tf.Variable 'img_embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'word_encoding:0' shape=(256, 2943) dtype=float32_ref>", "tf.Variable 'word_encoding_bias:0' shape=(2943,) dtype=float32_ref>"] and loss Tensor("RNN/div:0", shape=(), dtype=float32).

Je sais que l'erreur est due au fait qu'il ya une variable qui doesen't maintient le gradient lors de l'optimisation qui est à son tour couper le graphique, mais je suis incapable de le choisir. J'utilise des paramètres de modèle VGG-net 16 déjà formés et le jeu de données d'images FLICKR-30 avec des annotations correspondantes.

Voici le code:

def get_data(annotation_path, feature_path): 
annotations = pd.read_table(annotation_path, sep='\t', header=None, names=['image', 'caption']) 
return np.load(feature_path,'r'), annotations['caption'].values 

def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk 
print('preprocessing %d word vocab' % (word_count_threshold,)) 
word_counts = {} 
nsents = 0 
for sent in sentence_iterator: 
    nsents += 1 
    for w in sent.lower().split(' '): 
    word_counts[w] = word_counts.get(w, 0) + 1 
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold] 
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab))) 


ixtoword = {} 
ixtoword[0] = '.' 
wordtoix = {} 
wordtoix['#START#'] = 0 
ix = 1 
for w in vocab: 
    wordtoix[w] = ix 
    ixtoword[ix] = w 
    ix += 1 

word_counts['.'] = nsents 
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword]) 
bias_init_vector /= np.sum(bias_init_vector) 
bias_init_vector = np.log(bias_init_vector) 
bias_init_vector -= np.max(bias_init_vector) 
return wordtoix, ixtoword, bias_init_vector.astype(np.float32) 

class Caption_Generator(): 
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b): 

    self.dim_in = dim_in 
    self.dim_embed = dim_embed 
    self.dim_hidden = dim_hidden 
    self.batch_size = batch_size 
    self.n_lstm_steps = n_lstm_steps 
    self.n_words = n_words 

    # declare the variables to be used for our word embeddings 
    with tf.device("/cpu:0"): 
     self.word_embedding = tf.Variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding') 

    self.embedding_bias = tf.Variable(tf.zeros([dim_embed]), name='embedding_bias') 

    # declare the LSTM itself 
    self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden) 

    # declare the variables to be used to embed the image feature embedding to the word embedding space 
    self.img_embedding = tf.Variable(tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1), name='img_embedding') 
    self.img_embedding_bias = tf.Variable(tf.zeros([dim_hidden]), name='img_embedding_bias') 

    # declare the variables to go from an LSTM output to a word encoding output 
    self.word_encoding = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='word_encoding') 
    # initialize this bias variable from the preProBuildWordVocab output 
    self.word_encoding_bias = tf.Variable(init_b, name='word_encoding_bias') 

def build_model(self): 
    # declaring the placeholders for our extracted image feature vectors, our caption, and our mask 
    # (describes how long our caption is with an array of 0/1 values of length `maxlen` 
    img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in]) 
    caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps]) 
    mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps]) 

    # getting an initial LSTM embedding from our image_imbedding 
    image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias 

    # setting initial state of our LSTM 
    state = self.lstm.zero_state(self.batch_size, dtype=tf.float32) 

    total_loss = 0.0 
    with tf.variable_scope("RNN"): 
     for i in range(self.n_lstm_steps): 
      if i > 0: 
       #if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding 
       # to the (i-1)th word in our caption 
       with tf.device("/cpu:0"): 
        current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias 
      else: 
       #if this is the first iteration of our LSTM we utilize the embedded image as our input 
       current_embedding = image_embedding 
       if i > 0: 
       # allows us to reuse the LSTM tensor variable on each iteration 
        tf.get_variable_scope().reuse_variables() 

        out, state = self.lstm(current_embedding, state) 
        #out, state = self.tf.nn.dynamic_rnn(current_embedding, state) 


       if i > 0: 
       #get the one-hot representation of the next word in our caption 
        labels = tf.expand_dims(caption_placeholder[:, i], 1) 
        ix_range=tf.range(0, self.batch_size, 1) 
        ixs = tf.expand_dims(ix_range, 1) 
        concat = tf.concat([ixs, labels],1) 
        onehot = tf.sparse_to_dense(
         concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0) 


       #perform a softmax classification to generate the next word in the caption 
        logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias 
        xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot) 
        xentropy = xentropy * mask[:,i] 

        loss = tf.reduce_sum(xentropy) 
        total_loss += loss 

     total_loss = total_loss/tf.reduce_sum(mask[:,1:]) 
     return total_loss, img, caption_placeholder, mask 


### Parameters ### 
dim_embed = 256 
dim_hidden = 256 
dim_in = 4096 
batch_size = 128 
momentum = 0.9 
n_epochs = 150 

def train(learning_rate=0.001, continue_training=False, transfer=True): 

tf.reset_default_graph() 

feats, captions = get_data(annotation_path, feature_path) 
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions) 

np.save('data/ixtoword', ixtoword) 

index = (np.arange(len(feats)).astype(int)) 
np.random.shuffle(index) 


sess = tf.InteractiveSession() 
n_words = len(wordtoix) 
maxlen = np.max([x for x in map(lambda x: len(x.split(' ')), captions) ]) 
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b) 

loss, image, sentence, mask = caption_generator.build_model() 

saver = tf.train.Saver(max_to_keep=100) 
global_step=tf.Variable(0,trainable=False) 
learning_rate = tf.train.exponential_decay(learning_rate, global_step, 
            int(len(index)/batch_size), 0.95) 
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss) 
tf.global_variables_initializer().run() 

if continue_training: 
    if not transfer: 
     saver.restore(sess,tf.train.latest_checkpoint(model_path)) 
    else: 
     saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer)) 
losses=[] 
for epoch in range(n_epochs): 
    for start, end in zip(range(0, len(index), batch_size), range(batch_size, len(index), batch_size)): 

     current_feats = feats[index[start:end]] 
     current_captions = captions[index[start:end]] 
     current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)] 

     current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1) 
     current_caption_matrix = np.hstack([np.full((len(current_caption_matrix),1), 0), current_caption_matrix]) 

     current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1])) 
     nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix)]) 

     for ind, row in enumerate(current_mask_matrix): 
      row[:nonzeros[ind]] = 1 

     _, loss_value = sess.run([train_op, loss], feed_dict={ 
      image: current_feats.astype(np.float32), 
      sentence : current_caption_matrix.astype(np.int32), 
      mask : current_mask_matrix.astype(np.float32) 
      }) 

     print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats))) 
    print("Saving the model from epoch: ", epoch) 
    saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch) 

Répondre

0

Branching dans la routine de la construction de la perte est invalide.

with tf.variable_scope("RNN"): 
     for i in range(self.n_lstm_steps): 
      if i > 0: 
       [...] 
      else: 
       [...] 
       if i > 0: 
        [...] 
       if i > 0: 
        [...] 

Notez que deux ifs ne jamais à court, car ils sont dans la clause else, ce qui signifie que je < = 0. Par conséquent, votre perte est en fait une constante, égale à 0, et donc TF ne pas voir comment l'optimiser par rapport à. variables

+0

@lejiot Ce problème est résolu. Mais maintenant je reçois cette erreur. ValueError: La variable RNN/basic_lstm_cell/kernel n'existe pas ou n'a pas été créée avec tf.get_variable(). Vouliez-vous définir set reuse = None dans VarScope? – clarky

+0

bien que vous appeliez des variables de réutilisation avant ** first ** appel à .lstm, donc il se plaint qu'il ne peut pas réutiliser ses variables car elles n'existent pas encore. Les variables de réutilisation ne doivent être appelées que lors d'appels successifs. – lejlot