
tensorflow Seq2Seq ValueError

This code, designed as a Seq2Seq model ...

# imports 
from utils import get_sorted_buckets 

import logging 
from six.moves import xrange 

import numpy as np 
import tensorflow as tf 


# classes 
class Seq2Seq: 
    def __init__(self, 
       input_vocab_size, 
       output_vocab_size, 
       buckets, 
       layer_size=256, 
       n_layers=3, 
       max_gradient_norm=5.0, 
       batch_size=64, 
       learning_rate=0.5, 
       learning_rate_decay_factor=0.99, 
       rnn_cell=tf.contrib.rnn.GRUCell, 
       n_samples=512, 
       forward_only=False): 
     logging.info('initializing Seq2Seq model') 
     buckets = get_sorted_buckets(buckets) 

     self.input_vocab_size = input_vocab_size 
     self.output_vocab_size = output_vocab_size 
     self.buckets = buckets 
     self.layer_size = layer_size 
     self.n_layers = n_layers 
     self.max_gradient_norm = max_gradient_norm 
     self.batch_size = batch_size 
     self.learning_rate = learning_rate 
     self.learning_rate_decay_factor = learning_rate_decay_factor 
     self.rnn_cell = rnn_cell 
     self.n_samples = n_samples 
     self.forward_only = forward_only 

     self.learning_rate = tf.Variable(float(self.learning_rate), 
             trainable=False) 
     self.learning_rate_decay_operation = self.learning_rate.assign(
         self.learning_rate * self.learning_rate_decay_factor) 
     self.global_step = tf.Variable(0, trainable=False) 

     self.encoder_inputs = [self._get_val_placeholder('encoder', i) 
           for i in xrange(buckets[-1][0])] 

     self.decoder_inputs = [self._get_val_placeholder('decoder', i) 
           for i in xrange(buckets[-1][1] + 1)] 
     self.target_weights = [self._get_val_placeholder('weight', i, 
                 dtype=tf.float32) 
           for i in xrange(buckets[-1][1] + 1)] 

     logging.debug('getting model') 
     self.outputs, self.losses = self._get_model_with_buckets() 

     self.saver = tf.train.Saver(tf.all_variables()) 

     if not forward_only: 
      logging.debug('setting gradient norms and updates') 
      out = self._get_gradient_norms_and_updates() 
      self.gradient_norms, self.updates = out 

    def _get_softmax_loss_func_and_output_proj(self): 
     logging.debug('function: _get_softmax_loss_func_and_output_proj') 
     use_sampled_softmax = self.n_samples > 0 and \ 
           self.n_samples < self.output_vocab_size 
     if use_sampled_softmax: 
      w = tf.get_variable('proj_w', 
           [self.layer_size, self.output_vocab_size], 
           dtype=tf.int32) 
      w_t = tf.transpose(w) 
      b = tf.get_variable('proj_b', 
           [self.output_vocab_size], 
           dtype=tf.int32) 

      def get_sampled_loss(inputs, labels): 
       labels = tf.reshape(labels, [-1, 1]) 

       print(w_t) 
       print(b) 
       print(labels) 
       print(inputs) 

       return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=inputs, 
        num_sampled=self.n_samples, 
        num_classes=self.output_vocab_size) 

      softmax_loss_function = get_sampled_loss 
      output_projection = (w, b) 
     else: 
      softmax_loss_function = None 
      output_projection = None 

     return softmax_loss_function, output_projection 

    def _get_cell(self): 
     logging.debug('function: _get_cell') 
     single_cell = self.rnn_cell(self.layer_size) 
     # single_cell = self.rnn_cell 

     if self.n_layers > 1: 
      cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.n_layers) 
     else: 
      cell = single_cell 

     return cell 

    def _get_val_placeholder(self, name, idx, dtype=tf.int32): 
     return tf.placeholder(dtype, shape=[None], name='{}_{}'.format(name, 
                     idx)) 

    def _get_model_with_buckets(self): 
     targets = [self.decoder_inputs[i + 1] 
        for i in xrange(len(self.decoder_inputs) - 1)] 

     out = self._get_softmax_loss_func_and_output_proj() 
     softmax_loss_function, output_projection = out 

     cell = self._get_cell() 

     def seq2seq_func(encoder_inputs, decoder_inputs, do_decode): 
      print('seq2seq: {} {} {}'.format(encoder_inputs, decoder_inputs, do_decode)) 
      return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
       encoder_inputs, decoder_inputs, cell, 
       num_encoder_symbols=self.input_vocab_size, 
       num_decoder_symbols=self.output_vocab_size, 
       embedding_size=self.layer_size, 
       output_projection=output_projection, 
       feed_previous=do_decode) 

     logging.debug('getting model with buckets') 
     outputs, losses = tf.contrib.legacy_seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets, 
      self.target_weights, self.buckets, 
      lambda x, y: seq2seq_func(x, y, self.forward_only), 
      softmax_loss_function=softmax_loss_function) 

     logging.debug('forward only') 
     if self.forward_only: 
      if output_projection is not None: 
       for i in xrange(len(self.buckets)): 
        w = output_projection[0] 
        b = output_projection[1] 
        new_bucket_outputs = [tf.matmul(output, w) + b 
              for output in outputs[i]] 

        self.outputs[i] = new_bucket_outputs 

     return outputs, losses 

    def _get_gradient_norms_and_updates(self): 
     params = tf.trainable_variables() 

     gradient_norms = [] 
     updates = [] 

     optimizer = tf.train.GradientDescentOptimizer(self.learning_rate) 
     for i in xrange(len(self.buckets)): 
      gradients = tf.gradients(self.losses[i], params) 
      clipped_gradients, norm = tf.clip_by_global_norm(
       gradients, self.max_gradient_norm) 
      gradient_norms.append(norm) 
      updates.append(optimizer.apply_gradients(
       zip(clipped_gradients, params), global_step=self.global_step)) 

     return gradient_norms, updates 

    def step(self, session, encoder_inputs, decoder_inputs, target_weights, 
      bucket_id, forward_only): 
     encoder_size, decoder_size = self.buckets[bucket_id] 
     if len(encoder_inputs) != encoder_size: 
      raise ValueError('Encoder length must be equal to one in bucket.') 
     elif len(decoder_inputs) != decoder_size: 
      raise ValueError('Decoder length must be equal to one in bucket.') 
     elif len(target_weights) != decoder_size: 
      raise ValueError('Weights length must be equal to one in bucket.') 

     input_feed = {} 
     for i in xrange(encoder_size): 
      input_feed[self.encoder_inputs[i].name] = encoder_inputs[i] 
     for i in xrange(decoder_size): 
      input_feed[self.decoder_inputs[i].name] = decoder_inputs[i] 
      input_feed[self.target_weights[i].name] = target_weights[i] 

     last_target = self.decoder_inputs[decoder_size].name 
     input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32) 

     if forward_only: 
      output_feed = [self.losses[bucket_id]] 

      for i in xrange(decoder_size): 
       output_feed.append(self.outputs[bucket_id][i]) 
     else: 
      output_feed = [self.updates[bucket_id], 
          self.gradient_norms[bucket_id], 
          self.losses[bucket_id]] 

     outputs = session.run(output_feed, input_feed) 

     if not forward_only: 
      return outputs[1], outputs[2], None 
     else: 
      return None, outputs[0], outputs[1:] 

... generates this error:

INFO:root:initializing Seq2Seq model 
DEBUG:root:getting model 
DEBUG:root:function: _get_softmax_loss_func_and_output_proj 
DEBUG:root:function: _get_cell 
DEBUG:root:getting model with buckets 
seq2seq: [<tf.Tensor 'encoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_9:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_10:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_11:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_12:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_13:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_14:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_15:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_16:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_17:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_18:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_19:0' shape=(?,) dtype=int32>] [<tf.Tensor 'decoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_9:0' shape=(?,) dtype=int32>] False 
Tensor("transpose:0", shape=(20000, 256), dtype=int32) 
Tensor("proj_b/read:0", shape=(20000,), dtype=int32) 
Tensor("model_with_buckets/sequence_loss/sequence_loss_by_example/Reshape:0", shape=(?, 1), dtype=float32) 
Tensor("decoder_1:0", shape=(?,), dtype=int32) 
Traceback (most recent call last): 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 670, in _call_cpp_shape_fn_impl 
    status) 
    File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 89, in __exit__ 
    next(self.gen) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status 
    pywrap_tensorflow.TF_GetCode(status)) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256]. 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "train.py", line 87, in <module> 
    main() 
    File "train.py", line 82, in main 
    model = get_model() 
    File "train.py", line 76, in get_model 
    get_rnn_cell(), FLAGS.n_samples, FLAGS.forward_only) 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 58, in __init__ 
    self.outputs, self.losses = self._get_model_with_buckets() 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 141, in _get_model_with_buckets 
    softmax_loss_function=softmax_loss_function) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1195, in model_with_buckets 
    softmax_loss_function=softmax_loss_function)) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1110, in sequence_loss 
    softmax_loss_function=softmax_loss_function)) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1067, in sequence_loss_by_example 
    crossent = softmax_loss_function(target, logit) 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 91, in get_sampled_loss 
    num_classes=self.output_vocab_size) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 1191, in sampled_softmax_loss 
    name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 995, in _compute_sampled_logits 
    inputs, sampled_w, transpose_b=True) + sampled_b 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1855, in matmul 
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1454, in _mat_mul 
    transpose_b=transpose_b, name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op 
    op_def=op_def) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2397, in create_op 
    set_shapes_for_outputs(ret) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1757, in set_shapes_for_outputs 
    shapes = shape_func(op) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1707, in call_with_requiring 
    return call_cpp_shape_fn(op, require_shape_fn=True) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn 
    debug_python_shape_fn, require_shape_fn) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 675, in _call_cpp_shape_fn_impl 
    raise ValueError(err.message) 
ValueError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256]. 

The error probably has to do with the sampled softmax loss. Before this I ran into a lot of errors related to the recent TensorFlow update, the new argument order and so on. There may be more errors in the code, but I have to solve this one before I can run into the others.

Two days have passed and I still don't know what to do. What changes do I need to make to the code to get it to work?

Answer


In a newer TensorFlow update, the order of the arguments passed to the sampled softmax loss function was reversed: the labels now come first.

Don't use this:

def get_sampled_loss(inputs, labels): 
    labels = tf.reshape(labels, [-1, 1]) 

    return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=inputs, 
        num_sampled=self.n_samples, 
        num_classes=self.output_vocab_size) 

Use this instead:

def get_sampled_loss(labels, inputs): 
    labels = tf.reshape(labels, [-1, 1]) 

    return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=inputs, 
        num_sampled=self.n_samples, 
        num_classes=self.output_vocab_size) 

The only difference between the two snippets is the argument order: instead of defining the function as get_sampled_loss(inputs, labels), define it as get_sampled_loss(labels, inputs), since that is the order in which sequence_loss_by_example now calls it (see crossent = softmax_loss_function(target, logit) in the traceback).
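
For reference, here is a minimal standalone sketch (illustrative names, not the original model, assuming TF 1.x) of the shapes tf.nn.sampled_softmax_loss expects. The rank error in the traceback comes from the swap: the rank-1 label tensor ends up in the inputs slot and gets multiplied against the sampled weights.

import tensorflow as tf

layer_size = 256
output_vocab_size = 20000
n_samples = 512

# Projection variables are float32, matching the float hidden states.
w = tf.get_variable('proj_w', [layer_size, output_vocab_size], dtype=tf.float32)
w_t = tf.transpose(w)                        # [output_vocab_size, layer_size]
b = tf.get_variable('proj_b', [output_vocab_size], dtype=tf.float32)

inputs = tf.placeholder(tf.float32, [None, layer_size])   # rank 2: [batch, layer_size]
labels = tf.placeholder(tf.int32, [None])                  # rank 1: target word ids

loss = tf.nn.sampled_softmax_loss(
    weights=w_t,                             # [num_classes, dim]
    biases=b,                                # [num_classes]
    labels=tf.reshape(labels, [-1, 1]),      # rank 2: [batch, 1]
    inputs=inputs,                           # rank 2: [batch, dim]
    num_sampled=n_samples,
    num_classes=output_vocab_size)

With the arguments in this order, the internal MatMul gets the rank-2 inputs it expects; with the swapped wrapper, the rank-1 targets were passed as inputs, which is exactly the "Shape must be rank 2 but is rank 1" error above.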