BeamSearch takes forever in TensorFlow

I've been experimenting with Seq2Seq in TensorFlow (Python) for a few weeks. I have a working model that uses a bidirectional encoder and an attention-based decoder, and it was running well. I added beam search today, but I've noticed that inference now takes forever with a beam width of 1 or higher; inference only took a few seconds when I was using just the bidirectional encoder and the attention decoder.

Environment details: TensorFlow version 1.3.0, macOS 10.12.4

Below is the relevant part of my code:

# (excerpt; the notebook's usual imports are assumed)
import tensorflow as tf
from tensorflow.python.layers.core import Dense

def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size, encoder_outputs):
    """ 
    Create decoding layer 
    :param dec_input: Decoder input 
    :param encoder_state: Encoder state 
    :param target_sequence_length: The lengths of each sequence in the target batch 
    :param max_target_sequence_length: Maximum length of target sequences 
    :param rnn_size: RNN Size 
    :param num_layers: Number of layers 
    :param target_vocab_to_int: Dictionary to go from the target words to an id 
    :param target_vocab_size: Size of target vocabulary 
    :param batch_size: The size of the batch 
    :param keep_prob: Dropout keep probability 
    :param decoding_embedding_size: Decoding embedding size 
    :param encoder_outputs: Encoder outputs 
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput) 
    """ 
    encoder_outputs_tr = encoder_outputs  # tf.transpose(encoder_outputs, [1, 0, 2])
    # 1. Decoder Embedding 
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size])) 
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input) 

    # 2. Construct the decoder cell 
    def create_cell(rnn_size):
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop

    def create_complete_cell(rnn_size, num_layers, encoder_outputs_tr, batch_size, encoder_state, infer):
        # beam_width is read from the enclosing (global) scope.
        if infer and beam_width > 0:
            # Tile the attention memory, initial state and batch size across the beam.
            encoder_outputs_tr = tf.contrib.seq2seq.tile_batch(encoder_outputs_tr, multiplier=beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=beam_width)
            batch_size = batch_size * beam_width

        dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size, memory=encoder_outputs_tr)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism,
                                                        attention_layer_size=rnn_size, output_attention=False)
        attn_zero = attn_cell.zero_state(batch_size, tf.float32)
        attn_zero = attn_zero.clone(cell_state=encoder_state)
        return attn_zero, attn_cell


    initial_train_state, train_cell = create_complete_cell(rnn_size, num_layers, encoder_outputs_tr, batch_size, encoder_state, False)
    initial_infer_state, infer_cell = create_complete_cell(rnn_size, num_layers, encoder_outputs_tr, batch_size, encoder_state, True)
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"): 
     train_decoder_out = decoding_layer_train(intial_train_state, train_cell, dec_embed_input, 
         target_sequence_length, max_target_sequence_length, output_layer, keep_prob) 

    with tf.variable_scope("decode", reuse=True): 
     if beam_width == 0 : 
      infer_decoder_out = decoding_layer_infer(intial_infer_state, infer_cell, dec_embeddings, 
           target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length, 
           target_vocab_size, output_layer, batch_size, keep_prob) 
     else : 
      infer_decoder_out = decoding_layer_infer_with_Beam(intial_infer_state, infer_cell, dec_embeddings, 
           target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length, 
           target_vocab_size, output_layer, batch_size, keep_prob) 
      print('beam search') 

    return (train_decoder_out, infer_decoder_out) 

""" 
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE 
""" 
#tests.test_decoding_layer(decoding_layer) 


def decoding_layer_infer_with_Beam(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                                   end_of_sequence_id, max_target_sequence_length,
                                   vocab_size, output_layer, batch_size, keep_prob):
    """ 
    Create a decoding layer for inference 
    :param encoder_state: Encoder state 
    :param dec_cell: Decoder RNN Cell 
    :param dec_embeddings: Decoder embeddings 
    :param start_of_sequence_id: GO ID 
    :param end_of_sequence_id: EOS Id 
    :param max_target_sequence_length: Maximum length of target sequences 
    :param vocab_size: Size of decoder/target vocabulary 
    :param output_layer: Function to apply the output layer 
    :param batch_size: Batch size 
    :param keep_prob: Dropout keep probability 
    :return: FinalBeamSearchDecoderOutput containing the beam search predictions 
    """ 

    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32), [batch_size], name='start_tokens') 



    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        cell=dec_cell,
        embedding=dec_embeddings,
        start_tokens=start_tokens,
        end_token=end_of_sequence_id,
        initial_state=encoder_state,
        beam_width=beam_width,
        output_layer=output_layer)


    inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                                 impute_finished=False)[0]
    return inference_decoder_output 



""" 
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE 
""" 
#tests.test_decoding_layer_infer(decoding_layer_infer) 

Below are the model parameters:

# Number of Epochs 
epochs = 200 
# Batch Size 
batch_size = 30 
# RNN Size 
rnn_size = 512 
# Number of Layers 
num_layers = 2 
# Embedding Size 
encoding_embedding_size = 100 
decoding_embedding_size = 100 
# Learning Rate 
learning_rate = 0.001 
# Dropout Keep Probability 
keep_probability = 0.55 
display_step = 10 
beam_width = 1 

I would really appreciate your help; I'm not sure what exactly is going wrong.

Thanks

What is the size of the vocabulary? – Aaron

The source vocabulary is 47 words; the target vocabulary is around 74 words. –

Answer:

OK, I just figured out what I was doing wrong.

I just needed to set the maximum-iterations value in the dynamic decode call, as follows:

inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                             impute_finished=False,
                                                             maximum_iterations=max_target_sequence_length)[0]
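
Without maximum_iterations, dynamic_decode keeps stepping the decoder until every sequence in the (beam-tiled) batch has emitted the end token. With beam search and a model that rarely produces <EOS> on all beams, that while-loop can effectively run forever, which is why inference appeared to hang; capping the iterations bounds the decode loop regardless of what the beams emit.

As a minimal follow-up sketch (assuming the graph above, and that beams are returned best-first as in tf.contrib.seq2seq): the BeamSearchDecoder produces a FinalBeamSearchDecoderOutput whose predicted_ids tensor has shape [batch_size, max_time, beam_width], so the top hypothesis can be read by slicing beam 0:

# Sketch only: inference_decoder_output is the FinalBeamSearchDecoderOutput
# returned by dynamic_decode above. predicted_ids has shape
# [batch_size, max_time, beam_width]; beam 0 is the highest-scoring one.
best_hypothesis = inference_decoder_output.predicted_ids[:, :, 0]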