Beam search prend une éternité dans TensorFlow. J'expérimente avec Seq2Seq dans TensorFlow (Python) depuis quelques semaines. J'avais un modèle fonctionnel, avec un encodeur bidirectionnel et un décodeur basé sur l'attention, qui marchait bien. J'ai ajouté la recherche en faisceau (beam search) aujourd'hui, mais j'ai remarqué que l'inférence prend désormais un temps extrêmement long dès que la largeur de faisceau est de 1 ou plus, alors qu'elle ne prenait que quelques secondes lorsque je n'utilisais que l'encodeur bidirectionnel et le décodeur à attention.
Détails de l'environnement : TensorFlow version 1.3.0, macOS 10.12.4.
ci-dessous est la partie pertinente de mon code:
def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size , encoder_outputs):
    """
    Create the decoding layer: a shared attention decoder cell used by both a
    teacher-forced training decoder and an inference decoder (greedy decoding
    or beam search, selected by the module-level ``beam_width``).

    :param dec_input: Decoder input (batch of target-token id sequences)
    :param encoder_state: Encoder final state used to initialize the decoder
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :param encoder_outputs: Encoder outputs, used as attention memory
        (assumed batch-major, i.e. [batch, time, units] — TODO confirm,
        since the transpose below is commented out)
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # Attention memory; the time-major -> batch-major transpose was disabled,
    # presumably because the encoder already emits batch-major outputs — verify.
    encoder_outputs_tr =encoder_outputs #tf.transpose(encoder_outputs,[1,0,2])
    # 1. Decoder Embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
    # 2. Construct the decoder cell
    def create_cell(rnn_size):
        # Single LSTM layer with dropout on its outputs (keep_prob is closed over).
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1,0.1,seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop
    def create_complete_cell(rnn_size,num_layers,encoder_outputs_tr,batch_size,encoder_state , infer):
        # Build the full attention decoder cell plus its initial state.
        # `beam_width` is a module-level global (defined with the hyperparameters).
        if infer and beam_width >0:
            # Beam search evaluates `beam_width` hypotheses per input, so the
            # attention memory, the initial state and the effective batch size
            # must all be tiled by the beam width.
            encoder_outputs_tr = tf.contrib.seq2seq.tile_batch(encoder_outputs_tr, multiplier=beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=beam_width)
            batch_size = batch_size * beam_width
        dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
        # NOTE(review): no memory_sequence_length is passed, so attention can
        # attend to padding positions — confirm whether that is intended.
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size, memory=encoder_outputs_tr)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism , attention_layer_size=rnn_size , output_attention=False)
        # Start from the wrapper's zero state, then substitute the (tiled)
        # encoder state as the underlying RNN cell state.
        attn_zero = attn_cell.zero_state(batch_size , tf.float32)
        attn_zero = attn_zero.clone(cell_state = encoder_state)
        return attn_zero , attn_cell
    # Two cells are built (train / infer) so the inference one can be tiled
    # for beam search; variable sharing is handled by the "decode" scope below.
    intial_train_state , train_cell = create_complete_cell(rnn_size,num_layers,encoder_outputs_tr,batch_size,encoder_state , False)
    intial_infer_state , infer_cell = create_complete_cell(rnn_size,num_layers,encoder_outputs_tr,batch_size,encoder_state , True)
    # Projection from decoder output to vocabulary logits.
    output_layer = Dense(target_vocab_size,
                         kernel_initializer = tf.truncated_normal_initializer(mean = 0.0, stddev=0.1))
    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(intial_train_state, train_cell, dec_embed_input,
                                                 target_sequence_length, max_target_sequence_length, output_layer, keep_prob)
    # reuse=True: the inference decoder shares the variables created above.
    with tf.variable_scope("decode", reuse=True):
        if beam_width == 0 :
            # beam_width == 0 means plain greedy decoding.
            infer_decoder_out = decoding_layer_infer(intial_infer_state, infer_cell, dec_embeddings,
                                                     target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length,
                                                     target_vocab_size, output_layer, batch_size, keep_prob)
        else :
            infer_decoder_out = decoding_layer_infer_with_Beam(intial_infer_state, infer_cell, dec_embeddings,
                                                               target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length,
                                                               target_vocab_size, output_layer, batch_size, keep_prob)
            print('beam search')
    return (train_decoder_out, infer_decoder_out)
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer(decoding_layer)
def decoding_layer_infer_with_Beam(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                                   end_of_sequence_id, max_target_sequence_length,
                                   vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a decoding layer for inference using beam search.

    :param encoder_state: Decoder initial state (already tiled by beam_width)
    :param dec_cell: Decoder RNN Cell (attention-wrapped)
    :param dec_embeddings: Decoder embeddings
    :param start_of_sequence_id: GO ID
    :param end_of_sequence_id: EOS Id
    :param max_target_sequence_length: Maximum length of target sequences;
        used to cap the decode loop
    :param vocab_size: Size of decoder/target vocabulary (unused here; kept
        for signature parity with decoding_layer_infer)
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size (NOT multiplied by beam_width; the
        BeamSearchDecoder expands each start token into beam_width hypotheses)
    :param keep_prob: Dropout keep probability (already baked into dec_cell)
    :return: FinalBeamSearchDecoderOutput containing predicted ids
    """
    # One <GO> start token per batch element.
    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32),
                           [batch_size], name='start_tokens')
    # `beam_width` is a module-level global (defined with the hyperparameters).
    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        cell=dec_cell,
        embedding=dec_embeddings,
        start_tokens=start_tokens,
        end_token=end_of_sequence_id,
        initial_state=encoder_state,
        beam_width=beam_width,
        output_layer=output_layer)
    # BUG FIX: without maximum_iterations, dynamic_decode keeps stepping until
    # EVERY beam hypothesis has emitted <EOS>, which can run for an effectively
    # unbounded number of steps — this is what made inference hang once beam
    # search was enabled. Cap the loop at the maximum target length.
    # (impute_finished must stay False with BeamSearchDecoder.)
    inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        impute_finished=False,
        maximum_iterations=max_target_sequence_length)[0]
    return inference_decoder_output
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer_infer(decoding_layer_infer)
Ci-dessous, les paramètres du modèle :
# ----- Model hyperparameters -----
epochs = 200                    # full passes over the training set
batch_size = 30                 # sequences per training step
rnn_size = 512                  # LSTM hidden-state width
num_layers = 2                  # stacked LSTM layers
encoding_embedding_size = 100   # source-token embedding width
decoding_embedding_size = 100   # target-token embedding width
learning_rate = 0.001           # optimizer step size
keep_probability = 0.55         # dropout keep probability
display_step = 10               # report progress every N batches
beam_width = 1                  # beam search width at inference (0 = greedy)
J'apprécierais vraiment votre aide, je ne suis pas sûr de ce qui ne va pas exactement.
merci
Quelle est la taille du vocabulaire? – Aaron
le vocabulaire source est de 47 mots, le vocabulaire cible autour de 74 mots –