2016-08-04 1 views

J'essaie d'entraîner un réseau qui produit une valeur dans la plage -1.0..1.0. Il n'y a que six caractéristiques pour l'instant, toutes des nombres flottants. J'ai du mal à faire correspondre les types et les formes. Voici ce que j'ai jusqu'à présent (titre de la question : « Obtenir des formes correctes pour la régression avec Lasagne ») :

#!/usr/bin/env python3 

import lasagne 
import numpy as np 
import sys 
import theano 
import theano.tensor as T 

# Path of the space-separated data file, taken from the command line.
infilename = sys.argv[1]
split_size = 500  # number of leading rows kept for training
epochs = 100  # number of passes over the training rows
theano.config.exception_verbosity = 'high'

# Load everything and force a 7-column layout:
# column 0 is the target, columns 1..6 are the features.
examples = np.genfromtxt(infilename, delimiter=' ').reshape(-1, 7)

# The first `split_size` rows train the network, the remainder is for testing.
train = examples[:split_size]
test = examples[split_size:]

# input and target
train_X, train_y = train[:, 1:], train[:, 0]
test_X, test_y = test[:, 1:], test[:, 0]

# Symbolic placeholders: a matrix of inputs and a vector of targets.
input_var = T.matrix()
target_var = T.vector()

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches of ``batchsize`` rows.

    Only full batches are produced: if ``len(inputs)`` is not a multiple of
    ``batchsize`` the trailing remainder is skipped.

    Args:
        inputs: indexable array of examples (first axis = example).
        targets: indexable array with one entry per example.
        batchsize: number of examples per yielded batch.
        shuffle: when true, examples are visited in a random order
            (a fresh permutation is drawn on every call); otherwise
            batches are contiguous slices in the original order.

    Yields:
        Tuples ``(inputs[batch], targets[batch])`` selected with the same
        indices, so rows stay aligned with their targets.
    """
    assert len(inputs) == len(targets)
    indices = None
    if shuffle:
        indices = np.arange(len(inputs))
        # Without this the "shuffled" order is just 0..n-1.
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            # Plain slice: cheap view, no fancy-indexing copy.
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

# nn structure: 6 input features -> 10 tanh units -> 1 output unit (softmax)
from lasagne.nonlinearities import tanh, softmax, leaky_rectify 
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var) 
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh) 
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=softmax) 

# Symbolic network output, then the training loss built from it.
prediction = lasagne.layers.get_output(net) 
# NOTE(review): target_var is passed as the second argument of aggregate();
# Lasagne documents that argument as per-element weights, not targets --
# this looks unintended, confirm against the Lasagne objectives docs.
loss = lasagne.objectives.aggregate(prediction, target_var) 
# Mean loss plus an L2 penalty on the network parameters, scaled by 1e-4.
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2) 

# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True) 
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.02, momentum=0.9) 

# training function: takes (inputs, targets), returns the loss and applies
# the parameter updates as a side effect of each call
train_fn = theano.function([input_var, target_var], loss, updates=updates) 

for epoch in range(epochs): 
    # From here on `loss` is rebound to a plain number accumulating the
    # per-batch values returned by train_fn (the symbolic loss above is
    # already captured inside train_fn).
    loss = 0 
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, 50, shuffle=True): 
     print('input', input_batch.shape) 
     print('target', target_batch.shape) 
     loss += train_fn(input_batch, target_batch) 
    # NOTE(review): `training_data` is not defined anywhere in the visible
    # script; the training rows are held in `train_X` / `train_y`.
    print('epoch', epoch, 'loss', loss/len(training_data)) 

# NOTE(review): `network` is not defined in the visible script; the layers
# above are bound to `net`.
test_prediction = lasagne.layers.get_output(network, deterministic=True) 
# NOTE(review): argmax over axis 1 of a single-unit output can only be 0;
# for a regression score the raw prediction is presumably wanted -- confirm.
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1)) 
# NOTE(review): test_X[0] is a single row (1-D) while input_var is declared
# as a matrix -- verify the expected input shape.
print('predicted score for first test input', predict_fn(test_X[0])) 


Les données d'entrée sont un fichier de 7 colonnes de nombres flottants, séparées par des espaces. Voici quelques exemples de lignes :

-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0 
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142 
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0 
1.0 1.0 3.0 0.0 0.0 4.0 1.0 
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0 
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0 
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0 
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0 
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0 
1.0 1.0 1.0 1.0 0.0 2.0 0.0 

Ce code s'inspire assez étroitement de l'exemple de référence de Lasagne. L'erreur obtenue est :

/usr/local/lib/python3.5/dist-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module. 
    "downsample module has been moved to the theano.tensor.signal.pool module.") 
input (50, 6) 
target (50,) 
Traceback (most recent call last): 
    File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__ 
    outputs = self.fn() 
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50) 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "./nn_cluster.py", line 66, in <module> 
    loss += train_fn(input_batch, target_batch) 
    File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 871, in __call__ 
    storage_map=getattr(self.fn, 'storage_map', None)) 
    File "/usr/local/lib/python3.5/dist-packages/theano/gof/link.py", line 314, in raise_with_op 
    reraise(exc_type, exc_value, exc_trace) 
    File "/usr/lib/python3/dist-packages/six.py", line 685, in reraise 
    raise value.with_traceback(tb) 
    File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__ 
    outputs = self.fn() 
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50) 
Apply node that caused the error: Elemwise{Mul}[(0, 0)](SoftmaxWithBias.0, InplaceDimShuffle{x,0}.0) 
Toposort index: 21 
Inputs types: [TensorType(float64, matrix), TensorType(float64, row)] 
Inputs shapes: [(50, 1), (1, 50)] 
Inputs strides: [(8, 8), (400, 8)] 
Inputs values: ['not shown', 'not shown'] 
Outputs clients: [[Sum{acc_dtype=float64}(Elemwise{Mul}[(0, 0)].0)]] 

Debugprint of the apply node: 
Elemwise{Mul}[(0, 0)] [id A] <TensorType(float64, matrix)> '' 
|SoftmaxWithBias [id B] <TensorType(float64, matrix)> '' 
| |Dot22 [id C] <TensorType(float64, matrix)> '' 
| | |Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)] [id D] <TensorType(float64, matrix)> '' 
| | | |Dot22 [id E] <TensorType(float64, matrix)> '' 
| | | | |<TensorType(float64, matrix)> [id F] <TensorType(float64, matrix)> 
| | | | |W [id G] <TensorType(float64, matrix)> 
| | | |InplaceDimShuffle{x,0} [id H] <TensorType(float64, row)> '' 
| | | |b [id I] <TensorType(float64, vector)> 
| | |W [id J] <TensorType(float64, matrix)> 
| |b [id K] <TensorType(float64, vector)> 
|InplaceDimShuffle{x,0} [id L] <TensorType(float64, row)> '' 
    |<TensorType(float64, vector)> [id M] <TensorType(float64, vector)> 

Storage map footprint: 
- Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)].0, Shape: (50, 10), ElemSize: 8 Byte(s), TotalSize: 4000 Byte(s) 
- <TensorType(float64, matrix)>, Input, Shape: (50, 6), ElemSize: 8 Byte(s), TotalSize: 2400 Byte(s) 
- W, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s) 
- <TensorType(float64, matrix)>, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s) 
- SoftmaxWithBias.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s) 
- InplaceDimShuffle{x,0}.0, Shape: (1, 50), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s) 
- SoftmaxGrad.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s) 
- <TensorType(float64, vector)>, Input, Shape: (50,), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s) 
- W, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s) 
- b, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s) 
- <TensorType(float64, vector)>, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s) 
- <TensorType(float64, matrix)>, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s) 
- TensorConstant{0.02}, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- b, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
- TensorConstant{0.0001}, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- TensorConstant{(1, 1) of 0.9}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
- TensorConstant{4.00000000..000001e-06}, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- TensorConstant{(1,) of 0.02}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
- Constant{0}, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- Subtensor{int64}.0, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- TensorConstant{(1,) of 0.9}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
- Constant{1}, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- Subtensor{int64}.0, Shape:(), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s) 
- TensorConstant{(1, 1) of 1.0}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
- <TensorType(float64, vector)>, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s) 
TotalSize: 8984.0 Byte(s) 0.000 GB 
TotalSize inputs: 4168.0 Byte(s) 0.000 GB 

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'. 

Une exception similaire est levée lors de l'utilisation de lasagne.objectives.squared_error. Des idées ? Je n'arrive pas à déterminer où la forme des données est incorrecte, si c'est bien là le problème, ni si c'est la bonne façon d'utiliser la fonction objectif.



J'ai copié votre code et vos données d'entrée, modifié quelques éléments, et il s'exécute sans erreur.



import lasagne 
import numpy as np 
import sys 
import theano 
import theano.tensor as T 

# Data file path is hard-coded here (the question's script read it from the
# command line instead).
infilename = 'tt_lasagne.input'
split_size = 500  # number of leading rows kept for training
epochs = 100  # number of passes over the training rows
theano.config.exception_verbosity = 'high'

# Load everything and force a 7-column layout:
# column 0 is the target, columns 1..6 are the features.
examples = np.genfromtxt(infilename, delimiter=' ').reshape(-1, 7)

# The first `split_size` rows train the network, the remainder is for testing.
train = examples[:split_size]
test = examples[split_size:]

# input and target
train_X, train_y = train[:, 1:], train[:, 0]
test_X, test_y = test[:, 1:], test[:, 0]

# Symbolic placeholders: a matrix of inputs and a vector of targets.
input_var = T.matrix()
target_var = T.vector()

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches of ``batchsize`` rows.

    Only full batches are produced: if ``len(inputs)`` is not a multiple of
    ``batchsize`` the trailing remainder is skipped.

    Args:
        inputs: indexable array of examples (first axis = example).
        targets: indexable array with one entry per example.
        batchsize: number of examples per yielded batch.
        shuffle: when true, examples are visited in a random order
            (a fresh permutation is drawn on every call); otherwise
            batches are contiguous slices in the original order.

    Yields:
        Tuples ``(inputs[batch], targets[batch])`` selected with the same
        indices, so rows stay aligned with their targets.
    """
    assert len(inputs) == len(targets)
    indices = None
    if shuffle:
        indices = np.arange(len(inputs))
        # Without this the "shuffled" order is just 0..n-1.
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            # Plain slice: cheap view, no fancy-indexing copy.
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

# nn structure: 6 input features -> 10 tanh units -> 1 tanh output unit
from lasagne.nonlinearities import tanh, softmax, leaky_rectify
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var)
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh)
# The regression target lies in -1.0..1.0, which is exactly the range of
# tanh. softmax normalises across the units of a layer, so on a single unit
# it always outputs 1.0 and the network could never fit anything.
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=tanh)

# The layer output has shape (batch, 1) while target_var is a vector of shape
# (batch,). Combining them element-wise is what raised
# "Input dimension mis-match ... (50, 1) vs (1, 50)"; flattening the
# prediction makes both sides (batch,).
prediction = lasagne.layers.get_output(net).flatten()
# squared_error(prediction, target) is the per-example regression loss.
# (aggregate() takes (loss, weights) and must not be given the targets.)
loss = lasagne.objectives.squared_error(prediction, target_var)
# Mean loss plus an L2 penalty on the network parameters, scaled by 1e-4.
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)

# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.02, momentum=0.9)

# training function: takes (inputs, targets), returns the loss and applies
# the parameter updates as a side effect of each call
train_fn = theano.function([input_var, target_var], loss, updates=updates)

for epoch in range(epochs):
    # Kept separate from the symbolic `loss` above so that name is not rebound.
    epoch_loss = 0.0
    n_batches = 0
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, 50, shuffle=True):
        print('input', input_batch.shape)
        print('target', target_batch.shape)
        epoch_loss += train_fn(input_batch, target_batch)
        n_batches += 1
    # train_fn already returns a per-batch mean, so average over batches
    # (not over examples); max() guards against data sets with no full batch.
    print('epoch', epoch, 'loss', epoch_loss / max(n_batches, 1))

# Deterministic forward pass for prediction; the raw (flattened) output is the
# regression score, so no argmax is applied.
test_prediction = lasagne.layers.get_output(net, deterministic=True).flatten()
predict_fn = theano.function([input_var], test_prediction)
# input_var is a matrix, so pass a 1-row slice rather than a 1-D row; skip the
# demo entirely when the file has no rows beyond the training split.
if len(test_X):
    print('predicted score for first test input', predict_fn(test_X[:1])[0])



-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0 
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142 
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0 
1.0 1.0 3.0 0.0 0.0 4.0 1.0 
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0 
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0 
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0 
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0 
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0 
1.0 1.0 1.0 1.0 0.0 2.0 0.0