
Weights and biases are not being updated in TensorFlow

I built this neural network to determine whether a house is a good buy or a bad buy. For some reason, the code does not update the weights and biases, and my loss stays the same. Here is my code:

import pandas as pd 
import tensorflow as tf 

data = pd.read_csv("E:/workspace_py/datasets/good_bad_buy.csv") 

features = data.drop(['index', 'good buy'], axis = 1) 
lbls = data.drop(['index', 'area', 'bathrooms', 'price', 'sq_price'], axis = 1) 

features = features[0:20] 
lbls = lbls[0:20] 

print(features) 
print(lbls) 
n_examples = len(lbls) 

# Model 

# Hyper parameters 

epochs = 100 
learning_rate = 0.1 
batch_size = 1 

input_data = tf.placeholder('float', [None, 4]) 
labels = tf.placeholder('float', [None, 1]) 

weights = { 
      'hl1': tf.Variable(tf.random_normal([4, 10])), 
      'hl2': tf.Variable(tf.random_normal([10, 10])), 
      'hl3': tf.Variable(tf.random_normal([10, 4])), 
      'ol': tf.Variable(tf.random_normal([4, 1])) 
      } 

biases = { 
      'hl1': tf.Variable(tf.random_normal([10])), 
      'hl2': tf.Variable(tf.random_normal([10])), 
      'hl3': tf.Variable(tf.random_normal([4])), 
      'ol': tf.Variable(tf.random_normal([1])) 
      } 

hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1'])) 
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2'])) 
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3'])) 
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol'])) 

loss = tf.reduce_mean((labels - ol)**2) 
train = tf.train.AdamOptimizer(learning_rate).minimize(loss) 

sess = tf.Session() 
sess.run(tf.global_variables_initializer()) 

iterations = int(n_examples/batch_size) 


for epoch_no in range(epochs):
    ptr = 0
    for iteration_no in range(iterations):
        epoch_input = features[ptr:ptr+batch_size]
        epoch_label = lbls[ptr: ptr+batch_size]
        ptr = ptr + batch_size
        _, err = sess.run([train, loss], feed_dict={input_data: features, labels: lbls})
    print("Error at epoch ", epoch_no, ": ", err)

print(sess.run(ol, feed_dict={input_data: [[2104, 3, 399900, 190.0665]]})) 

This is the dataset:

Features: 

    area bathrooms price sq_price 
0 2104   3 399900 190.066540 
1 1600   3 329900 206.187500 
2 2400   3 369000 153.750000 
3 1416   2 232000 163.841808 
4 3000   4 539900 179.966667 
5 1985   4 299900 151.083123 
6 1534   3 314900 205.280313 
7 1427   3 198999 139.452698 
8 1380   3 212000 153.623188 
9 1494   3 242500 162.315930 
10 1940   4 239999 123.710825 
11 2000   3 347000 173.500000 
12 1890   3 329999 174.602645 
13 4478   5 699900 156.297454 
14 1268   3 259900 204.968454 
15 2300   4 449900 195.608696 
16 1320   2 299900 227.196970 
17 1236   3 199900 161.731392 
18 2609   4 499998 191.643542 
19 3031   4 599000 197.624546 

Labels: 

    good buy 
0  1.0 
1  0.0 
2  1.0 
3  0.0 
4  1.0 
5  0.0 
6  0.0 
7  1.0 
8  0.0 
9  0.0 
10  1.0 
11  1.0 
12  1.0 
13  1.0 
14  0.0 
15  1.0 
16  0.0 
17  1.0 
18  1.0 
19  1.0 

Any suggestions on how to fix this? I have tried tf.reduce_sum instead of tf.reduce_mean. I have also tried a larger batch_size.

Answer 1:

A few things to consider:

  • The minibatch is never actually used, since you feed features and lbls instead of epoch_input and epoch_label.
  • You don't condition your data in any way, so the values are completely out of range; the code below normalizes the features by their mean and standard deviation. You might also consider using batch_normalization.
  • You never evaluate the error. You need a training set and a test set. The code below does not hold out data, but it reports accuracy (% correct) rather than just the loss (which is a weak proxy for the error, so you shouldn't call it err).
  • You initialize the biases to random normals. You probably want to start them at zero.
  • You should probably use tf.layers or another high-level API (a short sketch follows after the code below).

The code below reaches about 95% training accuracy. You would want to evaluate on a dataset that is not used for training in order to measure the test error.

#!/usr/bin/env python 
import sys 
import pandas as pd 
import numpy as np 
import tensorflow as tf 


data = pd.read_csv("data.csv") 

features = data.drop(['good buy'], axis = 1) 
lbls = data.drop([ 'area', 'bathrooms', 'price', 'sq_price'], axis = 1) 

features = features[0:20] 
lbls = lbls[0:20] 

mu = np.mean(features, axis=0) 
sigma = (np.std(features, axis=0)) 
features = (features - mu)/sigma 

n_examples = len(lbls) 

# Model 

# Hyper parameters 

epochs = 100 
learning_rate = 0.01 
batch_size = 5 

input_data = tf.placeholder('float', [None, 4]) 
labels = tf.placeholder('float', [None, 1]) 

weights = { 
     'hl1': tf.Variable(tf.random_normal([4, 10])), 
     'hl2': tf.Variable(tf.random_normal([10, 10])), 
     'hl3': tf.Variable(tf.random_normal([10, 4])), 
     'ol': tf.Variable(tf.random_normal([4, 1])) 
     } 

biases = { 
     'hl1': tf.Variable(tf.zeros([10])), 
     'hl2': tf.Variable(tf.zeros([10])), 
     'hl3': tf.Variable(tf.zeros([4])), 
     'ol': tf.Variable(tf.zeros([1])) 
     } 



hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1'])) 
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2'])) 
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3'])) 
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol'])) 

loss = tf.reduce_mean((labels - ol)**2) 
train = tf.train.AdamOptimizer(learning_rate).minimize(loss) 

sess = tf.Session() 
sess.run(tf.global_variables_initializer()) 

iterations = int(n_examples/batch_size) 


def training_accuracy(): 
    foo, = sess.run([ol], feed_dict={input_data: features, labels: lbls}) 
    return (float(np.count_nonzero(np.equal(np.round(foo), lbls)))/float(lbls.shape[0])) 


print("Initial training accuracy %f" % training_accuracy()) 


for epoch_no in range(epochs):
    ptr = 0
    for iteration_no in range(iterations):
        epoch_input = features[ptr:ptr+batch_size]
        epoch_label = lbls[ptr: ptr+batch_size]
        ptr = (ptr + batch_size)%len(features)
        _, err = sess.run([train, loss], feed_dict={input_data: epoch_input, labels: epoch_label})
    print("Error at epoch ", epoch_no, ": ", err)
    print(" Training accuracy %f" % training_accuracy())

Also, please don't post usage questions like this on GitHub; they belong here on StackOverflow.

Answer 2:

I don't know whether this is the problem in your case, but the gradient of the sigmoid function becomes very small when its input is large, which can make the updates extremely slow.

To check whether this is the case, try initializing all your weights to very small values. You can control this by setting a standard deviation for your random normals:

tf.Variable(tf.random_normal([4, 10], stddev=0.1)) 
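
For intuition (my own check, not part of the original answer), the sigmoid derivative s'(x) = s(x)*(1 - s(x)) collapses quickly as |x| grows, which is exactly what happens when unnormalized inputs such as a price around 400000 hit randomly initialized weights:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

for x in [0.0, 2.0, 10.0, 50.0]:
    s = sigmoid(x)
    print(x, s * (1.0 - s))   # 0.25, ~0.105, ~4.5e-05, ~2e-22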

Answer 3:

There are several things wrong with your code. First of all, change the training step so that it feeds the minibatch:

epoch_input = features[ptr:ptr+batch_size]
epoch_label = lbls[ptr: ptr+batch_size]
ptr = ptr + batch_size
# _, err = sess.run([train, loss], feed_dict={input_data: features, labels: lbls})
_, err = sess.run([train, loss], feed_dict={input_data: epoch_input, labels: epoch_label})

With that change it now actually uses the minibatch.

Debugging the gradients:

You can always check a few things by adding

loss = tf.Print(loss, [tf.reduce_sum(weights['hl1'])]) 

This will print the elements of that list, [tf.reduce_sum(weights['hl1'])]. To investigate your problem further, you can inspect the gradients themselves instead of just calling minimize:

grads = tf.reduce_sum(tf.gradients(loss, ol)[0]) 
sess.run(grads, {input_data: features, labels: lbls}) 

And finally, the loss function is inappropriate and numerically unstable for classification. With your version, I get:

variables 
    Variable:0 
    Variable_1:0 
    Variable_2:0 
    Variable_3:0 
    Variable_4:0 
    Variable_5:0 
    Variable_6:0 
    Variable_7:0 
I tensorflow/core/kernels/logging_ops.cc:79] [-6.2784553] 
----------------------------------------- 
name MatMul_grad 
gradient [[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] 
value [[-0.59977376 -0.30060738 0.55068201 0.15304407 1.39992142 0.07495346 
    -0.87189424 -0.22595075 -0.30094525 -1.2688272 ] 
[-0.44018757 1.08651936 -0.26267499 -0.54463315 0.47019768 0.69873857 
    0.56195319 0.20222363 0.38143152 -0.92212462] 
[-0.39977714 -1.07244122 0.41926911 1.4951371 -2.28751612 0.45676312 
    0.88010246 -0.88077509 -1.25860023 0.56874037] 
[-0.98260719 -1.30747247 -1.4460088 1.0717535 0.08794415 -0.53184992 
    -1.17537284 -0.51598179 -0.15323587 0.91142744]] 
----------------------------------------- 
name MatMul_1_grad 
gradient [[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] 
value [[-0.1170694 0.12174897 0.91696155 0.59427398 0.90844423 0.29010534 
    -0.34039831 -0.62824941 0.37833953 0.27777222] 
[-0.34947088 1.09264851 0.27353975 1.31722498 -0.42032316 -2.74952078 
    -0.66349608 -0.61844724 -0.82141227 1.21691799] 
[ 0.10453336 -1.68631995 0.45700032 -1.58120835 -1.23378754 -0.05648948 
    -1.64761281 -0.57684237 -0.06499017 -0.49623618] 
[ 1.47821534 -0.5329541 0.09209292 1.78089786 1.71149898 0.30547267 
    0.39544162 1.00369155 1.0097307 -0.92320329] 
[ 1.27038908 -2.17246103 -0.31276336 0.8945803 0.30964327 1.15329361 
    0.9711507 -0.36301252 -0.05652813 0.63399518] 
[-0.30909851 -0.41660413 -0.50603527 0.11735299 -0.26837045 0.16547598 
    -0.33875859 -0.46821991 0.25723135 -0.80380815] 
[-0.86255074 -1.11751068 0.01365725 0.66119182 0.48947951 1.6353699 
    -0.794447 0.43182942 -0.97692633 -1.62605619] 
[ 1.38552308 0.83679706 -0.87287223 2.59401655 -0.61855  0.38301265 
    1.09983373 0.49209142 1.03003716 -1.33537853] 
[ 0.74452382 1.57940936 -0.90974236 -1.2211293 -1.1076287 0.92846316 
    -0.46856263 -0.3179535 0.75120807 -0.86442506] 
[ 0.31622764 -0.35965034 -0.02351121 -0.0650174 0.4714573 0.35687482 
    1.43354905 0.39608309 0.42744714 -0.37226421]] 
----------------------------------------- 
name MatMul_2_grad 
gradient [[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.] 
[ 0. 0. 0. 0.]] 
value [[-1.50904143 0.00228321 1.45787132 0.68312413] 
[-0.16627057 1.31303644 1.16326404 0.72901946] 
[ 0.8004092 0.37329885 0.89361066 -0.19850619] 
[ 1.58354807 -1.05612624 0.69891322 -0.32565734] 
[-1.57602286 -0.41256282 0.69086516 -0.54095054] 
[ 1.72376788 -0.53928965 -0.71574098 -0.94974124] 
[-0.62061429 1.51380932 -0.72585452 -0.07695383] 
[ 0.35537818 1.49691582 0.03931179 0.93435526] 
[ 0.20697887 1.39266443 0.73217523 -0.64737892] 
[ 1.00519872 0.90984046 1.68565321 -0.28157935]] 
----------------------------------------- 
name MatMul_3_grad 
gradient [[ 0.] 
[ 0.] 
[ 0.] 
[ 0.]] 
value [[ 0.94082022] 
[ 0.14753926] 
[-0.08765228] 
[ 1.32516992]] 
----------------------------------------- 
name Add_grad 
gradient [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
value [ 1.71239722 1.12632215 0.75409448 0.01951236 0.32135537 -1.46281374 
    0.40413955 0.54653352 -0.57894999 0.2746354 ] 
----------------------------------------- 
name Add_1_grad 
gradient [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 
value [ 0.74800217 -0.43517059 -0.77706921 1.46858656 1.09103405 -0.46681881 
    0.6126743 -2.27877688 1.48809242 -1.19616997] 
----------------------------------------- 
name Add_2_grad 
gradient [ 0. 0. 0. 0.] 
value [-0.12137324 -0.23238407 0.17909229 -0.75496733] 
----------------------------------------- 
name Add_3_grad 
gradient [ 0.] 
value [-0.91176724] 

As you can see, almost all the gradients are zero. Why?

  • by definition, (labels - ol) lies between -1 and 1
  • squaring it makes the value much smaller than one
  • the derivative of the sigmoid s(x) is s'(x) = s(x)*(1-s(x)); the gradients are multiplied by this value, which is again much smaller than one (a small numeric check follows below)
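
A quick numeric illustration of that chain of factors (my own sketch, not part of the original answer): the gradient of the squared error with respect to the output unit's pre-activation z is 2*(ol - label)*s'(z), and both factors are already well below one:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

label, z = 1.0, 3.0                 # hypothetical pre-activation of the output unit
ol = sigmoid(z)                     # ~0.953
grad = 2.0 * (ol - label) * ol * (1.0 - ol)   # d/dz of (label - ol)**2
print(grad)                         # ~ -0.004, and it shrinks further as |z| grows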

But after switching to sparse_softmax_cross_entropy_with_logits, which is numerically stable and works in the log domain, I get:

variables 
    Variable:0 
    Variable_1:0 
    Variable_2:0 
    Variable_3:0 
    Variable_4:0 
    Variable_5:0 
    Variable_6:0 
    Variable_7:0 
----------------------------------------- 
name MatMul_grad 
gradient [[ -1.42780918e-05 -1.96137808e-05 -2.44040220e-05 -2.25691911e-05 
    0.00000000e+00 2.95208647e-05 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ -2.54181440e-08 -3.49168410e-08 -4.34445262e-08 -4.01781257e-08 
    0.00000000e+00 5.25536308e-08 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ -2.45539122e-03 -3.37296468e-03 -4.19673882e-03 -3.88120394e-03 
    0.00000000e+00 5.07667707e-03 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ -1.42123906e-06 -1.95235293e-06 -2.42917258e-06 -2.24653377e-06 
    0.00000000e+00 2.93850212e-06 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00]] 
value [[ 0.43133125 -0.40009859 -0.08456381 0.59587955 0.57171088 -0.9824872 
    1.18876612 0.9704771 0.74798232 0.15660612] 
[-1.18380785 0.22617982 -1.15734088 -0.50478351 1.43819618 1.55950046 
    -1.1510663 -0.88835335 0.58378232 0.56860197] 
[ 0.29826403 0.02192715 0.62225986 2.47716165 -0.9223454 1.70159853 
    -1.03968358 -0.26019615 -0.33808291 -0.30873826] 
[ 0.59774327 -1.28855145 -0.43420359 -0.4413566 -0.19220066 0.96984953 
    -0.04922202 0.32994318 -1.05539823 -0.80112725]] 
----------------------------------------- 
name MatMul_1_grad 
gradient [[ 0.00000000e+00 1.15650124e-03 0.00000000e+00 0.00000000e+00 
    6.59449317e-04 -1.09400018e-03 0.00000000e+00 -4.02117817e-04 
    5.44495881e-04 -8.90314346e-04] 
[ 0.00000000e+00 7.24206184e-05 0.00000000e+00 0.00000000e+00 
    4.12950030e-05 -6.85067716e-05 0.00000000e+00 -2.51807924e-05 
    3.40965707e-05 -5.57518724e-05] 
[ 0.00000000e+00 2.38713808e-03 0.00000000e+00 0.00000000e+00 
    1.36117137e-03 -2.25812919e-03 0.00000000e+00 -8.30012548e-04 
    1.12389564e-03 -1.83770037e-03] 
[ 0.00000000e+00 9.52679198e-03 0.00000000e+00 0.00000000e+00 
    5.43227792e-03 -9.01193265e-03 0.00000000e+00 -3.31248436e-03 
    4.48533799e-03 -7.33405072e-03] 
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ 0.00000000e+00 6.51591457e-03 0.00000000e+00 0.00000000e+00 
    3.71544389e-03 -6.16377220e-03 0.00000000e+00 -2.26559630e-03 
    3.06777749e-03 -5.01617463e-03] 
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00]] 
value [[ 0.38902158 -2.14370036 -1.02228141 -0.6492967 1.87193418 -0.06453216 
    1.0013988 -1.26857054 0.59826601 0.45045251] 
[ 0.51465249 -1.09108925 -0.21368918 -0.49310678 -0.87893176 -0.07944249 
    -0.15810326 1.65703297 1.01812947 -0.95572269] 
[-1.76351583 -1.46950841 1.43533802 2.15617752 1.30682683 0.77409673 
    -1.50309181 0.81978178 0.6672287 -0.434971 ] 
[-0.7291944 2.16516733 -1.39850736 -1.06059277 0.40035763 1.23335707 
    -0.03707252 1.88107574 0.09459961 2.11439633] 
[-1.39152992 -1.39924514 -0.35704514 -0.71152836 -2.68857026 0.78129828 
    -1.0077033 -1.26149333 0.4403404 -0.10159389] 
[ 0.37354535 0.12654085 0.7632165 -0.76493222 0.68177891 -0.34254205 
    -1.11582613 2.60665917 1.53196526 -0.867055 ] 
[ 0.62746197 -0.01072595 3.26629376 1.28371656 -0.88725293 3.55530715 
    0.67065352 -0.61927503 1.20604384 -0.87207574] 
[-0.68954837 1.89912283 0.90083456 0.02054735 -0.23425011 0.39949065 
    -0.08969283 -0.75943565 1.0924015 0.28920195] 
[-0.64865923 -1.29299021 -0.39945969 0.02289505 1.46024895 0.94282049 
    -0.99704605 -1.36124468 0.76788425 0.86770487] 
[ 0.63794595 1.68530416 -0.15548207 -0.22658408 -0.45446202 -0.77308726 
    -0.12694608 1.17369819 2.25879693 0.20346723]] 
----------------------------------------- 
name MatMul_2_grad 
gradient [[ 0.   0.   0.   0.  ] 
[-0.02205572 0.   0.00960038 0.  ] 
[ 0.   0.   0.   0.  ] 
[ 0.   0.   0.   0.  ] 
[-0.01932034 0.   0.00840973 0.  ] 
[-0.01617817 0.   0.00704201 0.  ] 
[ 0.   0.   0.   0.  ] 
[-0.05091252 0.   0.02216113 0.  ] 
[-0.0189826 0.   0.00826272 0.  ] 
[-0.01993647 0.   0.00867792 0.  ]] 
value [[-0.18724969 -0.0544498 -0.69153035 0.47535184] 
[-0.75444973 -1.33321464 -0.13066645 1.56889391] 
[-0.6458627 1.17859495 -0.75926393 0.30138403] 
[ 1.0069555 -0.69344127 0.49295315 0.54917085] 
[-0.55954564 -1.13277721 -0.37167427 -0.64837182] 
[ 0.93753678 1.12197697 0.63789612 0.52438796] 
[ 0.77543265 -1.241382 1.78230286 -0.6928125 ] 
[ 0.95383584 -2.00331807 1.63409865 -0.36474878] 
[-0.73891008 2.066082 -0.94303596 -0.42322466] 
[ 0.38519588 0.03278512 -0.3487882 -1.50447905]] 
----------------------------------------- 
name MatMul_3_grad 
gradient [[ 0.08460998] 
[ 0.  ] 
[ 0.16564058] 
[ 0.  ]] 
value [[-0.35376808] 
[-0.07330427] 
[ 0.15398768] 
[-0.06484076]] 
----------------------------------------- 
name Add_grad 
gradient [ -8.22783885e-09 -1.13025616e-08 -1.40629695e-08 -1.30056375e-08 
    0.00000000e+00 1.70115797e-08 0.00000000e+00 0.00000000e+00 
    0.00000000e+00 0.00000000e+00] 
value [-1.00038147 -0.56519473 0.59372097 -1.1646167 -0.16213787 -0.69313556 
    0.62788707 1.03768504 0.57876503 -0.5201084 ] 
----------------------------------------- 
name Add_1_grad 
gradient [ 0.00000000e+00 1.28705375e-08 0.00000000e+00 0.00000000e+00 
    7.33891703e-09 -1.21749730e-08 0.00000000e+00 -4.47511184e-09 
    6.05961770e-09 -9.90818183e-09] 
value [ 0.02854451 -1.46039021 -0.03916361 0.40116394 0.16030532 0.88267213 
-0.46328214 0.18927227 -1.7536788 -0.46590349] 
----------------------------------------- 
name Add_2_grad 
gradient [ -1.84504412e-08 0.00000000e+00 8.03108247e-09 0.00000000e+00] 
value [ 0.94534302 -0.9080081 -1.86719894 -1.31547296] 
----------------------------------------- 
name Add_3_grad 
gradient [ 0.29727879 -0.29727876] 
value [ 0.07999782 -0.75647992] 

This time the gradients are non-zero (although still very small). The code to reproduce this is:

import numpy as np 
import tensorflow as tf 

features = [ 
[2104, 3, 399900, 190.066540], 
[1600, 3, 329900, 206.187500], 
[2400, 3, 369000, 153.750000], 
[1416, 2, 232000, 163.841808], 
[3000, 4, 539900, 179.966667], 
[1985, 4, 299900, 151.083123], 
[1534, 3, 314900, 205.280313], 
[1427, 3, 198999, 139.452698], 
[1380, 3, 212000, 153.623188], 
[1494, 3, 242500, 162.315930], 
[1940, 4, 239999, 123.710825], 
[2000, 3, 347000, 173.500000], 
[1890, 3, 329999, 174.602645], 
[4478, 5, 699900, 156.297454], 
[1268, 3, 259900, 204.968454], 
[2300, 4, 449900, 195.608696], 
[1320, 2, 299900, 227.196970], 
[1236, 3, 199900, 161.731392], 
[2609, 4, 499998, 191.643542], 
[3031, 4, 599000, 197.624546]] 

lbls = [1,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1] 
features = np.array(features, dtype=np.float32) 
lbls = np.array(lbls, dtype=np.int32) 

n_examples = len(lbls) 
epochs = 100 
learning_rate = 0.1 
batch_size = 1 

input_data = tf.placeholder('float', [None, 4]) 
labels = tf.placeholder('int32', [None]) 

weights = { 
      'hl1': tf.Variable(tf.random_normal([4, 10])), 
      'hl2': tf.Variable(tf.random_normal([10, 10])), 
      'hl3': tf.Variable(tf.random_normal([10, 4])), 
      'ol': tf.Variable(tf.random_normal([4, 1])) 
      } 

biases = { 
      'hl1': tf.Variable(tf.random_normal([10])), 
      'hl2': tf.Variable(tf.random_normal([10])), 
      'hl3': tf.Variable(tf.random_normal([4])), 
      # 'ol': tf.Variable(tf.random_normal([1])), 
      'ol': tf.Variable(tf.random_normal([2])) 
      } 

hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1'])) 
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2'])) 
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3'])) 
# ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol'])) 
logits = tf.add(tf.matmul(hl3, weights['ol']), biases['ol']) 

# ol = tf.Print(ol, [tf.reduce_sum(weights['hl1'])]) 
# loss = tf.reduce_mean((labels - ol)**2) 
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels) 
# loss = tf.reduce_mean((labels - ol)**2) 
loss = tf.reduce_mean(cost) 
optimizer = tf.train.AdamOptimizer(learning_rate) 

iterations = int(n_examples/batch_size) 

def debug_minimize(optimizer, loss, sess): 
    from tensorflow.python.ops import variables 
    from tensorflow.python.framework import ops 
    # get all trainable variables
    var_list = (variables.trainable_variables() + ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    print('variables')
    for v in var_list:
        print('  ', v.name)
    # get all gradients 
    grads_and_vars = optimizer.compute_gradients(loss) 
    train_op = optimizer.apply_gradients(grads_and_vars) 

    zipped_val = sess.run(grads_and_vars, {input_data: features, labels: lbls}) 

    for rsl, tensor in zip(zipped_val, grads_and_vars):
        print('-----------------------------------------')
        print('name', tensor[0].name.replace('/tuple/control_dependency_1:0', '').replace('gradients/', ''))
        print('gradient', rsl[0])
        print('value', rsl[1])
    return train_op 

sess = tf.Session() 
sess.run(tf.global_variables_initializer()) 
debug_minimize(optimizer, loss, sess)
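
The script above only inspects the gradients; to actually train with the new loss you still need a train op and the minibatch loop, and (as the other answer notes) normalized inputs. A minimal continuation, assuming the script above has just run:

# Hedged sketch continuing the script above (not part of the original answer).
train = optimizer.minimize(loss)

# Normalize the features as suggested in the other answer; otherwise the ReLUs saturate.
mu, sigma = features.mean(axis=0), features.std(axis=0)
norm_features = (features - mu) / sigma

sess.run(tf.global_variables_initializer())   # re-run so Adam's slot variables are initialized
for epoch_no in range(epochs):
    ptr = 0
    for _ in range(iterations):
        batch_x = norm_features[ptr:ptr + batch_size]
        batch_y = lbls[ptr:ptr + batch_size]
        ptr += batch_size
        _, err = sess.run([train, loss], feed_dict={input_data: batch_x, labels: batch_y})
    print("Loss at epoch", epoch_no, ":", err)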