2017-06-29 1 views
0

Salut, j'essayais de former un modèle quand je reçois ce message d'erreur: Si je comprends bien, il dit que je suis hors de la VRAM mais j'ai un Asus GTX1080 A8G Gaming qui devrait avoir assez de Vram . Je l'ai essayé avant que tout fonctionne, mais soudainement, ça ne marchera plus. Mon profond net:OOM de la carte graphique CUDA/Tensorflow

# -*- coding: utf-8 -*- 
""" 
Created on Thu Jun 29 11:52:11 2017 
@author: tobia 
""" 
#importing pre_processing libaries 
import numpy as np 
from keras.models import load_model 
import os 
#importing Deep Learning Libaries 
from keras import layers 
from keras.models import Sequential 
from keras.callbacks import TensorBoard 
from keras.layers import Flatten,Dense,Conv2D,MaxPooling2D,Dropout,BatchNormalization,Activation 
def load_data(): 


    key_values = np.empty((0,8),dtype = 'uint8') 
    picture_data = np.empty((0,60,80), dtype = 'uint8') 


    for i in range(len(os.listdir('data/key_values'))): 
     buffer = np.load('data/key_values/values_{0}.npy'.format(i+1)) 
     key_values = np.append(key_values,buffer,axis = 0) 
     buffer_2 = np.load('data/video/video_{}.npy'.format(i+1)) 
     picture_data = np.append(picture_data,buffer_2,axis = 0) 
    picture_data = picture_data.reshape((len(key_values),60,80,1))  
    """ 
    train_data = np.load("data/Processed/train_data.npy") 
    train = train_data[:] 


    picture_data = np.array([i[1] for i in train]).reshape(-1,60,80,1) 
    key_values = np.array([i[0] for i in train]) 
    key_values = np.squeeze(key_values) 
    """ 
    # key_values = np.reshape(key_values,(len(key_values[:]),1,7))  
    # picture_data = np.reshape(picture_data,(len(picture_data[:]),1,60,80,1)) 
    return key_values,picture_data 
class Network: 
    def __init__(self): 

     pass 

    def model_1(self,picture_data,key_values): 
     model = Sequential() 
     model.add(Conv2D(96, 11,input_shape = (60,80,1),activation = "relu")) 
     model.add(MaxPooling2D(pool_size = 3,strides =1)) 
     model.add(BatchNormalization(axis = 1)) 
     model.add(Flatten()) 
     model.add(Dense(units = 8, activation ="softmax")) 
     model.compile(optimizer ='adam', loss = 'categorical_crossentropy',metrics = ['accuracy']) 

     model.summary() 
     return model 
    def start(self,picture_data,key_values): 
     model = self.model_1(picture_data,key_values) 
     tbCallBack= TensorBoard(log_dir = "./logs",histogram_freq=0,write_graph=True,write_images=True) 
     model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack]) 

    def predict_key(self,live_image,model): 
     self.model = model 

     x = self.model.predict(live_image,batch_size =3) 

     return x 

input_k = input("Start new Training press: N or to contiune learning press C") 
if(input_k == 'N'): 
    key_values,picture_data= load_data() 
    test = Network() 
    test.start(picture_data,key_values) 
elif(input_k == 'C'): 

    model = load_model('Models/Modell.h5') 
    visual = TensorBoard(log_dir = "./logs",histogram_freq=0,write_graph=True,write_images=True) 
    key_values,picture_data = load_data() 
    model.fit(picture_data,key_values,batch_size = 1000,epochs=1,validation_split = 0.1,callbacks = [visual]) 
    model.save("Models/Modell.h5") 

Message d'erreur:

File "<ipython-input-1-73951c078cac>", line 1, in <module> 
    runfile('C:/Users/tobia/Desktop/Ai_Star/ai_train.py', wdir='C:/Users/tobia/Desktop/Ai_Star') 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile 
    execfile(filename, namespace) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile 
    exec(compile(f.read(), filename, 'exec'), namespace) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module> 
    test.start(picture_data,key_values) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 66, in start 
    model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack]) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 870, in fit 
    initial_epoch=initial_epoch) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1507, in fit 
    initial_epoch=initial_epoch) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1156, in _fit_loop 
    outs = f(ins_batch) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 2269, in __call__ 
    **self.session_kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 789, in run 
    run_metadata_ptr) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 997, in _run 
    feed_dict_string, options, run_metadata) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 1132, in _do_run 
    target_list, options, run_metadata) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 1152, in _do_call 
    raise type(e)(node_def, op, message) 
ResourceExhaustedError: OOM when allocating tensor with shape[313344,8] 
[[Node: gradients/dense_1/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, _class=["loc:@dense_1/MatMul"], transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](flatten_1/Reshape, gradients/dense_1/Softmax_grad/mul_1)]] 
Caused by op 'gradients/dense_1/MatMul_grad/MatMul_1', defined at: 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module> 
    main() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 227, in main 
    kernel.start() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelapp.py", line 477, in start 
    ioloop.IOLoop.instance().start() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start 
    super(ZMQIOLoop, self).start() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\ioloop.py", line 888, in start 
    handler_func(fd_obj, events) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper 
    return fn(*args, **kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events 
    self._handle_recv() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv 
    self._run_callback(callback, msg) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback 
    callback(*args, **kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper 
    return fn(*args, **kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher 
    return self.dispatch_shell(stream, msg) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell 
    handler(stream, idents, msg) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request 
    user_expressions, allow_stdin) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute 
    res = shell.run_cell(code, store_history=store_history, silent=silent) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell 
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell 
    interactivity=interactivity, compiler=compiler, result=result) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2808, in run_ast_nodes 
    if self.run_code(code, result): 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code 
    exec(code_obj, self.user_global_ns, self.user_ns) 
    File "<ipython-input-1-73951c078cac>", line 1, in <module> 
    runfile('C:/Users/tobia/Desktop/Ai_Star/ai_train.py', wdir='C:/Users/tobia/Desktop/Ai_Star') 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile 
    execfile(filename, namespace) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile 
    exec(compile(f.read(), filename, 'exec'), namespace) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module> 
    test.start(picture_data,key_values) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 66, in start 
    model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack]) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 870, in fit 
    initial_epoch=initial_epoch) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1490, in fit 
    self._make_train_function() 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1014, in _make_train_function 
    self.total_loss) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\optimizers.py", line 405, in get_updates 
    grads = self.get_gradients(loss, params) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\optimizers.py", line 71, in get_gradients 
    grads = K.gradients(loss, params) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 2307, in gradients 
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 540, in gradients 
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads)) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 346, in _MaybeCompile 
    return grad_fn() # Exit early 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 540, in <lambda> 
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads)) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_grad.py", line 825, in _MatMulGrad 
    grad_b = math_ops.matmul(a, grad, transpose_a=True) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1816, in matmul 
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1217, in _mat_mul 
    transpose_b=transpose_b, name=name) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op 
    op_def=op_def) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op 
    original_op=self._default_original_op, op_def=op_def) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__ 
    self._traceback = _extract_stack() 
...which was originally created as op 'dense_1/MatMul', defined at: 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module> 
    main() 
[elided 20 identical lines from previous traceback] 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module> 
    test.start(picture_data,key_values) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 64, in start 
    model = self.model_1(picture_data,key_values) 
    File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 57, in model_1 
    model.add(Dense(units = 8, activation ="softmax")) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 476, in add 
    output_tensor = layer(self.outputs[0]) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\topology.py", line 596, in __call__ 
    output = self.call(inputs, **kwargs) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\layers\core.py", line 843, in call 
    output = K.dot(inputs, self.kernel) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 976, in dot 
    out = tf.matmul(x, y) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1816, in matmul 
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1217, in _mat_mul 
    transpose_b=transpose_b, name=name) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op 
    op_def=op_def) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op 
    original_op=self._default_original_op, op_def=op_def) 
    File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__ 
    self._traceback = _extract_stack() 
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[313344,8] 
[[Node: gradients/dense_1/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, _class=["loc:@dense_1/MatMul"], transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](flatten_1/Reshape, gradients/dense_1/Softmax_grad/mul_1)]] 
+2

à ce sujet: « Je l'ai essayé avant tout fonctionnait mais soudainement de nulle part, il ne marchera pas plus. » Peut-être que votre GPU a des processus zombies qui maintiennent la mémoire allouée. Vous pouvez généralement observer ceci avec 'nvidia-smi'. Une solution possible dans ce cas serait de redémarrer. –

+0

Vous pouvez jeter un oeil à https://stackoverflow.com/documentation/tensorflow/10621/tensorflow-gpu-setup/31879/control-the-gpu-memory-allocation#t=201706291458179435665 pour gérer la mémoire CUDA. – npf

Répondre

0

Essayez à nouveau avec le redémarrage de python. La mémoire GPU ne libère pas jusqu'à ce que vous mentionnez dans le code. Un certain temps lors de l'exécution du programme d'apprentissage en profondeur dans le même shell python à nouveau sans mentionner combien de fraction de mémoire besoin d'utiliser cette erreur OOM se produisent. Se reporter ce poste

How to prevent tensorflow from allocating the totality of a GPU memory?