Je souhaite créer un contexte entre un rang MPI sur un noeud et un point gpu et le faire pour plusieurs nœuds.Affectation des rangs MPI aux unités centrales
J'ai trouvé un code here, qui est en C. Je travaille en Fortran et donc j'essaie de convertir ce code en Fortran.
Le code C est donné ci-dessous:
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
int stringCmp(const void *a, const void *b)
{
return strcmp(a,b);
}
void assignDeviceToProcess(int *p2myrank)
{
char host_name[MPI_MAX_PROCESSOR_NAME];
char (*host_names)[MPI_MAX_PROCESSOR_NAME];
MPI_Comm nodeComm;
int i, n, namelen, color, rank, nprocs, myrank,gpu_per_node;
size_t bytes;
int dev, err1;
struct cudaDeviceProp deviceProp;
/* Check if the device has been alreasy assigned */
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Get_processor_name(host_name,&namelen);
bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
strcpy(host_names[rank], host_name);
for (n=0; n<nprocs; n++)
{
MPI_Bcast(&(host_names[n]),MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
}
qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);
color = 0;
for (n=0; n<nprocs; n++)
{
if(n>0&&strcmp(host_names[n-1], host_names[n])) color++;
if(strcmp(host_name, host_names[n]) == 0) break;
}
MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
MPI_Comm_rank(nodeComm, &myrank);
MPI_Comm_size(nodeComm, &gpu_per_node);
p2myrank[0]=myrank;
return;
/* Find out how many DP capable GPUs are in the system and their device number */
int deviceCount,slot=0;
int *devloc;
cudaGetDeviceCount(&deviceCount);
devloc=(int *)malloc(deviceCount*sizeof(int));
devloc[0]=999;
for (dev = 0; dev < deviceCount; ++dev)
{
cudaGetDeviceProperties(&deviceProp, dev);
if(deviceProp.major>1)
{
devloc[slot]=dev;
slot++;
};
}
//printf ("Assigning device %d to process on node %s rank %d \n",devloc[myrank], host_name, rank);
/* Assign device to MPI process and probe device properties */
cudaSetDevice(devloc[myrank]);
cudaGetDevice(&dev);
cudaGetDeviceProperties(&deviceProp, dev);
size_t free_bytes, total_bytes;
cudaMemGetInfo(&free_bytes, &total_bytes);
printf("Host: %s Rank=%d Device= %d (%s) ECC=%s Free = %lu, Total = %lu\n",host_name,rank, devloc[myrank],deviceProp.name, deviceProp.ECCEnabled ? "Enabled " : "Disabled", (unsigned long)free_bytes, (unsigned long)total_bytes);
}
et mon code Fortran est:
subroutine MPI_to_gpu_assign(comm,nprocs)
use cudafor
use sort
implicit none
include "mpif.h"
integer:: max_len, rank, code, comm,i,size, ierr,totaldev,n, namelen, color, nprocs
integer:: nodeComm,first_time ,myrank, proc_len
character:: host_name(MPI_MAX_PROCESSOR_NAME)
character:: host_names(nprocs*MPI_MAX_PROCESSOR_NAME)
proc_len = MPI_MAX_PROCESSOR_NAME
!Check if the device has been assigned already
if(first_time) then
first_time=0
call MPI_Comm_rank(comm, rank,code)
call MPI_Get_processor_name(host_name,namelen,code)
host_names((rank-1)*proc_len+1:rank*proc_len) = host_name
do n=1,nprocs
if (n.gt.1) then
call MPI_Bcast(host_names((n-1)*proc_len+1:n*proc_len),MPI_MAX_PROCESSOR_NAME, MPI_CHARACTER, n-1, comm,code)
else
call MPI_Bcast(host_names(1:proc_len),MPI_MAX_PROCESSOR_NAME, MPI_CHARACTER, n-1, comm,code)
end if
end do
call a_sort(host_names,my_compare)
color = 0
DO n = 1,nprocs
if((n.gt.1)) then
if((my_compare(host_names(((n-2)*proc_len+1):(n-1)*proc_len), host_names(((n-1)*proc_len+1):n*proc_len))) == 1) then !!line 1!!
color = color+1
end if
if(my_compare(host_name, host_names((n-1)*proc_len+1:n*proc_len)) == 1) then !!line 2!!
exit
end if
else
if(my_compare(host_name, host_names(1:proc_len)) == 1) then !!line 3!!
exit
end if
end if
END DO
call MPI_Comm_split(comm, color, 0, nodeComm,code)
CALL MPI_Comm_rank(nodeComm, myrank,code)
write(*,*) 'Assigning device', myrank, 'to process on node', host_name,' on rank', rank,''
! Assign device to MPI process
ierr = cudaSetDevice(myrank)
if (ierr.ne.0) then
print *, cudaGetErrorString(ierr)
stop
endif
end if
end subroutine MPI_to_gpu_assign
avec le genre et d'autres fonctions nécessaires étant définies comme (que je me sers de here) :
module sort
implicit none
contains
subroutine To_lower(str)
character(len=*), intent(in out) :: str
integer :: i
do i = 1, len(str)
select case(str(i:i))
case("A":"Z")
str(i:i) = achar(iachar(str(i:i))+32)
end select
end do
end subroutine To_Lower
integer function my_compare(a, b)
character(*), intent(in) :: a, b
character(len=max(len(a),len(b))) :: a1, b1
a1 = a
b1 = b
call to_lower(b1)
call to_lower(a1)
if (len(trim(a)) > len(trim(b))) then
my_compare = -1
elseif (len(trim(a)) == len(trim(b))) then
if (a1 > b1) then
my_compare = 1
else
my_compare = -1
end if
else
my_compare = 1
end if
end function my_compare
subroutine a_sort(a, cc)
character(len=*), dimension(:), intent(inout) :: a
interface
integer function cc(a, b)
character(len=*), intent(in) :: a, b
end function cc
end interface
integer :: i, j, increment
character(len=max(len(a), 10)) :: temp
increment = size(a)/2
do while (increment > 0)
do i = increment+1, size(a)
j = i
temp = a(i)
do while (j >= increment+1 .and. cc(a(j-increment), temp) > 0)
a(j) = a(j-increment)
j = j - increment
end do
a(j) = temp
end do
if (increment == 2) then
increment = 1
else
increment = increment * 5/11
end if
end do
end subroutine a_sort
end module Sort
Mais cela ne semble pas fonctionner et me donne l'erreur suivante:
PGF90-S-0446-Argument number 1 to my_compare: rank mismatch (line 1)
PGF90-S-0446-Argument number 2 to my_compare: rank mismatch (line 1)
PGF90-S-0446-Argument number 1 to my_compare: rank mismatch (line 2)
PGF90-S-0446-Argument number 2 to my_compare: rank mismatch (line 2)
PGF90-S-0446-Argument number 1 to my_compare: rank mismatch (line 3)
PGF90-S-0446-Argument number 2 to my_compare: rank mismatch (line 3)
Quelqu'un peut-il m'aider pourquoi cette erreur se produit?
CUDA n'est pas C! – Olaf
@Olaf dans ce cas, l'étiquetage était parfaitement approprié – talonmies
@talonmies: Le code ne peut pas être exécuté par un compilateur C! CUDA est un dérivé C++, pas un dérivé C. Le simple fait d'utiliser des fonctions C du code C++ ne justifie pas la balise C. – Olaf