J'essaie de paralléliser un programme appelé streamcluster. Plus spécifiquement, la fonction nommée pgain passe le plus clair de son temps en fonction de l'outil Scalasca que j'ai utilisé, donc c'est la fonction que je devrais paralléliser. Ici vous pouvez voir la fonction et mon effort pour la paralléliser. Le problème est que la seule chose que j'ai accomplie, c'est que le programme passe encore plus de temps à être exécuté.paralléliser streamcluster avec openmp
La fonction PGain originale streamcluster:
double pgain (long x, Points *points, double z, long int *numcenters)
{
int i;
int number_of_centers_to_close = 0;
static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;
int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof (double);
if (stride % cl != 0) {
stride = cl * (stride/cl + 1);
}
int K = stride - 2 ; // K==*numcenters
//my own cost of opening x
double cost_of_opening_x = 0;
work_mem = (double*) malloc (2 * stride * sizeof (double));
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;
/*
* For each center, we have a *lower* field that indicates
* how much we will save by closing the center.
*/
int count = 0;
for (int i = 0; i < points->num; i++) {
if (is_center[i]) {
center_table[i] = count++;
}
}
work_mem[0] = 0;
//now we finish building the table. clear the working memory.
memset (switch_membership, 0, points->num * sizeof (bool));
memset (work_mem, 0, stride*sizeof (double));
memset (work_mem+stride,0,stride*sizeof (double));
//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
for (i = 0; i < points->num; i++) {
float x_cost = dist (points->p[i], points->p[x], points->dim) * points->p[i].weight;
float current_cost = points->p[i].cost;
if (x_cost < current_cost) {
// point i would save cost just by switching to x
// (note that i cannot be a median,
// or else dist(p[i], p[x]) would be 0)
switch_membership[i] = 1;
cost_of_opening_x += x_cost - current_cost;
} else {
// cost of assigning i to x is at least current assignment cost of i
// consider the savings that i's **current** median would realize
// if we reassigned that median and all its members to x;
// note we've already accounted for the fact that the median
// would save z by closing; now we have to subtract from the savings
// the extra cost of reassigning that median and its members
int assign = points->p[i].assign;
lower[center_table[assign]] += current_cost - x_cost;
}
}
// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for (int i = 0; i < points->num; i++) {
if (is_center[i]) {
double low = z + work_mem[center_table[i]];
gl_lower[center_table[i]] = low;
if (low > 0) {
// i is a median, and
// if we were to open x (which we still may not) we'd close i
// note, we'll ignore the following quantity unless we do open x
++number_of_centers_to_close;
cost_of_opening_x -= low;
}
}
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;
gl_number_of_centers_to_close = (int) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];
// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing
if (gl_cost_of_opening_x < 0) {
// we'd save money by opening x; we'll do it
for (int i = 0; i < points->num; i++) {
bool close_center = gl_lower[center_table[points->p[i].assign]] > 0 ;
if (switch_membership[i] || close_center) {
// Either i's median (which may be i itself) is closing,
// or i is closer to x than to its current median
points->p[i].cost = points->p[i].weight * dist (points->p[i], points->p[x], points->dim);
points->p[i].assign = x;
}
}
for (int i = 0; i < points->num; i++) {
if (is_center[i] && gl_lower[center_table[i]] > 0) {
is_center[i] = false;
}
}
if (x >= 0 && x < points->num) {
is_center[x] = true;
}
*numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
gl_cost_of_opening_x = 0; // the value we'll return
}
free (work_mem);
return -gl_cost_of_opening_x;
}
Et voici ce que je l'ai fait à paralléliser:
double pgain (long x, Points *points, double z, long int *numcenters)
{
int i;
int number_of_centers_to_close = 0;
static double *work_mem;
static double gl_cost_of_opening_x;
static int gl_number_of_centers_to_close;
int stride = *numcenters + 2;
//make stride a multiple of CACHE_LINE
int cl = CACHE_LINE/sizeof (double);
if (stride % cl != 0) {
stride = cl * (stride/cl + 1);
}
int K = stride - 2 ; // K==*numcenters
//my own cost of opening x
double cost_of_opening_x = 0;
work_mem = (double*) malloc (2 * stride * sizeof (double));
gl_cost_of_opening_x = 0;
gl_number_of_centers_to_close = 0;
/*
* For each center, we have a *lower* field that indicates
* how much we will save by closing the center.
*/
int count = 0;
for (int i = 0; i < points->num; i++) {
if (is_center[i]) {
center_table[i] = count++;
}
}
work_mem[0] = 0;
//now we finish building the table. clear the working memory.
memset (switch_membership, 0, points->num * sizeof (bool));
memset (work_mem, 0, stride*sizeof (double));
memset (work_mem+stride,0,stride*sizeof (double));
//my *lower* fields
double* lower = &work_mem[0];
//global *lower* fields
double* gl_lower = &work_mem[stride];
float x_cost=0.0;
float current_cost=0.0;
#pragma omp parallel for private(current_cost,x_cost)
shared(cost_of_opening_x)
for (i = 0; i < points->num; i++) {
x_cost = dist (points->p[i], points->p[x], points->dim) * points->p[i].weight;
current_cost = points->p[i].cost;
if (x_cost < current_cost) {
// point i would save cost just by switching to // x
// (note that i cannot be a median,
// or else dist(p[i], p[x]) would be 0)
switch_membership[i] = 1;
cost_of_opening_x += x_cost - current_cost;
{
#pragma omp flush(cost_of_opening_x)
}
} else {
// cost of assigning i to x is at least current assignment cost of i
// consider the savings that i's **current** median would realize
// if we reassigned that median and all its members to x;
// note we've already accounted for the fact that the median
// would save z by closing; now we have to subtract from the savings
// the extra cost of reassigning that median and its members
int assign = points->p[i].assign;
lower[center_table[assign]] += current_cost - x_cost;
{
#pragma omp flush(lower)
}
}
#pragma omp barrier
{
#pragma omp flush(lower,cost_of_opening_x)
}
}
// at this time, we can calculate the cost of opening a center
// at x; if it is negative, we'll go through with opening it
for (int i = 0; i < points->num; i++) {
if (is_center[i]) {
double low = z + work_mem[center_table[i]];
gl_lower[center_table[i]] = low;
if (low > 0) {
// i is a median, and
// if we were to open x (which we still may not) we'd close i
// note, we'll ignore the following quantity unless we do open x
++number_of_centers_to_close;
cost_of_opening_x -= low;
}
}
}
//use the rest of working memory to store the following
work_mem[K] = number_of_centers_to_close;
work_mem[K+1] = cost_of_opening_x;
gl_number_of_centers_to_close = (int) work_mem[K];
gl_cost_of_opening_x = z + work_mem[K+1];
// Now, check whether opening x would save cost; if so, do it, and
// otherwise do nothing
if (gl_cost_of_opening_x < 0) {
// we'd save money by opening x; we'll do it
#pragma omp parallel for
for (int i = 0; i < points->num; i++) {
bool close_center = gl_lower[center_table[points->p[i].assign]] > 0
;
if (switch_membership[i] || close_center) {
// Either i's median (which may be i itself) is closing,
// or i is closer to x than to its current median
points->p[i].cost = points->p[i].weight * dist (points->p[i],
points->p[x], points->dim);
points->p[i].assign = x;
}
}
for (int i = 0; i < points->num; i++) {
if (is_center[i] && gl_lower[center_table[i]] > 0) {
is_center[i] = false;
}
}
if (x >= 0 && x < points->num) {
is_center[x] = true;
}
*numcenters = *numcenters + 1 - gl_number_of_centers_to_close;
} else {
gl_cost_of_opening_x = 0; // the value we'll return
}
free (work_mem);
return -gl_cost_of_opening_x;
}
Voyez-vous une amélioration ou un changement qui rendrait plus rapide? Merci d'avance.