Actual source code: vecstash.c


#include <petsc/private/vecimpl.h>

#define DEFAULT_STASH_SIZE   100

/*
  VecStashCreate_Private - Creates a stash, currently used for all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processes are kept until vector assembly is done.

  This is a simple-minded stash: it simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash    - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt       max,*opt,nopt;
  PetscBool      flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc1(nopt,&opt);
  PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = NULL;
  stash->array    = NULL;

  stash->send_waits   = NULL;
  stash->recv_waits   = NULL;
  stash->send_status  = NULL;
  stash->nsends       = 0;
  stash->nrecvs       = 0;
  stash->svalues      = NULL;
  stash->rvalues      = NULL;
  stash->rmax         = 0;
  stash->nprocs       = NULL;
  stash->nprocessed   = 0;
  stash->donotstash   = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  return 0;
}
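
/*
   A minimal usage sketch (hypothetical; a VecStash is normally embedded in a
   parallel vector object rather than created standalone), with bs = 1 for a
   scalar, non-blocked stash:

     VecStash stash;
     VecStashCreate_Private(PETSC_COMM_WORLD,1,&stash);
     VecStashSetInitialSize_Private(&stash,500);  // optional: expect ~500 stashed values

   The run-time option -vecstash_initial_size <n> has the same effect as the
   explicit VecStashSetInitialSize_Private() call, taking either one value for
   all ranks or one value per rank.
*/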

/*
   VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFree2(stash->array,stash->idx);
  PetscFree(stash->bowners);
  return 0;
}

/*
   VecStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt       nsends=stash->nsends,oldnmax;
  MPI_Status     *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc1(2*nsends,&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used,
     so that the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases; e.g. with n = 100 and bs = 1 the new
     value would be ((PetscInt)(100*1.1) + 5)*1 = 115. */
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  PetscFree2(stash->array,stash->idx);
  stash->array = NULL;
  stash->idx   = NULL;
  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree2(stash->rvalues,stash->rindices);
  PetscFree(stash->nprocs);
  return 0;
}

/*
   VecStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  if (nstash) *nstash = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return 0;
}

/*
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            This value is used while allocating memory. It specifies
            the number of values stored, even with the block-stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return 0;
}

/* VecStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt       *n_idx,newnmax,bs=stash->bs;
  PetscScalar    *n_array;

  /* allocate a larger stash. */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs;
    else                              newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else                              newnmax = stash->nmax*2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscFree2(stash->array,stash->idx);

  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return 0;
}

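/*
   Worked example of the growth policy above (assuming umax = 0 and bs = 1):
   the first call allocates DEFAULT_STASH_SIZE/bs = 100 slots, and each later
   call doubles nmax (100 -> 200 -> 400 -> ...); if a doubling still cannot
   accommodate the requested increase, 2*incr extra slots are added on top.
*/
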
/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
    In the case of the blocked-stash, the 'owners' array has the ranges
  specified in blocked global indices; for the regular stash they are in
  the regular global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt    size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt       *owner,*start,*nprocs,nsends,nreceives;
  PetscInt       nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar    *rvalues,*svalues;
  MPI_Comm       comm = stash->comm;
  MPI_Request    *send_waits,*recv_waits;

  /*  first count number of contributors to each processor */
  PetscCalloc1(2*size,&nprocs);
  PetscMalloc1(stash->n,&owner);

  j       = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);
  PetscMalloc1(2*nreceives,&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
      1) start[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc1(size,&start);
  /* use 2 sends per processor: the first with the values, the second with the indices */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* This memory is reused in scatter end for a different purpose */
  for (i=0; i<2*size; i++) nprocs[i] = -1;

  stash->nprocs     = nprocs;
  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return 0;
}
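
/*
   Sketch of the message protocol used above (a summary, assuming bs = 1):
   each destination rank j is sent two messages matched by tag,

     values  : MPI_Isend(svalues +start[j], nprocs[2*j], MPIU_SCALAR, j, tag1, ...)
     indices : MPI_Isend(sindices+start[j], nprocs[2*j], MPIU_INT,    j, tag2, ...)

   where nprocs[2*j] counts the stashed entries destined for rank j and
   nprocs[2*j+1] is a 0/1 flag marking whether rank j gets a message at all.
   PetscMaxSum() reduces these pairs so each rank learns how many messages it
   will receive (nreceives) and the longest incoming message length (nmax).
*/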

/*
   VecStashScatterGetMesg_Private - This function waits on the receives posted
   in the function VecStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals, rows, vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt    i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt       *flg_v;
  PetscInt       i1,i2,bs=stash->bs;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return 0;

  flg_v = stash->nprocs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices + i2*stash->rmax;
      *vals = stash->rvalues + i1*bs*stash->rmax;
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return 0;
}
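
/*
   A sketch of how the three scatter routines fit together in a hypothetical
   assembly loop ('owners' holds the ownership ranges):

     PetscMPIInt nvals;
     PetscInt    *rows,flg;
     PetscScalar *vals;

     VecStashScatterBegin_Private(&stash,owners);
     VecStashScatterGetMesg_Private(&stash,&nvals,&rows,&vals,&flg);
     while (flg) {  // one matched (indices,values) pair per iteration
       // ... insert the nvals entries (rows[i],vals[i]) locally ...
       VecStashScatterGetMesg_Private(&stash,&nvals,&rows,&vals,&flg);
     }
     VecStashScatterEnd_Private(&stash);
*/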

/*
 * Sort the stash, removing duplicates (combining as appropriate).
 */
PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
{
  PetscInt i,j,bs = stash->bs;

  if (!stash->n) return 0;
  if (bs == 1) {
    PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);
    for (i=1,j=0; i<stash->n; i++) {
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          stash->array[j] += stash->array[i];
          break;
        case INSERT_VALUES:
          stash->array[j] = stash->array[i];
          break;
        default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j]   = stash->idx[i];
        stash->array[j] = stash->array[i];
      }
    }
    stash->n = j + 1;
  } else {                      /* block stash */
    PetscInt *perm = NULL;
    PetscScalar *arr;
    PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);
    for (i=0; i<stash->n; i++) perm[i] = i;
    PetscSortIntWithArray(stash->n,stash->idx,perm);

    /* Out-of-place copy of arr */
    PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));
    for (i=1,j=0; i<stash->n; i++) {
      PetscInt k;
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
          break;
        case INSERT_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
          break;
        default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
      }
    }
    stash->n = j + 1;
    PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));
    PetscFree2(perm,arr);
  }
  return 0;
}
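
/*
   Worked example (bs = 1, insertmode = ADD_VALUES): given idx = {3,1,3} and
   array = {10,20,30}, the sort yields idx = {1,3,3}, the duplicate index 3 is
   combined, and the stash ends with n = 2, idx = {1,3}, array = {20,40}.
   With INSERT_VALUES one of the duplicate values is kept instead (the sort
   does not guarantee which of the equal-index entries survives).
*/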

PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
{
  PetscInt       i,bs = stash->bs;
  PetscMPIInt    r;
  PetscSegBuffer seg;

  PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);
  *nowners = 0;
  for (i=0,r=-1; i<stash->n; i++) {
    if (stash->idx[i]*bs >= map->range[r+1]) {
      PetscMPIInt *rank;
      PetscSegBufferGet(seg,1,&rank);
      PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);
      *rank = r;
      (*nowners)++;
    }
  }
  PetscSegBufferExtractAlloc(seg,owners);
  PetscSegBufferDestroy(&seg);
  return 0;
}
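
/*
   Note: because the loop above moves monotonically through map->range, this
   routine assumes the stashed indices are sorted in ascending order; a
   hypothetical caller would therefore compress first:

     PetscMPIInt nowners,*owners;
     VecStashSortCompress_Private(&stash);
     VecStashGetOwnerList_Private(&stash,map,&nowners,&owners);
     // ... use the nowners destination ranks listed in owners[] ...
     PetscFree(owners);
*/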