1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.143 1996/05/07 20:43:35 balay Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 328 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 329 330 /* first count number of contributors to each processor */ 331 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 332 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 333 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 334 for ( i=0; i<N; i++ ) { 335 idx = rows[i]; 336 found = 0; 337 for ( j=0; j<size; j++ ) { 338 if (idx >= owners[j] && idx < owners[j+1]) { 339 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 340 } 341 } 342 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 343 } 344 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 345 346 /* inform other processors of number of messages and max length*/ 347 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 348 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 349 nrecvs = work[rank]; 350 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 351 nmax = work[rank]; 352 PetscFree(work); 353 354 /* post receives: */ 355 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 356 CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 358 CHKPTRQ(recv_waits); 359 for ( i=0; i<nrecvs; i++ ) { 360 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 440 int ierr; 441 442 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 443 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 444 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 445 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 446 return 0; 447 } 448 449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452 int ierr; 453 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 455 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457 return 0; 458 } 459 460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461 { 462 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463 int ierr; 464 465 /* do nondiagonal part */ 466 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 467 /* send it on its way */ 468 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 469 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470 /* do local part */ 471 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 472 /* receive remote parts: note this assumes the values are not actually */ 473 /* inserted in yy until the next line, which is true for my implementation*/ 474 /* but is not perhaps always true. */ 475 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 476 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477 return 0; 478 } 479 480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483 int ierr; 484 485 /* do nondiagonal part */ 486 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 487 /* send it on its way */ 488 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 /* do local part */ 491 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 492 /* receive remote parts: note this assumes the values are not actually */ 493 /* inserted in yy until the next line, which is true for my implementation*/ 494 /* but is not perhaps always true. */ 495 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 496 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497 return 0; 498 } 499 500 /* 501 This only works correctly for square matrices where the subblock A->A is the 502 diagonal block 503 */ 504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 505 { 506 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507 if (a->M != a->N) 508 SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 509 return MatGetDiagonal(a->A,v); 510 } 511 512 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 513 { 514 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 515 int ierr; 516 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 517 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 518 return 0; 519 } 520 521 static int MatDestroy_MPIAIJ(PetscObject obj) 522 { 523 Mat mat = (Mat) obj; 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 525 int ierr; 526 #if defined(PETSC_LOG) 527 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 528 #endif 529 PetscFree(aij->rowners); 530 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 531 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 532 if (aij->colmap) PetscFree(aij->colmap); 533 if (aij->garray) PetscFree(aij->garray); 534 if (aij->lvec) VecDestroy(aij->lvec); 535 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 536 if (aij->rowvalues) PetscFree(aij->rowvalues); 537 PetscFree(aij); 538 PLogObjectDestroy(mat); 539 PetscHeaderDestroy(mat); 540 return 0; 541 } 542 #include "draw.h" 543 #include "pinclude/pviewer.h" 544 545 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 546 { 547 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 548 int ierr; 549 550 if (aij->size == 1) { 551 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 552 } 553 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 554 return 0; 555 } 556 557 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 560 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 561 int ierr, format,shift = C->indexshift,rank; 562 FILE *fd; 563 ViewerType vtype; 564 565 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 566 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 567 ierr = ViewerGetFormat(viewer,&format); 568 if (format == ASCII_FORMAT_INFO_DETAILED) { 569 int nz, nzalloc, mem, flg; 570 MPI_Comm_rank(mat->comm,&rank); 571 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 572 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 573 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 574 PetscSequentialPhaseBegin(mat->comm,1); 575 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 576 rank,aij->m,nz,nzalloc,mem); 577 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 578 rank,aij->m,nz,nzalloc,mem); 579 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 580 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 581 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 582 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 583 fflush(fd); 584 PetscSequentialPhaseEnd(mat->comm,1); 585 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 586 return 0; 587 } 588 else if (format == ASCII_FORMAT_INFO) { 589 return 0; 590 } 591 } 592 593 if (vtype == DRAW_VIEWER) { 594 Draw draw; 595 PetscTruth isnull; 596 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 597 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 598 } 599 600 if (vtype == ASCII_FILE_VIEWER) { 601 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 602 PetscSequentialPhaseBegin(mat->comm,1); 603 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 604 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 605 aij->cend); 606 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 607 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 608 fflush(fd); 609 PetscSequentialPhaseEnd(mat->comm,1); 610 } 611 else { 612 int size = aij->size; 613 rank = aij->rank; 614 if (size == 1) { 615 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 616 } 617 else { 618 /* assemble the entire matrix onto first processor. */ 619 Mat A; 620 Mat_SeqAIJ *Aloc; 621 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 622 Scalar *a; 623 624 if (!rank) { 625 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 626 CHKERRQ(ierr); 627 } 628 else { 629 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 630 CHKERRQ(ierr); 631 } 632 PLogObjectParent(mat,A); 633 634 /* copy over the A part */ 635 Aloc = (Mat_SeqAIJ*) aij->A->data; 636 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 637 row = aij->rstart; 638 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 639 for ( i=0; i<m; i++ ) { 640 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 641 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 642 } 643 aj = Aloc->j; 644 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 645 646 /* copy over the B part */ 647 Aloc = (Mat_SeqAIJ*) aij->B->data; 648 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 649 row = aij->rstart; 650 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 651 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 652 for ( i=0; i<m; i++ ) { 653 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 654 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 655 } 656 PetscFree(ct); 657 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 658 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 659 if (!rank) { 660 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(A); CHKERRQ(ierr); 663 } 664 } 665 return 0; 666 } 667 668 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 669 { 670 Mat mat = (Mat) obj; 671 int ierr; 672 ViewerType vtype; 673 674 if (!viewer) { 675 viewer = STDOUT_VIEWER_SELF; 676 } 677 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 678 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 679 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 680 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 681 } 682 else if (vtype == BINARY_FILE_VIEWER) { 683 return MatView_MPIAIJ_Binary(mat,viewer); 684 } 685 return 0; 686 } 687 688 /* 689 This has to provide several versions. 690 691 1) per sequential 692 2) a) use only local smoothing updating outer values only once. 693 b) local smoothing updating outer values each inner iteration 694 3) color updating out values betwen colors. 695 */ 696 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 697 double fshift,int its,Vec xx) 698 { 699 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 700 Mat AA = mat->A, BB = mat->B; 701 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 702 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 703 int ierr,*idx, *diag; 704 int n = mat->n, m = mat->m, i,shift = A->indexshift; 705 Vec tt; 706 707 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 708 xs = x + shift; /* shift by one for index start of 1 */ 709 ls = ls + shift; 710 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 711 diag = A->diag; 712 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 713 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 714 } 715 if (flag & SOR_EISENSTAT) { 716 /* Let A = L + U + D; where L is lower trianglar, 717 U is upper triangular, E is diagonal; This routine applies 718 719 (L + E)^{-1} A (U + E)^{-1} 720 721 to a vector efficiently using Eisenstat's trick. This is for 722 the case of SSOR preconditioner, so E is D/omega where omega 723 is the relaxation factor. 724 */ 725 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 726 PLogObjectParent(matin,tt); 727 VecGetArray(tt,&t); 728 scale = (2.0/omega) - 1.0; 729 /* x = (E + U)^{-1} b */ 730 VecSet(&zero,mat->lvec); 731 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 732 mat->Mvctx); CHKERRQ(ierr); 733 for ( i=m-1; i>-1; i-- ) { 734 n = A->i[i+1] - diag[i] - 1; 735 idx = A->j + diag[i] + !shift; 736 v = A->a + diag[i] + !shift; 737 sum = b[i]; 738 SPARSEDENSEMDOT(sum,xs,v,idx,n); 739 d = fshift + A->a[diag[i]+shift]; 740 n = B->i[i+1] - B->i[i]; 741 idx = B->j + B->i[i] + shift; 742 v = B->a + B->i[i] + shift; 743 SPARSEDENSEMDOT(sum,ls,v,idx,n); 744 x[i] = omega*(sum/d); 745 } 746 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 747 mat->Mvctx); CHKERRQ(ierr); 748 749 /* t = b - (2*E - D)x */ 750 v = A->a; 751 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 752 753 /* t = (E + L)^{-1}t */ 754 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 755 diag = A->diag; 756 VecSet(&zero,mat->lvec); 757 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 758 mat->Mvctx); CHKERRQ(ierr); 759 for ( i=0; i<m; i++ ) { 760 n = diag[i] - A->i[i]; 761 idx = A->j + A->i[i] + shift; 762 v = A->a + A->i[i] + shift; 763 sum = t[i]; 764 SPARSEDENSEMDOT(sum,ts,v,idx,n); 765 d = fshift + A->a[diag[i]+shift]; 766 n = B->i[i+1] - B->i[i]; 767 idx = B->j + B->i[i] + shift; 768 v = B->a + B->i[i] + shift; 769 SPARSEDENSEMDOT(sum,ls,v,idx,n); 770 t[i] = omega*(sum/d); 771 } 772 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 773 mat->Mvctx); CHKERRQ(ierr); 774 /* x = x + t */ 775 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 776 VecDestroy(tt); 777 return 0; 778 } 779 780 781 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 782 if (flag & SOR_ZERO_INITIAL_GUESS) { 783 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 784 } 785 else { 786 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 787 CHKERRQ(ierr); 788 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 789 CHKERRQ(ierr); 790 } 791 while (its--) { 792 /* go down through the rows */ 793 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 794 mat->Mvctx); CHKERRQ(ierr); 795 for ( i=0; i<m; i++ ) { 796 n = A->i[i+1] - A->i[i]; 797 idx = A->j + A->i[i] + shift; 798 v = A->a + A->i[i] + shift; 799 sum = b[i]; 800 SPARSEDENSEMDOT(sum,xs,v,idx,n); 801 d = fshift + A->a[diag[i]+shift]; 802 n = B->i[i+1] - B->i[i]; 803 idx = B->j + B->i[i] + shift; 804 v = B->a + B->i[i] + shift; 805 SPARSEDENSEMDOT(sum,ls,v,idx,n); 806 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 807 } 808 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 809 mat->Mvctx); CHKERRQ(ierr); 810 /* come up through the rows */ 811 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 812 mat->Mvctx); CHKERRQ(ierr); 813 for ( i=m-1; i>-1; i-- ) { 814 n = A->i[i+1] - A->i[i]; 815 idx = A->j + A->i[i] + shift; 816 v = A->a + A->i[i] + shift; 817 sum = b[i]; 818 SPARSEDENSEMDOT(sum,xs,v,idx,n); 819 d = fshift + A->a[diag[i]+shift]; 820 n = B->i[i+1] - B->i[i]; 821 idx = B->j + B->i[i] + shift; 822 v = B->a + B->i[i] + shift; 823 SPARSEDENSEMDOT(sum,ls,v,idx,n); 824 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 825 } 826 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 827 mat->Mvctx); CHKERRQ(ierr); 828 } 829 } 830 else if (flag & SOR_FORWARD_SWEEP){ 831 if (flag & SOR_ZERO_INITIAL_GUESS) { 832 VecSet(&zero,mat->lvec); 833 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 834 mat->Mvctx); CHKERRQ(ierr); 835 for ( i=0; i<m; i++ ) { 836 n = diag[i] - A->i[i]; 837 idx = A->j + A->i[i] + shift; 838 v = A->a + A->i[i] + shift; 839 sum = b[i]; 840 SPARSEDENSEMDOT(sum,xs,v,idx,n); 841 d = fshift + A->a[diag[i]+shift]; 842 n = B->i[i+1] - B->i[i]; 843 idx = B->j + B->i[i] + shift; 844 v = B->a + B->i[i] + shift; 845 SPARSEDENSEMDOT(sum,ls,v,idx,n); 846 x[i] = omega*(sum/d); 847 } 848 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 849 mat->Mvctx); CHKERRQ(ierr); 850 its--; 851 } 852 while (its--) { 853 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 854 CHKERRQ(ierr); 855 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 856 CHKERRQ(ierr); 857 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 858 mat->Mvctx); CHKERRQ(ierr); 859 for ( i=0; i<m; i++ ) { 860 n = A->i[i+1] - A->i[i]; 861 idx = A->j + A->i[i] + shift; 862 v = A->a + A->i[i] + shift; 863 sum = b[i]; 864 SPARSEDENSEMDOT(sum,xs,v,idx,n); 865 d = fshift + A->a[diag[i]+shift]; 866 n = B->i[i+1] - B->i[i]; 867 idx = B->j + B->i[i] + shift; 868 v = B->a + B->i[i] + shift; 869 SPARSEDENSEMDOT(sum,ls,v,idx,n); 870 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 871 } 872 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 873 mat->Mvctx); CHKERRQ(ierr); 874 } 875 } 876 else if (flag & SOR_BACKWARD_SWEEP){ 877 if (flag & SOR_ZERO_INITIAL_GUESS) { 878 VecSet(&zero,mat->lvec); 879 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 880 mat->Mvctx); CHKERRQ(ierr); 881 for ( i=m-1; i>-1; i-- ) { 882 n = A->i[i+1] - diag[i] - 1; 883 idx = A->j + diag[i] + !shift; 884 v = A->a + diag[i] + !shift; 885 sum = b[i]; 886 SPARSEDENSEMDOT(sum,xs,v,idx,n); 887 d = fshift + A->a[diag[i]+shift]; 888 n = B->i[i+1] - B->i[i]; 889 idx = B->j + B->i[i] + shift; 890 v = B->a + B->i[i] + shift; 891 SPARSEDENSEMDOT(sum,ls,v,idx,n); 892 x[i] = omega*(sum/d); 893 } 894 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 895 mat->Mvctx); CHKERRQ(ierr); 896 its--; 897 } 898 while (its--) { 899 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 900 mat->Mvctx); CHKERRQ(ierr); 901 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 902 mat->Mvctx); CHKERRQ(ierr); 903 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 904 mat->Mvctx); CHKERRQ(ierr); 905 for ( i=m-1; i>-1; i-- ) { 906 n = A->i[i+1] - A->i[i]; 907 idx = A->j + A->i[i] + shift; 908 v = A->a + A->i[i] + shift; 909 sum = b[i]; 910 SPARSEDENSEMDOT(sum,xs,v,idx,n); 911 d = fshift + A->a[diag[i]+shift]; 912 n = B->i[i+1] - B->i[i]; 913 idx = B->j + B->i[i] + shift; 914 v = B->a + B->i[i] + shift; 915 SPARSEDENSEMDOT(sum,ls,v,idx,n); 916 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 917 } 918 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 919 mat->Mvctx); CHKERRQ(ierr); 920 } 921 } 922 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 923 if (flag & SOR_ZERO_INITIAL_GUESS) { 924 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 925 } 926 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 927 CHKERRQ(ierr); 928 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 929 CHKERRQ(ierr); 930 while (its--) { 931 /* go down through the rows */ 932 for ( i=0; i<m; i++ ) { 933 n = A->i[i+1] - A->i[i]; 934 idx = A->j + A->i[i] + shift; 935 v = A->a + A->i[i] + shift; 936 sum = b[i]; 937 SPARSEDENSEMDOT(sum,xs,v,idx,n); 938 d = fshift + A->a[diag[i]+shift]; 939 n = B->i[i+1] - B->i[i]; 940 idx = B->j + B->i[i] + shift; 941 v = B->a + B->i[i] + shift; 942 SPARSEDENSEMDOT(sum,ls,v,idx,n); 943 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 944 } 945 /* come up through the rows */ 946 for ( i=m-1; i>-1; i-- ) { 947 n = A->i[i+1] - A->i[i]; 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 958 } 959 } 960 } 961 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 962 if (flag & SOR_ZERO_INITIAL_GUESS) { 963 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 964 } 965 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 966 CHKERRQ(ierr); 967 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 968 CHKERRQ(ierr); 969 while (its--) { 970 for ( i=0; i<m; i++ ) { 971 n = A->i[i+1] - A->i[i]; 972 idx = A->j + A->i[i] + shift; 973 v = A->a + A->i[i] + shift; 974 sum = b[i]; 975 SPARSEDENSEMDOT(sum,xs,v,idx,n); 976 d = fshift + A->a[diag[i]+shift]; 977 n = B->i[i+1] - B->i[i]; 978 idx = B->j + B->i[i] + shift; 979 v = B->a + B->i[i] + shift; 980 SPARSEDENSEMDOT(sum,ls,v,idx,n); 981 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 982 } 983 } 984 } 985 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 986 if (flag & SOR_ZERO_INITIAL_GUESS) { 987 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 988 } 989 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 990 mat->Mvctx); CHKERRQ(ierr); 991 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 992 mat->Mvctx); CHKERRQ(ierr); 993 while (its--) { 994 for ( i=m-1; i>-1; i-- ) { 995 n = A->i[i+1] - A->i[i]; 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1006 } 1007 } 1008 } 1009 return 0; 1010 } 1011 1012 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1013 int *nzalloc,int *mem) 1014 { 1015 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1016 Mat A = mat->A, B = mat->B; 1017 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1018 1019 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1020 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1021 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1022 if (flag == MAT_LOCAL) { 1023 if (nz) *nz = isend[0]; 1024 if (nzalloc) *nzalloc = isend[1]; 1025 if (mem) *mem = isend[2]; 1026 } else if (flag == MAT_GLOBAL_MAX) { 1027 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1028 if (nz) *nz = irecv[0]; 1029 if (nzalloc) *nzalloc = irecv[1]; 1030 if (mem) *mem = irecv[2]; 1031 } else if (flag == MAT_GLOBAL_SUM) { 1032 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1033 if (nz) *nz = irecv[0]; 1034 if (nzalloc) *nzalloc = irecv[1]; 1035 if (mem) *mem = irecv[2]; 1036 } 1037 return 0; 1038 } 1039 1040 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1041 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1042 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1043 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1044 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1045 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1046 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1047 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1048 1049 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1050 { 1051 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1052 1053 if (op == NO_NEW_NONZERO_LOCATIONS || 1054 op == YES_NEW_NONZERO_LOCATIONS || 1055 op == COLUMNS_SORTED || 1056 op == ROW_ORIENTED) { 1057 MatSetOption(a->A,op); 1058 MatSetOption(a->B,op); 1059 } 1060 else if (op == ROWS_SORTED || 1061 op == SYMMETRIC_MATRIX || 1062 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1063 op == YES_NEW_DIAGONALS) 1064 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1065 else if (op == COLUMN_ORIENTED) { 1066 a->roworiented = 0; 1067 MatSetOption(a->A,op); 1068 MatSetOption(a->B,op); 1069 } 1070 else if (op == NO_NEW_DIAGONALS) 1071 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1072 else 1073 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1074 return 0; 1075 } 1076 1077 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 *m = mat->M; *n = mat->N; 1081 return 0; 1082 } 1083 1084 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087 *m = mat->m; *n = mat->N; 1088 return 0; 1089 } 1090 1091 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 *m = mat->rstart; *n = mat->rend; 1095 return 0; 1096 } 1097 1098 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1099 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1100 1101 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1105 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1106 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1107 int *cmap, *idx_p; 1108 1109 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1110 mat->getrowactive = PETSC_TRUE; 1111 1112 if (!mat->rowvalues && (idx || v)) { 1113 /* 1114 allocate enough space to hold information from the longest row. 1115 */ 1116 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1117 int max = 1,n = mat->n,tmp; 1118 for ( i=0; i<n; i++ ) { 1119 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1120 if (max < tmp) { max = tmp; } 1121 } 1122 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1123 CHKPTRQ(mat->rowvalues); 1124 mat->rowindices = (int *) (mat->rowvalues + max); 1125 } 1126 1127 1128 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1129 lrow = row - rstart; 1130 1131 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1132 if (!v) {pvA = 0; pvB = 0;} 1133 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1134 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1135 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1136 nztot = nzA + nzB; 1137 1138 cmap = mat->garray; 1139 if (v || idx) { 1140 if (nztot) { 1141 /* Sort by increasing column numbers, assuming A and B already sorted */ 1142 int imark = -1; 1143 if (v) { 1144 *v = v_p = mat->rowvalues; 1145 for ( i=0; i<nzB; i++ ) { 1146 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1147 else break; 1148 } 1149 imark = i; 1150 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1151 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1152 } 1153 if (idx) { 1154 *idx = idx_p = mat->rowindices; 1155 if (imark > -1) { 1156 for ( i=0; i<imark; i++ ) { 1157 idx_p[i] = cmap[cworkB[i]]; 1158 } 1159 } else { 1160 for ( i=0; i<nzB; i++ ) { 1161 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1162 else break; 1163 } 1164 imark = i; 1165 } 1166 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1167 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1168 } 1169 } 1170 else {*idx = 0; *v=0;} 1171 } 1172 *nz = nztot; 1173 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1174 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1175 return 0; 1176 } 1177 1178 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1179 { 1180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1181 if (aij->getrowactive == PETSC_FALSE) { 1182 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1183 } 1184 aij->getrowactive = PETSC_FALSE; 1185 return 0; 1186 } 1187 1188 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1189 { 1190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1191 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1192 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1193 double sum = 0.0; 1194 Scalar *v; 1195 1196 if (aij->size == 1) { 1197 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1198 } else { 1199 if (type == NORM_FROBENIUS) { 1200 v = amat->a; 1201 for (i=0; i<amat->nz; i++ ) { 1202 #if defined(PETSC_COMPLEX) 1203 sum += real(conj(*v)*(*v)); v++; 1204 #else 1205 sum += (*v)*(*v); v++; 1206 #endif 1207 } 1208 v = bmat->a; 1209 for (i=0; i<bmat->nz; i++ ) { 1210 #if defined(PETSC_COMPLEX) 1211 sum += real(conj(*v)*(*v)); v++; 1212 #else 1213 sum += (*v)*(*v); v++; 1214 #endif 1215 } 1216 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1217 *norm = sqrt(*norm); 1218 } 1219 else if (type == NORM_1) { /* max column norm */ 1220 double *tmp, *tmp2; 1221 int *jj, *garray = aij->garray; 1222 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1223 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1224 PetscMemzero(tmp,aij->N*sizeof(double)); 1225 *norm = 0.0; 1226 v = amat->a; jj = amat->j; 1227 for ( j=0; j<amat->nz; j++ ) { 1228 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1229 } 1230 v = bmat->a; jj = bmat->j; 1231 for ( j=0; j<bmat->nz; j++ ) { 1232 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1233 } 1234 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1235 for ( j=0; j<aij->N; j++ ) { 1236 if (tmp2[j] > *norm) *norm = tmp2[j]; 1237 } 1238 PetscFree(tmp); PetscFree(tmp2); 1239 } 1240 else if (type == NORM_INFINITY) { /* max row norm */ 1241 double ntemp = 0.0; 1242 for ( j=0; j<amat->m; j++ ) { 1243 v = amat->a + amat->i[j] + shift; 1244 sum = 0.0; 1245 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1246 sum += PetscAbsScalar(*v); v++; 1247 } 1248 v = bmat->a + bmat->i[j] + shift; 1249 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1250 sum += PetscAbsScalar(*v); v++; 1251 } 1252 if (sum > ntemp) ntemp = sum; 1253 } 1254 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1255 } 1256 else { 1257 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1258 } 1259 } 1260 return 0; 1261 } 1262 1263 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1264 { 1265 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1266 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1267 int ierr,shift = Aloc->indexshift; 1268 Mat B; 1269 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1270 Scalar *array; 1271 1272 if (matout == PETSC_NULL && M != N) 1273 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1274 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1275 PETSC_NULL,&B); CHKERRQ(ierr); 1276 1277 /* copy over the A part */ 1278 Aloc = (Mat_SeqAIJ*) a->A->data; 1279 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1280 row = a->rstart; 1281 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1282 for ( i=0; i<m; i++ ) { 1283 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1284 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1285 } 1286 aj = Aloc->j; 1287 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1288 1289 /* copy over the B part */ 1290 Aloc = (Mat_SeqAIJ*) a->B->data; 1291 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1292 row = a->rstart; 1293 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1294 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1295 for ( i=0; i<m; i++ ) { 1296 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1297 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1298 } 1299 PetscFree(ct); 1300 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1301 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1302 if (matout != PETSC_NULL) { 1303 *matout = B; 1304 } else { 1305 /* This isn't really an in-place transpose .... but free data structures from a */ 1306 PetscFree(a->rowners); 1307 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1308 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1309 if (a->colmap) PetscFree(a->colmap); 1310 if (a->garray) PetscFree(a->garray); 1311 if (a->lvec) VecDestroy(a->lvec); 1312 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1313 PetscFree(a); 1314 PetscMemcpy(A,B,sizeof(struct _Mat)); 1315 PetscHeaderDestroy(B); 1316 } 1317 return 0; 1318 } 1319 1320 extern int MatPrintHelp_SeqAIJ(Mat); 1321 static int MatPrintHelp_MPIAIJ(Mat A) 1322 { 1323 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1324 1325 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1326 else return 0; 1327 } 1328 1329 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1330 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1331 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1332 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1333 /* -------------------------------------------------------------------*/ 1334 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1335 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1336 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1337 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1338 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1339 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1340 MatLUFactor_MPIAIJ,0, 1341 MatRelax_MPIAIJ, 1342 MatTranspose_MPIAIJ, 1343 MatGetInfo_MPIAIJ,0, 1344 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1345 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1346 0, 1347 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1348 MatGetReordering_MPIAIJ, 1349 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1350 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1351 MatILUFactorSymbolic_MPIAIJ,0, 1352 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1353 0,0,0, 1354 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1355 MatPrintHelp_MPIAIJ, 1356 MatScale_MPIAIJ}; 1357 1358 /*@C 1359 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1360 (the default parallel PETSc format). For good matrix assembly performance 1361 the user should preallocate the matrix storage by setting the parameters 1362 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1363 performance can be increased by more than a factor of 50. 1364 1365 Input Parameters: 1366 . comm - MPI communicator 1367 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1368 . n - number of local columns (or PETSC_DECIDE to have calculated 1369 if N is given) 1370 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1371 . N - number of global columns (or PETSC_DECIDE to have calculated 1372 if n is given) 1373 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1374 (same for all local rows) 1375 . d_nzz - array containing the number of nonzeros in the various rows of the 1376 diagonal portion of local submatrix (possibly different for each row) 1377 or PETSC_NULL. You must leave room for the diagonal entry even if 1378 it is zero. 1379 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1380 submatrix (same for all local rows). 1381 . o_nzz - array containing the number of nonzeros in the various rows of the 1382 off-diagonal portion of the local submatrix (possibly different for 1383 each row) or PETSC_NULL. 1384 1385 Output Parameter: 1386 . A - the matrix 1387 1388 Notes: 1389 The AIJ format (also called the Yale sparse matrix format or 1390 compressed row storage), is fully compatible with standard Fortran 77 1391 storage. That is, the stored row and column indices can begin at 1392 either one (as in Fortran) or zero. See the users manual for details. 1393 1394 The user MUST specify either the local or global matrix dimensions 1395 (possibly both). 1396 1397 By default, this format uses inodes (identical nodes) when possible. 1398 We search for consecutive rows with the same nonzero structure, thereby 1399 reusing matrix information to achieve increased efficiency. 1400 1401 Options Database Keys: 1402 $ -mat_aij_no_inode - Do not use inodes 1403 $ -mat_aij_inode_limit <limit> - Set inode limit. 1404 $ (max limit=5) 1405 $ -mat_aij_oneindex - Internally use indexing starting at 1 1406 $ rather than 0. Note: When calling MatSetValues(), 1407 $ the user still MUST index entries starting at 0! 1408 1409 Storage Information: 1410 For a square global matrix we define each processor's diagonal portion 1411 to be its local rows and the corresponding columns (a square submatrix); 1412 each processor's off-diagonal portion encompasses the remainder of the 1413 local matrix (a rectangular submatrix). 1414 1415 The user can specify preallocated storage for the diagonal part of 1416 the local submatrix with either d_nz or d_nnz (not both). Set 1417 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1418 memory allocation. Likewise, specify preallocated storage for the 1419 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1420 1421 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1422 the figure below we depict these three local rows and all columns (0-11). 1423 1424 $ 0 1 2 3 4 5 6 7 8 9 10 11 1425 $ ------------------- 1426 $ row 3 | o o o d d d o o o o o o 1427 $ row 4 | o o o d d d o o o o o o 1428 $ row 5 | o o o d d d o o o o o o 1429 $ ------------------- 1430 $ 1431 1432 Thus, any entries in the d locations are stored in the d (diagonal) 1433 submatrix, and any entries in the o locations are stored in the 1434 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1435 stored simply in the MATSEQAIJ format for compressed row storage. 1436 1437 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1438 and o_nz should indicate the number of nonzeros per row in the o matrix. 1439 In general, for PDE problems in which most nonzeros are near the diagonal, 1440 one expects d_nz >> o_nz. For additional details, see the users manual 1441 chapter on matrices and the file $(PETSC_DIR)/Performance. 1442 1443 .keywords: matrix, aij, compressed row, sparse, parallel 1444 1445 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1446 @*/ 1447 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1448 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1449 { 1450 Mat B; 1451 Mat_MPIAIJ *b; 1452 int ierr, i,sum[2],work[2]; 1453 1454 *A = 0; 1455 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1456 PLogObjectCreate(B); 1457 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1458 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1459 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1460 B->destroy = MatDestroy_MPIAIJ; 1461 B->view = MatView_MPIAIJ; 1462 B->factor = 0; 1463 B->assembled = PETSC_FALSE; 1464 1465 b->insertmode = NOT_SET_VALUES; 1466 MPI_Comm_rank(comm,&b->rank); 1467 MPI_Comm_size(comm,&b->size); 1468 1469 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1470 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1471 1472 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1473 work[0] = m; work[1] = n; 1474 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1475 if (M == PETSC_DECIDE) M = sum[0]; 1476 if (N == PETSC_DECIDE) N = sum[1]; 1477 } 1478 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1479 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1480 b->m = m; B->m = m; 1481 b->n = n; B->n = n; 1482 b->N = N; B->N = N; 1483 b->M = M; B->M = M; 1484 1485 /* build local table of row and column ownerships */ 1486 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1487 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1488 b->cowners = b->rowners + b->size + 1; 1489 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1490 b->rowners[0] = 0; 1491 for ( i=2; i<=b->size; i++ ) { 1492 b->rowners[i] += b->rowners[i-1]; 1493 } 1494 b->rstart = b->rowners[b->rank]; 1495 b->rend = b->rowners[b->rank+1]; 1496 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1497 b->cowners[0] = 0; 1498 for ( i=2; i<=b->size; i++ ) { 1499 b->cowners[i] += b->cowners[i-1]; 1500 } 1501 b->cstart = b->cowners[b->rank]; 1502 b->cend = b->cowners[b->rank+1]; 1503 1504 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1505 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1506 PLogObjectParent(B,b->A); 1507 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1508 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1509 PLogObjectParent(B,b->B); 1510 1511 /* build cache for off array entries formed */ 1512 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1513 b->colmap = 0; 1514 b->garray = 0; 1515 b->roworiented = 1; 1516 1517 /* stuff used for matrix vector multiply */ 1518 b->lvec = 0; 1519 b->Mvctx = 0; 1520 1521 /* stuff for MatGetRow() */ 1522 b->rowindices = 0; 1523 b->rowvalues = 0; 1524 b->getrowactive = PETSC_FALSE; 1525 1526 *A = B; 1527 return 0; 1528 } 1529 1530 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1531 { 1532 Mat mat; 1533 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1534 int ierr, len=0, flg; 1535 1536 *newmat = 0; 1537 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1538 PLogObjectCreate(mat); 1539 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1540 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1541 mat->destroy = MatDestroy_MPIAIJ; 1542 mat->view = MatView_MPIAIJ; 1543 mat->factor = matin->factor; 1544 mat->assembled = PETSC_TRUE; 1545 1546 a->m = mat->m = oldmat->m; 1547 a->n = mat->n = oldmat->n; 1548 a->M = mat->M = oldmat->M; 1549 a->N = mat->N = oldmat->N; 1550 1551 a->rstart = oldmat->rstart; 1552 a->rend = oldmat->rend; 1553 a->cstart = oldmat->cstart; 1554 a->cend = oldmat->cend; 1555 a->size = oldmat->size; 1556 a->rank = oldmat->rank; 1557 a->insertmode = NOT_SET_VALUES; 1558 a->rowvalues = 0; 1559 a->getrowactive = PETSC_FALSE; 1560 1561 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1562 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1563 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1564 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1565 if (oldmat->colmap) { 1566 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1567 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1568 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1569 } else a->colmap = 0; 1570 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1571 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1572 PLogObjectMemory(mat,len*sizeof(int)); 1573 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1574 } else a->garray = 0; 1575 1576 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1577 PLogObjectParent(mat,a->lvec); 1578 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1579 PLogObjectParent(mat,a->Mvctx); 1580 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1581 PLogObjectParent(mat,a->A); 1582 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1583 PLogObjectParent(mat,a->B); 1584 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1585 if (flg) { 1586 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1587 } 1588 *newmat = mat; 1589 return 0; 1590 } 1591 1592 #include "sys.h" 1593 1594 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1595 { 1596 Mat A; 1597 int i, nz, ierr, j,rstart, rend, fd; 1598 Scalar *vals,*svals; 1599 MPI_Comm comm = ((PetscObject)viewer)->comm; 1600 MPI_Status status; 1601 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1602 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1603 int tag = ((PetscObject)viewer)->tag; 1604 1605 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1606 if (!rank) { 1607 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1608 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1609 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1610 } 1611 1612 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1613 M = header[1]; N = header[2]; 1614 /* determine ownership of all rows */ 1615 m = M/size + ((M % size) > rank); 1616 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1617 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1618 rowners[0] = 0; 1619 for ( i=2; i<=size; i++ ) { 1620 rowners[i] += rowners[i-1]; 1621 } 1622 rstart = rowners[rank]; 1623 rend = rowners[rank+1]; 1624 1625 /* distribute row lengths to all processors */ 1626 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1627 offlens = ourlens + (rend-rstart); 1628 if (!rank) { 1629 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1630 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1631 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1632 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1633 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1634 PetscFree(sndcounts); 1635 } 1636 else { 1637 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1638 } 1639 1640 if (!rank) { 1641 /* calculate the number of nonzeros on each processor */ 1642 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1643 PetscMemzero(procsnz,size*sizeof(int)); 1644 for ( i=0; i<size; i++ ) { 1645 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1646 procsnz[i] += rowlengths[j]; 1647 } 1648 } 1649 PetscFree(rowlengths); 1650 1651 /* determine max buffer needed and allocate it */ 1652 maxnz = 0; 1653 for ( i=0; i<size; i++ ) { 1654 maxnz = PetscMax(maxnz,procsnz[i]); 1655 } 1656 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1657 1658 /* read in my part of the matrix column indices */ 1659 nz = procsnz[0]; 1660 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1661 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1662 1663 /* read in every one elses and ship off */ 1664 for ( i=1; i<size; i++ ) { 1665 nz = procsnz[i]; 1666 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1667 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1668 } 1669 PetscFree(cols); 1670 } 1671 else { 1672 /* determine buffer space needed for message */ 1673 nz = 0; 1674 for ( i=0; i<m; i++ ) { 1675 nz += ourlens[i]; 1676 } 1677 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1678 1679 /* receive message of column indices*/ 1680 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1681 MPI_Get_count(&status,MPI_INT,&maxnz); 1682 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1683 } 1684 1685 /* loop over local rows, determining number of off diagonal entries */ 1686 PetscMemzero(offlens,m*sizeof(int)); 1687 jj = 0; 1688 for ( i=0; i<m; i++ ) { 1689 for ( j=0; j<ourlens[i]; j++ ) { 1690 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1691 jj++; 1692 } 1693 } 1694 1695 /* create our matrix */ 1696 for ( i=0; i<m; i++ ) { 1697 ourlens[i] -= offlens[i]; 1698 } 1699 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1700 A = *newmat; 1701 MatSetOption(A,COLUMNS_SORTED); 1702 for ( i=0; i<m; i++ ) { 1703 ourlens[i] += offlens[i]; 1704 } 1705 1706 if (!rank) { 1707 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1708 1709 /* read in my part of the matrix numerical values */ 1710 nz = procsnz[0]; 1711 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1712 1713 /* insert into matrix */ 1714 jj = rstart; 1715 smycols = mycols; 1716 svals = vals; 1717 for ( i=0; i<m; i++ ) { 1718 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1719 smycols += ourlens[i]; 1720 svals += ourlens[i]; 1721 jj++; 1722 } 1723 1724 /* read in other processors and ship out */ 1725 for ( i=1; i<size; i++ ) { 1726 nz = procsnz[i]; 1727 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1728 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1729 } 1730 PetscFree(procsnz); 1731 } 1732 else { 1733 /* receive numeric values */ 1734 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1735 1736 /* receive message of values*/ 1737 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1738 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1739 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1740 1741 /* insert into matrix */ 1742 jj = rstart; 1743 smycols = mycols; 1744 svals = vals; 1745 for ( i=0; i<m; i++ ) { 1746 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1747 smycols += ourlens[i]; 1748 svals += ourlens[i]; 1749 jj++; 1750 } 1751 } 1752 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1753 1754 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1755 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1756 return 0; 1757 } 1758