#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.144 1996/06/26 18:11:34 curfman Exp bsmith $";
#endif

#include "mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
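
/*
   For illustration (editorial note, not part of the original code): if the
   off-diagonal block on this process references global columns
   garray[] = {3,7,9} and indexshift is 0, the loop above produces
   colmap[3] = 0, colmap[7] = 1, colmap[9] = 2, and every other entry of
   colmap remains 0 from the PetscMemzero().
*/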

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv); CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}
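
/*
   Editorial note: unlike MatSetValues_MPIAIJ(), MatGetValues_MPIAIJ() above
   performs no communication; only locally owned rows may be queried, and an
   off-diagonal column is located through colmap, so it assumes the matrix
   has already been assembled (colmap built) before values are requested.
*/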

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row, global column, value); we store the indices as Scalars
          to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues of stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar)); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
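
/*
   Editorial note on the two MPI_Allreduce() calls in MatAssemblyBegin_MPIAIJ()
   above: procs[] holds a 0/1 flag and nprocs[] an entry count for each
   destination, so after the reductions work[rank] yields, respectively, the
   number of messages this process will receive and the largest number of
   stashed entries any single sender will direct at it, which is enough to
   post fixed-size MPI_Irecv()s without a separate handshake.
*/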
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled,shift = C->indexshift;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}
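
/*
   Editorial note on the MPI_PROD reduction in MatAssemblyEnd_MPIAIJ() above:
   was_assembled is 0 or 1 on each process, so the product over all processes
   is 1 exactly when no process has disassembled. If any process disassembled
   (product 0) while this one is still assembled, it disassembles as well, so
   that every process rebuilds a consistent parallel data structure.
*/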

/*
   The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues of stuff going to
          the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
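
/*
   Editorial note on the multiply routines below: they exploit the splitting
   of the local matrix into the diagonal block A and the off-diagonal block B.
   The ghost values of x needed by B are gathered into lvec with a scatter
   that is started before, and completed after, the purely local product with
   A, so communication overlaps computation:

        y = A * x_local + B * lvec
*/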

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the VecScatterEnd() below, which holds for the
     current scatter implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the VecScatterEnd() below, which holds for the
     current scatter implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}
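
/*
   Editorial note on the transpose products above: rows of B correspond to
   locally owned rows of the matrix, but the columns of B are owned by other
   processes, so B^T x produces contributions to remotely owned entries of
   the result. Those contributions are accumulated in lvec and shipped back
   with a reverse scatter in ADD_VALUES mode, while A^T x fills in the
   locally owned part.
*/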

/*
   This only works correctly for square matrices where the subblock A->A is
   the diagonal block.
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == ASCII_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,nz,nzalloc,mem);
      else     fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                       rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == ASCII_FORMAT_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) { aj[i] += aij->cstart + shift; }
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) { aj[i] -= aij->cstart + shift; }

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) { cols[i] = aij->garray[aj[i]+shift]; }
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF;
  }
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
   This has to provide several versions:

     1) exactly as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular,
       U is strictly upper triangular, and D is diagonal. This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the SSOR preconditioner, so E is D/omega where omega is the
       relaxation factor.
    */
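    /* Editorial sketch of why the trick saves work: with M = L + E and
       N = U + E we have A = M + N - (2E - D), so

         M^{-1} A N^{-1} b = N^{-1} b + M^{-1} ( b - (2E - D) N^{-1} b ).

       One application therefore costs two triangular solves plus a diagonal
       scaling (below: x = N^{-1} b, then t = M^{-1}(b - (2E - D)x), then
       x = x + t) instead of two solves plus a full multiply by A. With
       E = D/omega, 2E - D = ((2/omega) - 1) D, which is the "scale" factor
       computed below. */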
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i] + !shift;
      v   = A->a + diag[i] + !shift;
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] + shift;
      v   = A->a + A->i[i] + shift;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);

    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    }
  }
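  /* Editorial note: the VecPipelineBegin/End pairs above and below appear to
     order the processes so that each relaxes its rows only after receiving
     updated ghost values from its predecessors (PIPELINE_DOWN) or successors
     (PIPELINE_UP), giving a globally ordered sweep; the SOR_LOCAL_* variants
     further below skip this coupling and smooth with frozen ghost values. */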
  else if (flag & SOR_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i] + !shift;
        v   = A->a + diag[i] + !shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
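
/*
   Editorial note on the relaxation loops in MatRelax_MPIAIJ() above: each
   SPARSEDENSEMDOT() pass subtracts a sparse dot product from sum, so when
   the whole row (diagonal included) is traversed, sum = b_i - (A x)_i and
   the update

        x_i <- (1 - omega) x_i + omega ( x_i + (b_i - (A x)_i) / d )

   is the standard SOR/Gauss-Seidel relaxation with the residual divided by
   the (possibly fshift-ed) diagonal d. The zero-initial-guess branches
   traverse only the lower (or upper) triangle because the remaining terms
   vanish when x starts at zero.
*/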

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}
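
/*
   Editorial note: MatGetRow_MPIAIJ() above merges the two (already sorted)
   pieces of a row in one pass: off-diagonal (B) columns that fall below
   cstart come first, then all diagonal-block (A) columns shifted by cstart,
   then the remaining B columns, so the returned row is sorted by global
   column number.
*/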

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for ( i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for ( i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for the two norm");
    }
  }
  return 0;
}
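
/*
   Editorial note: the 1-norm above accumulates per-column absolute sums in
   dense work arrays of global length N before reducing; like colmap in
   CreateColmap_Private(), this is simple but non-scalable for matrices with
   very many columns.
*/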

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) { aj[i] += a->cstart + shift; }
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) { aj[i] -= a->cstart + shift; }

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) { cols[i] = a->garray[aj[i]+shift]; }
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we build a new matrix, free
       the data structures of a, and copy the new header over the old one. */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}
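
/*
   Editorial note: the transpose above is formed by feeding each local row
   back through MatSetValues() with the row and column arguments exchanged;
   entries whose transposed row lives on another process travel through the
   normal assembly stash, so no extra communication code is needed here.
*/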

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local
   submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
   diagonal portion of the local submatrix (possibly different for each row)
   or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
   submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
   off-diagonal portion of the local submatrix (possibly different for each
   row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0. Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.
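
   Example:
   A sketch of typical use (editorial; the sizes and variable names are
   illustrative and error checking is omitted):
$      Mat    A;
$      int    row, cols[3]; Scalar vals[3];
$      MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                      5,PETSC_NULL,2,PETSC_NULL,&A);
$      /* each process sets entries, preferably in rows it owns */
$      MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);
$      MatAssemblyBegin(A,FINAL_ASSEMBLY);
$      MatAssemblyEnd(A,FINAL_ASSEMBLY);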

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2];

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data      = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 1;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries as they are set */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"
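
/*
   Editorial summary of MatLoad_MPIAIJ() below: process 0 reads the header,
   the row lengths, the column indices and the numerical values from the
   binary file, shipping each other process the data for its contiguous
   block of rows; every process then splits its row lengths into diagonal
   and off-diagonal counts so the two local blocks are preallocated exactly,
   and fills the matrix with MatSetValues() followed by a normal assembly.
*/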

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }
  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}