#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.129 1996/02/28 00:12:20 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:Not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      /* this row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          /* column lies in the off-diagonal block; translate to local index */
          if (mat->was_assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash for communication at assembly time */
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
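/*
   Illustrative sketch (not from the original source): a caller may set
   entries for any global row, including rows owned by another processor;
   off-processor entries are stashed above and communicated during assembly.
   The indices row and col are hypothetical global indices:

      int    row,col,ierr;
      Scalar one = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);

   Note that INSERT_VALUES and ADD_VALUES cannot be mixed between two
   assemblies; the check at the top of MatSetValues_MPIAIJ() enforces this.
*/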
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}
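/*
   Illustrative sketch (not from the original source): since only local
   values may be retrieved, a caller should restrict queries to its own
   row range, e.g. using MatGetOwnershipRange().  Here col is assumed to
   hold some global column index of interest:

      int    rstart,rend,row,col,ierr;
      Scalar value;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetValues(mat,1,&row,1,&col,&value); CHKERRQ(ierr);
      }
*/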
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted while others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row, global column, value); we store the indices as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
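/*
   The counting scheme above is worth noting in isolation: each processor
   knows how many messages it will send, and two MPI_Allreduce() calls let
   every processor deduce how many messages it will receive and an upper
   bound on their length.  A minimal sketch of the pattern, with array
   names mirroring those used above (procs[j] is 1 if we send to j,
   nprocs[j] is how much we send to j):

      MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
      nreceives = work[rank];      number of messages destined for me
      MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
      nmax      = work[rank];      length of the longest such message
*/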
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
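/*
   Illustrative sketch (not from the original source): each processor may
   list any global rows in the index set; the code above routes each row
   to its owner.  For example, to zero two (hypothetical) global rows and
   place 1.0 on their diagonals:

      IS     is;
      int    rows[2],ierr;
      Scalar one = 1.0;
      ierr = ISCreateSeq(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);

   This is subject to the diagonal-entry caveat noted in the comment
   before MatZeroRows_MPIAIJ().
*/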
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}
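/*
   All four products above follow the same overlap pattern: start the
   scatter of ghost values, multiply by the diagonal block A while the
   messages are in flight, complete the scatter, then fold in the
   off-diagonal block B.  Schematically, for the rows owned by this
   processor,

      y_local = Adiag * x_local + Boff * x_ghost

   where x_ghost (stored in a->lvec) holds the off-processor entries of x
   needed by the local rows.  The transpose products run the same scatter
   in reverse, with ADD_VALUES accumulating contributions at their owners.
*/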
/*
   This only works correctly for square matrices where the subblock
   A->A is the diagonal block.
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*) aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      MPIU_Seq_begin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,nz,nzalloc,mem);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions.

     1) as per the sequential version
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is the strictly lower triangular part,
       U is the strictly upper triangular part, and D is the diagonal.
       This routine applies

                (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of an SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
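    /* Derivation of the steps below (added for clarity): with A = L + D + U
       and w = (E + U)^{-1} b,

          (L + E)^{-1} A (U + E)^{-1} b
             = (L + E)^{-1} ( (L + E) + (U + E) + (D - 2E) ) w
             = w + (L + E)^{-1} ( b + (D - 2E) w )

       which is exactly what the code computes: x = (E+U)^{-1} b, then
       t = (E+L)^{-1} (b - (2E - D) x), then x = x + t.  Since E = D/omega,
       2E - D = (2/omega - 1) D, which is the scale factor applied below. */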
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shift by one for index start of 1 */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  /* In the full-row sweeps below, sum accumulates b[i] minus the entire
     row inner product (diagonal included), so sum/d + x[i] reduces to the
     standard (S)OR update (b[i] - sum over j != i of a_ij x_j)/d, with
     d = a_ii + fshift. */
  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}
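/*
   Illustrative sketch (not from the original source): options are simply
   forwarded to (or ignored for) the two sequential blocks.  A typical use
   is to freeze the nonzero pattern once it is established, so that later
   assemblies with the same pattern do no new allocation:

      ierr = MatSetOption(A,NO_NEW_NONZERO_LOCATIONS); CHKERRQ(ierr);
*/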
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,n = mat->n,tmp;
    for ( i=0; i<n; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}
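/*
   Illustrative sketch (not from the original source): the usual access
   pattern is a loop over the locally owned rows, restoring each row
   before fetching the next (only one row may be active at a time, as the
   getrowactive flag above enforces):

      int    row,rstart,rend,nz,*cols,ierr;
      Scalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ... examine nz, cols and vals ...
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      }
*/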
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
   (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
   or null (possibly different for each row).  You must leave room
   for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
   submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
   submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  |  o o o d d d o o o o o o
$   row 4  |  o o o d d d o o o o o o
$   row 5  |  o o o d d d o o o o o o
$          -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For additional details, see the users manual
   chapter on matrices and the file $(PETSC_DIR)/Performance.
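   Example:
   The following sketch is illustrative only (M and N are assumed to hold
   the global dimensions); it creates a matrix with at most 5 preallocated
   nonzeros per row in the diagonal portion and 2 per row in the
   off-diagonal portion, letting PETSc choose the row distribution:
$      Mat A;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                             M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
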
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = 0;
  mat->assembled = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off-array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;

  /* stuff for MatGetRow() */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  *newmat = mat;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
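/*
   Illustrative sketch (not from the original source): MatLoad_MPIAIJ() is
   normally reached through MatLoad() on a binary viewer.  The
   ViewerFileOpenBinary() call and the file name below are assumptions
   based on contemporaneous PETSc usage, not taken from this file:

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                  &viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
*/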