1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.127 1996/02/23 23:06:31 curfman Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 328 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 329 330 /* first count number of contributors to each processor */ 331 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 332 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 333 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 334 for ( i=0; i<N; i++ ) { 335 idx = rows[i]; 336 found = 0; 337 for ( j=0; j<size; j++ ) { 338 if (idx >= owners[j] && idx < owners[j+1]) { 339 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 340 } 341 } 342 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 343 } 344 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 345 346 /* inform other processors of number of messages and max length*/ 347 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 348 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 349 nrecvs = work[rank]; 350 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 351 nmax = work[rank]; 352 PetscFree(work); 353 354 /* post receives: */ 355 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 356 CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 358 CHKPTRQ(recv_waits); 359 for ( i=0; i<nrecvs; i++ ) { 360 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 440 int ierr; 441 442 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 444 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 446 return 0; 447 } 448 449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452 int ierr; 453 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 455 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457 return 0; 458 } 459 460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461 { 462 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463 int ierr; 464 465 /* do nondiagonal part */ 466 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 467 /* send it on its way */ 468 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 469 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470 /* do local part */ 471 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 472 /* receive remote parts: note this assumes the values are not actually */ 473 /* inserted in yy until the next line, which is true for my implementation*/ 474 /* but is not perhaps always true. */ 475 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 476 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477 return 0; 478 } 479 480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483 int ierr; 484 485 /* do nondiagonal part */ 486 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 487 /* send it on its way */ 488 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 /* do local part */ 491 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 492 /* receive remote parts: note this assumes the values are not actually */ 493 /* inserted in yy until the next line, which is true for my implementation*/ 494 /* but is not perhaps always true. */ 495 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 496 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497 return 0; 498 } 499 500 /* 501 This only works correctly for square matrices where the subblock A->A is the 502 diagonal block 503 */ 504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 505 { 506 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507 return MatGetDiagonal(a->A,v); 508 } 509 510 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 511 { 512 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 513 int ierr; 514 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 515 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 516 return 0; 517 } 518 519 static int MatDestroy_MPIAIJ(PetscObject obj) 520 { 521 Mat mat = (Mat) obj; 522 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 523 int ierr; 524 #if defined(PETSC_LOG) 525 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 526 #endif 527 PetscFree(aij->rowners); 528 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 529 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 530 if (aij->colmap) PetscFree(aij->colmap); 531 if (aij->garray) PetscFree(aij->garray); 532 if (aij->lvec) VecDestroy(aij->lvec); 533 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 534 if (aij->rowvalues) PetscFree(aij->rowvalues); 535 PetscFree(aij); 536 PLogObjectDestroy(mat); 537 PetscHeaderDestroy(mat); 538 return 0; 539 } 540 #include "draw.h" 541 #include "pinclude/pviewer.h" 542 543 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 546 int ierr; 547 548 if (aij->size == 1) { 549 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 550 } 551 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 552 return 0; 553 } 554 555 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 556 { 557 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 558 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 559 int ierr, format,shift = C->indexshift,rank; 560 PetscObject vobj = (PetscObject) viewer; 561 FILE *fd; 562 563 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 564 ierr = ViewerFileGetFormat_Private(viewer,&format); 565 if (format == FILE_FORMAT_INFO_DETAILED) { 566 int nz, nzalloc, mem, flg; 567 MPI_Comm_rank(mat->comm,&rank); 568 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 569 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 570 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 571 MPIU_Seq_begin(mat->comm,1); 572 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 573 rank,aij->m,nz,nzalloc,mem); 574 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 575 rank,aij->m,nz,nzalloc,mem); 576 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 577 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 578 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 579 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 580 fflush(fd); 581 MPIU_Seq_end(mat->comm,1); 582 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 583 return 0; 584 } 585 else if (format == FILE_FORMAT_INFO) { 586 return 0; 587 } 588 } 589 590 if (vobj->type == ASCII_FILE_VIEWER) { 591 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 592 MPIU_Seq_begin(mat->comm,1); 593 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 594 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 595 aij->cend); 596 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 597 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 598 fflush(fd); 599 MPIU_Seq_end(mat->comm,1); 600 } 601 else { 602 int size = aij->size; 603 rank = aij->rank; 604 if (size == 1) { 605 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 606 } 607 else { 608 /* assemble the entire matrix onto first processor. */ 609 Mat A; 610 Mat_SeqAIJ *Aloc; 611 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 612 Scalar *a; 613 614 if (!rank) { 615 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 616 CHKERRQ(ierr); 617 } 618 else { 619 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 620 CHKERRQ(ierr); 621 } 622 PLogObjectParent(mat,A); 623 624 /* copy over the A part */ 625 Aloc = (Mat_SeqAIJ*) aij->A->data; 626 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 627 row = aij->rstart; 628 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 629 for ( i=0; i<m; i++ ) { 630 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 631 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 632 } 633 aj = Aloc->j; 634 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 635 636 /* copy over the B part */ 637 Aloc = (Mat_SeqAIJ*) aij->B->data; 638 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 639 row = aij->rstart; 640 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 641 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 642 for ( i=0; i<m; i++ ) { 643 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 644 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 645 } 646 PetscFree(ct); 647 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 648 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 649 if (!rank) { 650 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 651 } 652 ierr = MatDestroy(A); CHKERRQ(ierr); 653 } 654 } 655 return 0; 656 } 657 658 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 659 { 660 Mat mat = (Mat) obj; 661 int ierr; 662 PetscObject vobj = (PetscObject) viewer; 663 664 if (!viewer) { 665 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 666 } 667 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 668 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 669 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670 } 671 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 672 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673 } 674 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 675 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 676 } 677 else if (vobj->cookie == DRAW_COOKIE) { 678 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 679 } 680 else if (vobj->type == BINARY_FILE_VIEWER) { 681 return MatView_MPIAIJ_Binary(mat,viewer); 682 } 683 return 0; 684 } 685 686 extern int MatMarkDiag_SeqAIJ(Mat); 687 /* 688 This has to provide several versions. 689 690 1) per sequential 691 2) a) use only local smoothing updating outer values only once. 692 b) local smoothing updating outer values each inner iteration 693 3) color updating out values betwen colors. 694 */ 695 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 696 double fshift,int its,Vec xx) 697 { 698 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 699 Mat AA = mat->A, BB = mat->B; 700 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 701 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 702 int ierr,*idx, *diag; 703 int n = mat->n, m = mat->m, i,shift = A->indexshift; 704 Vec tt; 705 706 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 707 xs = x + shift; /* shift by one for index start of 1 */ 708 ls = ls + shift; 709 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 710 diag = A->diag; 711 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 712 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 713 } 714 if (flag & SOR_EISENSTAT) { 715 /* Let A = L + U + D; where L is lower trianglar, 716 U is upper triangular, E is diagonal; This routine applies 717 718 (L + E)^{-1} A (U + E)^{-1} 719 720 to a vector efficiently using Eisenstat's trick. This is for 721 the case of SSOR preconditioner, so E is D/omega where omega 722 is the relaxation factor. 723 */ 724 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 725 PLogObjectParent(matin,tt); 726 VecGetArray(tt,&t); 727 scale = (2.0/omega) - 1.0; 728 /* x = (E + U)^{-1} b */ 729 VecSet(&zero,mat->lvec); 730 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 731 mat->Mvctx); CHKERRQ(ierr); 732 for ( i=m-1; i>-1; i-- ) { 733 n = A->i[i+1] - diag[i] - 1; 734 idx = A->j + diag[i] + !shift; 735 v = A->a + diag[i] + !shift; 736 sum = b[i]; 737 SPARSEDENSEMDOT(sum,xs,v,idx,n); 738 d = fshift + A->a[diag[i]+shift]; 739 n = B->i[i+1] - B->i[i]; 740 idx = B->j + B->i[i] + shift; 741 v = B->a + B->i[i] + shift; 742 SPARSEDENSEMDOT(sum,ls,v,idx,n); 743 x[i] = omega*(sum/d); 744 } 745 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 746 mat->Mvctx); CHKERRQ(ierr); 747 748 /* t = b - (2*E - D)x */ 749 v = A->a; 750 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 751 752 /* t = (E + L)^{-1}t */ 753 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 754 diag = A->diag; 755 VecSet(&zero,mat->lvec); 756 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 757 mat->Mvctx); CHKERRQ(ierr); 758 for ( i=0; i<m; i++ ) { 759 n = diag[i] - A->i[i]; 760 idx = A->j + A->i[i] + shift; 761 v = A->a + A->i[i] + shift; 762 sum = t[i]; 763 SPARSEDENSEMDOT(sum,ts,v,idx,n); 764 d = fshift + A->a[diag[i]+shift]; 765 n = B->i[i+1] - B->i[i]; 766 idx = B->j + B->i[i] + shift; 767 v = B->a + B->i[i] + shift; 768 SPARSEDENSEMDOT(sum,ls,v,idx,n); 769 t[i] = omega*(sum/d); 770 } 771 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 772 mat->Mvctx); CHKERRQ(ierr); 773 /* x = x + t */ 774 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 775 VecDestroy(tt); 776 return 0; 777 } 778 779 780 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 781 if (flag & SOR_ZERO_INITIAL_GUESS) { 782 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 783 } 784 else { 785 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 786 CHKERRQ(ierr); 787 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 788 CHKERRQ(ierr); 789 } 790 while (its--) { 791 /* go down through the rows */ 792 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 793 mat->Mvctx); CHKERRQ(ierr); 794 for ( i=0; i<m; i++ ) { 795 n = A->i[i+1] - A->i[i]; 796 idx = A->j + A->i[i] + shift; 797 v = A->a + A->i[i] + shift; 798 sum = b[i]; 799 SPARSEDENSEMDOT(sum,xs,v,idx,n); 800 d = fshift + A->a[diag[i]+shift]; 801 n = B->i[i+1] - B->i[i]; 802 idx = B->j + B->i[i] + shift; 803 v = B->a + B->i[i] + shift; 804 SPARSEDENSEMDOT(sum,ls,v,idx,n); 805 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 806 } 807 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 808 mat->Mvctx); CHKERRQ(ierr); 809 /* come up through the rows */ 810 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 811 mat->Mvctx); CHKERRQ(ierr); 812 for ( i=m-1; i>-1; i-- ) { 813 n = A->i[i+1] - A->i[i]; 814 idx = A->j + A->i[i] + shift; 815 v = A->a + A->i[i] + shift; 816 sum = b[i]; 817 SPARSEDENSEMDOT(sum,xs,v,idx,n); 818 d = fshift + A->a[diag[i]+shift]; 819 n = B->i[i+1] - B->i[i]; 820 idx = B->j + B->i[i] + shift; 821 v = B->a + B->i[i] + shift; 822 SPARSEDENSEMDOT(sum,ls,v,idx,n); 823 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 824 } 825 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 826 mat->Mvctx); CHKERRQ(ierr); 827 } 828 } 829 else if (flag & SOR_FORWARD_SWEEP){ 830 if (flag & SOR_ZERO_INITIAL_GUESS) { 831 VecSet(&zero,mat->lvec); 832 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 833 mat->Mvctx); CHKERRQ(ierr); 834 for ( i=0; i<m; i++ ) { 835 n = diag[i] - A->i[i]; 836 idx = A->j + A->i[i] + shift; 837 v = A->a + A->i[i] + shift; 838 sum = b[i]; 839 SPARSEDENSEMDOT(sum,xs,v,idx,n); 840 d = fshift + A->a[diag[i]+shift]; 841 n = B->i[i+1] - B->i[i]; 842 idx = B->j + B->i[i] + shift; 843 v = B->a + B->i[i] + shift; 844 SPARSEDENSEMDOT(sum,ls,v,idx,n); 845 x[i] = omega*(sum/d); 846 } 847 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 848 mat->Mvctx); CHKERRQ(ierr); 849 its--; 850 } 851 while (its--) { 852 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 853 CHKERRQ(ierr); 854 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 855 CHKERRQ(ierr); 856 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 857 mat->Mvctx); CHKERRQ(ierr); 858 for ( i=0; i<m; i++ ) { 859 n = A->i[i+1] - A->i[i]; 860 idx = A->j + A->i[i] + shift; 861 v = A->a + A->i[i] + shift; 862 sum = b[i]; 863 SPARSEDENSEMDOT(sum,xs,v,idx,n); 864 d = fshift + A->a[diag[i]+shift]; 865 n = B->i[i+1] - B->i[i]; 866 idx = B->j + B->i[i] + shift; 867 v = B->a + B->i[i] + shift; 868 SPARSEDENSEMDOT(sum,ls,v,idx,n); 869 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 870 } 871 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 872 mat->Mvctx); CHKERRQ(ierr); 873 } 874 } 875 else if (flag & SOR_BACKWARD_SWEEP){ 876 if (flag & SOR_ZERO_INITIAL_GUESS) { 877 VecSet(&zero,mat->lvec); 878 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 879 mat->Mvctx); CHKERRQ(ierr); 880 for ( i=m-1; i>-1; i-- ) { 881 n = A->i[i+1] - diag[i] - 1; 882 idx = A->j + diag[i] + !shift; 883 v = A->a + diag[i] + !shift; 884 sum = b[i]; 885 SPARSEDENSEMDOT(sum,xs,v,idx,n); 886 d = fshift + A->a[diag[i]+shift]; 887 n = B->i[i+1] - B->i[i]; 888 idx = B->j + B->i[i] + shift; 889 v = B->a + B->i[i] + shift; 890 SPARSEDENSEMDOT(sum,ls,v,idx,n); 891 x[i] = omega*(sum/d); 892 } 893 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 894 mat->Mvctx); CHKERRQ(ierr); 895 its--; 896 } 897 while (its--) { 898 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 899 mat->Mvctx); CHKERRQ(ierr); 900 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 901 mat->Mvctx); CHKERRQ(ierr); 902 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 903 mat->Mvctx); CHKERRQ(ierr); 904 for ( i=m-1; i>-1; i-- ) { 905 n = A->i[i+1] - A->i[i]; 906 idx = A->j + A->i[i] + shift; 907 v = A->a + A->i[i] + shift; 908 sum = b[i]; 909 SPARSEDENSEMDOT(sum,xs,v,idx,n); 910 d = fshift + A->a[diag[i]+shift]; 911 n = B->i[i+1] - B->i[i]; 912 idx = B->j + B->i[i] + shift; 913 v = B->a + B->i[i] + shift; 914 SPARSEDENSEMDOT(sum,ls,v,idx,n); 915 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 916 } 917 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 918 mat->Mvctx); CHKERRQ(ierr); 919 } 920 } 921 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 922 if (flag & SOR_ZERO_INITIAL_GUESS) { 923 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 924 } 925 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 926 CHKERRQ(ierr); 927 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 928 CHKERRQ(ierr); 929 while (its--) { 930 /* go down through the rows */ 931 for ( i=0; i<m; i++ ) { 932 n = A->i[i+1] - A->i[i]; 933 idx = A->j + A->i[i] + shift; 934 v = A->a + A->i[i] + shift; 935 sum = b[i]; 936 SPARSEDENSEMDOT(sum,xs,v,idx,n); 937 d = fshift + A->a[diag[i]+shift]; 938 n = B->i[i+1] - B->i[i]; 939 idx = B->j + B->i[i] + shift; 940 v = B->a + B->i[i] + shift; 941 SPARSEDENSEMDOT(sum,ls,v,idx,n); 942 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 943 } 944 /* come up through the rows */ 945 for ( i=m-1; i>-1; i-- ) { 946 n = A->i[i+1] - A->i[i]; 947 idx = A->j + A->i[i] + shift; 948 v = A->a + A->i[i] + shift; 949 sum = b[i]; 950 SPARSEDENSEMDOT(sum,xs,v,idx,n); 951 d = fshift + A->a[diag[i]+shift]; 952 n = B->i[i+1] - B->i[i]; 953 idx = B->j + B->i[i] + shift; 954 v = B->a + B->i[i] + shift; 955 SPARSEDENSEMDOT(sum,ls,v,idx,n); 956 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 957 } 958 } 959 } 960 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 963 } 964 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 965 CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 967 CHKERRQ(ierr); 968 while (its--) { 969 for ( i=0; i<m; i++ ) { 970 n = A->i[i+1] - A->i[i]; 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 981 } 982 } 983 } 984 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 987 } 988 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 989 mat->Mvctx); CHKERRQ(ierr); 990 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 991 mat->Mvctx); CHKERRQ(ierr); 992 while (its--) { 993 for ( i=m-1; i>-1; i-- ) { 994 n = A->i[i+1] - A->i[i]; 995 idx = A->j + A->i[i] + shift; 996 v = A->a + A->i[i] + shift; 997 sum = b[i]; 998 SPARSEDENSEMDOT(sum,xs,v,idx,n); 999 d = fshift + A->a[diag[i]+shift]; 1000 n = B->i[i+1] - B->i[i]; 1001 idx = B->j + B->i[i] + shift; 1002 v = B->a + B->i[i] + shift; 1003 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1005 } 1006 } 1007 } 1008 return 0; 1009 } 1010 1011 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1012 int *nzalloc,int *mem) 1013 { 1014 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1015 Mat A = mat->A, B = mat->B; 1016 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1017 1018 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1019 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1020 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1021 if (flag == MAT_LOCAL) { 1022 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1023 } else if (flag == MAT_GLOBAL_MAX) { 1024 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1025 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1026 } else if (flag == MAT_GLOBAL_SUM) { 1027 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1028 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1029 } 1030 return 0; 1031 } 1032 1033 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1034 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1035 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1036 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1037 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1038 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1039 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1040 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1041 1042 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1045 1046 if (op == NO_NEW_NONZERO_LOCATIONS || 1047 op == YES_NEW_NONZERO_LOCATIONS || 1048 op == COLUMNS_SORTED || 1049 op == ROW_ORIENTED) { 1050 MatSetOption(a->A,op); 1051 MatSetOption(a->B,op); 1052 } 1053 else if (op == ROWS_SORTED || 1054 op == SYMMETRIC_MATRIX || 1055 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1056 op == YES_NEW_DIAGONALS) 1057 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1058 else if (op == COLUMN_ORIENTED) { 1059 a->roworiented = 0; 1060 MatSetOption(a->A,op); 1061 MatSetOption(a->B,op); 1062 } 1063 else if (op == NO_NEW_DIAGONALS) 1064 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1065 else 1066 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1067 return 0; 1068 } 1069 1070 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1071 { 1072 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1073 *m = mat->M; *n = mat->N; 1074 return 0; 1075 } 1076 1077 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 *m = mat->m; *n = mat->N; 1081 return 0; 1082 } 1083 1084 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087 *m = mat->rstart; *n = mat->rend; 1088 return 0; 1089 } 1090 1091 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 Scalar *vworkA, *vworkB, **pvA, **pvB; 1095 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1096 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1097 1098 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1099 mat->getrowactive = PETSC_TRUE; 1100 1101 if (!mat->rowvalues) { 1102 /* 1103 allocate enough space to hold information from the longest row. 1104 */ 1105 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1106 int max = 1,n = mat->n,tmp; 1107 for ( i=0; i<n; i++ ) { 1108 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1109 if (max < tmp) { max = tmp; } 1110 } 1111 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1112 CHKPTRQ(mat->rowvalues); 1113 mat->rowindices = (int *) (mat->rowvalues + max); 1114 } 1115 1116 1117 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1118 lrow = row - rstart; 1119 1120 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1121 if (!v) {pvA = 0; pvB = 0;} 1122 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1123 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1124 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1125 nztot = nzA + nzB; 1126 1127 if (v || idx) { 1128 if (nztot) { 1129 /* Sort by increasing column numbers, assuming A and B already sorted */ 1130 int imark; 1131 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1132 if (v) { 1133 *v = mat->rowvalues; 1134 for ( i=0; i<nzB; i++ ) { 1135 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1136 else break; 1137 } 1138 imark = i; 1139 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1140 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1141 } 1142 if (idx) { 1143 *idx = mat->rowindices; 1144 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1145 for ( i=0; i<nzB; i++ ) { 1146 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1147 else break; 1148 } 1149 imark = i; 1150 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1151 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1152 } 1153 } 1154 else {*idx = 0; *v=0;} 1155 } 1156 *nz = nztot; 1157 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1158 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1159 return 0; 1160 } 1161 1162 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1163 { 1164 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1165 if (aij->getrowactive == PETSC_FALSE) { 1166 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1167 } 1168 aij->getrowactive = PETSC_FALSE; 1169 return 0; 1170 } 1171 1172 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1173 { 1174 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1175 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1176 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1177 double sum = 0.0; 1178 Scalar *v; 1179 1180 if (aij->size == 1) { 1181 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1182 } else { 1183 if (type == NORM_FROBENIUS) { 1184 v = amat->a; 1185 for (i=0; i<amat->nz; i++ ) { 1186 #if defined(PETSC_COMPLEX) 1187 sum += real(conj(*v)*(*v)); v++; 1188 #else 1189 sum += (*v)*(*v); v++; 1190 #endif 1191 } 1192 v = bmat->a; 1193 for (i=0; i<bmat->nz; i++ ) { 1194 #if defined(PETSC_COMPLEX) 1195 sum += real(conj(*v)*(*v)); v++; 1196 #else 1197 sum += (*v)*(*v); v++; 1198 #endif 1199 } 1200 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1201 *norm = sqrt(*norm); 1202 } 1203 else if (type == NORM_1) { /* max column norm */ 1204 double *tmp, *tmp2; 1205 int *jj, *garray = aij->garray; 1206 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1207 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1208 PetscMemzero(tmp,aij->N*sizeof(double)); 1209 *norm = 0.0; 1210 v = amat->a; jj = amat->j; 1211 for ( j=0; j<amat->nz; j++ ) { 1212 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1213 } 1214 v = bmat->a; jj = bmat->j; 1215 for ( j=0; j<bmat->nz; j++ ) { 1216 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1217 } 1218 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1219 for ( j=0; j<aij->N; j++ ) { 1220 if (tmp2[j] > *norm) *norm = tmp2[j]; 1221 } 1222 PetscFree(tmp); PetscFree(tmp2); 1223 } 1224 else if (type == NORM_INFINITY) { /* max row norm */ 1225 double ntemp = 0.0; 1226 for ( j=0; j<amat->m; j++ ) { 1227 v = amat->a + amat->i[j] + shift; 1228 sum = 0.0; 1229 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1230 sum += PetscAbsScalar(*v); v++; 1231 } 1232 v = bmat->a + bmat->i[j] + shift; 1233 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1234 sum += PetscAbsScalar(*v); v++; 1235 } 1236 if (sum > ntemp) ntemp = sum; 1237 } 1238 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1239 } 1240 else { 1241 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1242 } 1243 } 1244 return 0; 1245 } 1246 1247 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1248 { 1249 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1250 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1251 int ierr,shift = Aloc->indexshift; 1252 Mat B; 1253 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1254 Scalar *array; 1255 1256 if (matout == PETSC_NULL && M != N) 1257 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1258 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1259 PETSC_NULL,&B); CHKERRQ(ierr); 1260 1261 /* copy over the A part */ 1262 Aloc = (Mat_SeqAIJ*) a->A->data; 1263 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1264 row = a->rstart; 1265 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1266 for ( i=0; i<m; i++ ) { 1267 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1268 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1269 } 1270 aj = Aloc->j; 1271 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1272 1273 /* copy over the B part */ 1274 Aloc = (Mat_SeqAIJ*) a->B->data; 1275 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1276 row = a->rstart; 1277 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1278 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1279 for ( i=0; i<m; i++ ) { 1280 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1281 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1282 } 1283 PetscFree(ct); 1284 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1285 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1286 if (matout != PETSC_NULL) { 1287 *matout = B; 1288 } else { 1289 /* This isn't really an in-place transpose .... but free data structures from a */ 1290 PetscFree(a->rowners); 1291 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1292 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1293 if (a->colmap) PetscFree(a->colmap); 1294 if (a->garray) PetscFree(a->garray); 1295 if (a->lvec) VecDestroy(a->lvec); 1296 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1297 PetscFree(a); 1298 PetscMemcpy(A,B,sizeof(struct _Mat)); 1299 PetscHeaderDestroy(B); 1300 } 1301 return 0; 1302 } 1303 1304 extern int MatPrintHelp_SeqAIJ(Mat); 1305 static int MatPrintHelp_MPIAIJ(Mat A) 1306 { 1307 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1308 1309 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1310 else return 0; 1311 } 1312 1313 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1314 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1315 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1316 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1317 /* -------------------------------------------------------------------*/ 1318 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1319 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1320 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1321 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1322 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1323 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1324 MatLUFactor_MPIAIJ,0, 1325 MatRelax_MPIAIJ, 1326 MatTranspose_MPIAIJ, 1327 MatGetInfo_MPIAIJ,0, 1328 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1329 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1330 0, 1331 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1332 MatGetReordering_MPIAIJ, 1333 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1334 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1335 MatILUFactorSymbolic_MPIAIJ,0, 1336 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1337 0,0,0, 1338 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1339 MatPrintHelp_MPIAIJ, 1340 MatScale_MPIAIJ}; 1341 1342 /*@C 1343 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1344 (the default parallel PETSc format). For good matrix assembly performance 1345 the user should preallocate the matrix storage by setting the parameters 1346 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1347 performance can be increased by more than a factor of 50. 1348 1349 Input Parameters: 1350 . comm - MPI communicator 1351 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1352 . n - number of local columns (or PETSC_DECIDE to have calculated 1353 if N is given) 1354 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1355 . N - number of global columns (or PETSC_DECIDE to have calculated 1356 if n is given) 1357 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1358 (same for all local rows) 1359 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1360 or null (possibly different for each row). You must leave room 1361 for the diagonal entry even if it is zero. 1362 . o_nz - number of nonzeros per row in off-diagonal portion of local 1363 submatrix (same for all local rows). 1364 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1365 submatrix or null (possibly different for each row). 1366 1367 Output Parameter: 1368 . newmat - the matrix 1369 1370 Notes: 1371 The AIJ format (also called the Yale sparse matrix format or 1372 compressed row storage), is fully compatible with standard Fortran 77 1373 storage. That is, the stored row and column indices can begin at 1374 either one (as in Fortran) or zero. See the users manual for details. 1375 1376 The user MUST specify either the local or global matrix dimensions 1377 (possibly both). 1378 1379 By default, this format uses inodes (identical nodes) when possible. 1380 We search for consecutive rows with the same nonzero structure, thereby 1381 reusing matrix information to achieve increased efficiency. 1382 1383 Options Database Keys: 1384 $ -mat_aij_no_inode - Do not use inodes 1385 $ -mat_aij_inode_limit <limit> - Set inode limit. 1386 $ (max limit=5) 1387 $ -mat_aij_oneindex - Internally use indexing starting at 1 1388 $ rather than 0. Note: When calling MatSetValues(), 1389 $ the user still MUST index entries starting at 0! 1390 1391 Storage Information: 1392 For a square global matrix we define each processor's diagonal portion 1393 to be its local rows and the corresponding columns (a square submatrix); 1394 each processor's off-diagonal portion encompasses the remainder of the 1395 local matrix (a rectangular submatrix). 1396 1397 The user can specify preallocated storage for the diagonal part of 1398 the local submatrix with either d_nz or d_nnz (not both). Set 1399 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1400 memory allocation. Likewise, specify preallocated storage for the 1401 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1402 1403 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1404 the figure below we depict these three local rows and all columns (0-11). 1405 1406 $ 0 1 2 3 4 5 6 7 8 9 10 11 1407 $ ------------------- 1408 $ row 3 | o o o d d d o o o o o o 1409 $ row 4 | o o o d d d o o o o o o 1410 $ row 5 | o o o d d d o o o o o o 1411 $ ------------------- 1412 $ 1413 1414 Thus, any entries in the d locations are stored in the d (diagonal) 1415 submatrix, and any entries in the o locations are stored in the 1416 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1417 stored simply in the MATSEQAIJ format for compressed row storage. 1418 1419 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1420 and o_nz should indicate the number of nonzeros per row in the o matrix. 1421 In general, for PDE problems in which most nonzeros are near the diagonal, 1422 one expects d_nz >> o_nz. For additional details, see the users manual 1423 chapter on matrices and the file $(PETSC_DIR)/Performance. 1424 1425 .keywords: matrix, aij, compressed row, sparse, parallel 1426 1427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1428 @*/ 1429 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1430 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1431 { 1432 Mat mat; 1433 Mat_MPIAIJ *a; 1434 int ierr, i,sum[2],work[2]; 1435 1436 *newmat = 0; 1437 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1438 PLogObjectCreate(mat); 1439 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1440 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1441 mat->destroy = MatDestroy_MPIAIJ; 1442 mat->view = MatView_MPIAIJ; 1443 mat->factor = 0; 1444 mat->assembled = PETSC_FALSE; 1445 1446 a->insertmode = NOT_SET_VALUES; 1447 MPI_Comm_rank(comm,&a->rank); 1448 MPI_Comm_size(comm,&a->size); 1449 1450 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1451 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1452 1453 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1454 work[0] = m; work[1] = n; 1455 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1456 if (M == PETSC_DECIDE) M = sum[0]; 1457 if (N == PETSC_DECIDE) N = sum[1]; 1458 } 1459 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1460 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1461 a->m = m; 1462 a->n = n; 1463 a->N = N; 1464 a->M = M; 1465 1466 /* build local table of row and column ownerships */ 1467 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1468 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1469 a->cowners = a->rowners + a->size + 1; 1470 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1471 a->rowners[0] = 0; 1472 for ( i=2; i<=a->size; i++ ) { 1473 a->rowners[i] += a->rowners[i-1]; 1474 } 1475 a->rstart = a->rowners[a->rank]; 1476 a->rend = a->rowners[a->rank+1]; 1477 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1478 a->cowners[0] = 0; 1479 for ( i=2; i<=a->size; i++ ) { 1480 a->cowners[i] += a->cowners[i-1]; 1481 } 1482 a->cstart = a->cowners[a->rank]; 1483 a->cend = a->cowners[a->rank+1]; 1484 1485 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1486 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1487 PLogObjectParent(mat,a->A); 1488 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1489 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1490 PLogObjectParent(mat,a->B); 1491 1492 /* build cache for off array entries formed */ 1493 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1494 a->colmap = 0; 1495 a->garray = 0; 1496 a->roworiented = 1; 1497 1498 /* stuff used for matrix vector multiply */ 1499 a->lvec = 0; 1500 a->Mvctx = 0; 1501 1502 /* stuff for MatGetRow() */ 1503 a->rowindices = 0; 1504 a->rowvalues = 0; 1505 a->getrowactive = PETSC_FALSE; 1506 1507 *newmat = mat; 1508 return 0; 1509 } 1510 1511 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1512 { 1513 Mat mat; 1514 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1515 int ierr, len,flg; 1516 1517 *newmat = 0; 1518 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1519 PLogObjectCreate(mat); 1520 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1521 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1522 mat->destroy = MatDestroy_MPIAIJ; 1523 mat->view = MatView_MPIAIJ; 1524 mat->factor = matin->factor; 1525 mat->assembled = PETSC_TRUE; 1526 1527 a->m = oldmat->m; 1528 a->n = oldmat->n; 1529 a->M = oldmat->M; 1530 a->N = oldmat->N; 1531 1532 a->rstart = oldmat->rstart; 1533 a->rend = oldmat->rend; 1534 a->cstart = oldmat->cstart; 1535 a->cend = oldmat->cend; 1536 a->size = oldmat->size; 1537 a->rank = oldmat->rank; 1538 a->insertmode = NOT_SET_VALUES; 1539 1540 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1541 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1542 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1543 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1544 if (oldmat->colmap) { 1545 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1546 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1547 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1548 } else a->colmap = 0; 1549 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1550 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1551 PLogObjectMemory(mat,len*sizeof(int)); 1552 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1553 } else a->garray = 0; 1554 1555 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1556 PLogObjectParent(mat,a->lvec); 1557 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1558 PLogObjectParent(mat,a->Mvctx); 1559 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1560 PLogObjectParent(mat,a->A); 1561 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1562 PLogObjectParent(mat,a->B); 1563 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1564 if (flg) { 1565 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1566 } 1567 *newmat = mat; 1568 return 0; 1569 } 1570 1571 #include "sysio.h" 1572 1573 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1574 { 1575 Mat A; 1576 int i, nz, ierr, j,rstart, rend, fd; 1577 Scalar *vals,*svals; 1578 PetscObject vobj = (PetscObject) bview; 1579 MPI_Comm comm = vobj->comm; 1580 MPI_Status status; 1581 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1582 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1583 int tag = ((PetscObject)bview)->tag; 1584 1585 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1586 if (!rank) { 1587 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1588 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1589 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1590 } 1591 1592 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1593 M = header[1]; N = header[2]; 1594 /* determine ownership of all rows */ 1595 m = M/size + ((M % size) > rank); 1596 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1597 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1598 rowners[0] = 0; 1599 for ( i=2; i<=size; i++ ) { 1600 rowners[i] += rowners[i-1]; 1601 } 1602 rstart = rowners[rank]; 1603 rend = rowners[rank+1]; 1604 1605 /* distribute row lengths to all processors */ 1606 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1607 offlens = ourlens + (rend-rstart); 1608 if (!rank) { 1609 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1610 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1611 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1612 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1613 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1614 PetscFree(sndcounts); 1615 } 1616 else { 1617 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1618 } 1619 1620 if (!rank) { 1621 /* calculate the number of nonzeros on each processor */ 1622 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1623 PetscMemzero(procsnz,size*sizeof(int)); 1624 for ( i=0; i<size; i++ ) { 1625 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1626 procsnz[i] += rowlengths[j]; 1627 } 1628 } 1629 PetscFree(rowlengths); 1630 1631 /* determine max buffer needed and allocate it */ 1632 maxnz = 0; 1633 for ( i=0; i<size; i++ ) { 1634 maxnz = PetscMax(maxnz,procsnz[i]); 1635 } 1636 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1637 1638 /* read in my part of the matrix column indices */ 1639 nz = procsnz[0]; 1640 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1641 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1642 1643 /* read in every one elses and ship off */ 1644 for ( i=1; i<size; i++ ) { 1645 nz = procsnz[i]; 1646 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1647 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1648 } 1649 PetscFree(cols); 1650 } 1651 else { 1652 /* determine buffer space needed for message */ 1653 nz = 0; 1654 for ( i=0; i<m; i++ ) { 1655 nz += ourlens[i]; 1656 } 1657 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1658 1659 /* receive message of column indices*/ 1660 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1661 MPI_Get_count(&status,MPI_INT,&maxnz); 1662 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1663 } 1664 1665 /* loop over local rows, determining number of off diagonal entries */ 1666 PetscMemzero(offlens,m*sizeof(int)); 1667 jj = 0; 1668 for ( i=0; i<m; i++ ) { 1669 for ( j=0; j<ourlens[i]; j++ ) { 1670 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1671 jj++; 1672 } 1673 } 1674 1675 /* create our matrix */ 1676 for ( i=0; i<m; i++ ) { 1677 ourlens[i] -= offlens[i]; 1678 } 1679 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1680 A = *newmat; 1681 MatSetOption(A,COLUMNS_SORTED); 1682 for ( i=0; i<m; i++ ) { 1683 ourlens[i] += offlens[i]; 1684 } 1685 1686 if (!rank) { 1687 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1688 1689 /* read in my part of the matrix numerical values */ 1690 nz = procsnz[0]; 1691 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1692 1693 /* insert into matrix */ 1694 jj = rstart; 1695 smycols = mycols; 1696 svals = vals; 1697 for ( i=0; i<m; i++ ) { 1698 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1699 smycols += ourlens[i]; 1700 svals += ourlens[i]; 1701 jj++; 1702 } 1703 1704 /* read in other processors and ship out */ 1705 for ( i=1; i<size; i++ ) { 1706 nz = procsnz[i]; 1707 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1708 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1709 } 1710 PetscFree(procsnz); 1711 } 1712 else { 1713 /* receive numeric values */ 1714 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1715 1716 /* receive message of values*/ 1717 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1718 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1719 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1720 1721 /* insert into matrix */ 1722 jj = rstart; 1723 smycols = mycols; 1724 svals = vals; 1725 for ( i=0; i<m; i++ ) { 1726 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1727 smycols += ourlens[i]; 1728 svals += ourlens[i]; 1729 jj++; 1730 } 1731 } 1732 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1733 1734 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1735 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1736 return 0; 1737 } 1738