#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.146 1996/07/01 15:33:56 bsmith Exp balay $";
#endif

#include "mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non-scalable way since the 
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

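/*
   Note on the dispatch above: entries whose row is owned locally go into
   the diagonal block aij->A (columns in [cstart,cend)) or the off-diagonal
   block aij->B (all other columns, translated through colmap/garray);
   entries for rows owned by other processors are cached in aij->stash and
   communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/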
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[idxn[j]] + shift;
          if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
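
     Each stashed entry is packed as three consecutive Scalars; the
     layout produced by the packing loop below is

       svalues[3*k]   = (Scalar) global row index
       svalues[3*k+1] = (Scalar) global column index
       svalues[3*k+2] = numerical value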
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

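  /* all off-processor values have now been received and inserted locally;
     reset the insert mode and assemble the two sequential blocks */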
  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == ASCII_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
              rank,aij->m,nz,nzalloc,mem);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
              rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == ASCII_FORMAT_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF;
  }
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) colored, updating outer values between colors
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower triangular,
       U is upper triangular, D is diagonal; This routine applies

                (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
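        The buffer holds the values (max Scalars) followed by the column
        indices (max ints), so a single allocation serves both
        mat->rowvalues and mat->rowindices.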
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,n = mat->n,tmp;
    for ( i=0; i<n; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
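      /* the 1-norm is the maximum column sum: accumulate |a_ij| into the
         slot for each global column, sum the partial column sums across
         processors, then take the maximum over all columns */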
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose ....
       but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated 
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated 
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
       diagonal portion of local submatrix (possibly different for each row)
       or PETSC_NULL. You must leave room for the diagonal entry even if
       it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
       submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
       off-diagonal portion of the local submatrix (possibly different for
       each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$ 

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance, see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.
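
   Example:
   A minimal sketch of creating and assembling a matrix with this routine
   (error checking via CHKERRQ() omitted; the communicator comm and the
   global dimensions M and N are assumed to be set already):

$      Mat    A;
$      int    row = 0, col = 0, ierr;
$      Scalar value = 1.0;
$      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A);
$      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
$      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY);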

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2];

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 1;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}