1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.118 1996/01/31 17:34:55 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 return 0; 296 } 297 298 static int MatZeroEntries_MPIAIJ(Mat A) 299 { 300 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301 int ierr; 302 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 303 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 304 return 0; 305 } 306 307 /* the code does not do the diagonal entries correctly unless the 308 matrix is square and the column and row owerships are identical. 309 This is a BUG. The only way to fix it seems to be to access 310 aij->A and aij->B directly and not through the MatZeroRows() 311 routine. 312 */ 313 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 314 { 315 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 316 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 317 int *procs,*nprocs,j,found,idx,nsends,*work; 318 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 319 int *rvalues,tag = A->tag,count,base,slen,n,*source; 320 int *lens,imdex,*lrows,*values; 321 MPI_Comm comm = A->comm; 322 MPI_Request *send_waits,*recv_waits; 323 MPI_Status recv_status,*send_status; 324 IS istmp; 325 326 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 327 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 328 329 /* first count number of contributors to each processor */ 330 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 332 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 333 for ( i=0; i<N; i++ ) { 334 idx = rows[i]; 335 found = 0; 336 for ( j=0; j<size; j++ ) { 337 if (idx >= owners[j] && idx < owners[j+1]) { 338 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 339 } 340 } 341 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 342 } 343 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 344 345 /* inform other processors of number of messages and max length*/ 346 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 347 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 348 nrecvs = work[rank]; 349 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 350 nmax = work[rank]; 351 PetscFree(work); 352 353 /* post receives: */ 354 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 355 CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 357 CHKPTRQ(recv_waits); 358 for ( i=0; i<nrecvs; i++ ) { 359 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 368 CHKPTRQ(send_waits); 369 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 370 starts[0] = 0; 371 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 372 for ( i=0; i<N; i++ ) { 373 svalues[starts[owner[i]]++] = rows[i]; 374 } 375 ISRestoreIndices(is,&rows); 376 377 starts[0] = 0; 378 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); 386 387 base = owners[rank]; 388 389 /* wait on receives */ 390 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 391 source = lens + nrecvs; 392 count = nrecvs; slen = 0; 393 while (count) { 394 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 395 /* unpack receives into our local space */ 396 MPI_Get_count(&recv_status,MPI_INT,&n); 397 source[imdex] = recv_status.MPI_SOURCE; 398 lens[imdex] = n; 399 slen += n; 400 count--; 401 } 402 PetscFree(recv_waits); 403 404 /* move the data into the send scatter */ 405 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 406 count = 0; 407 for ( i=0; i<nrecvs; i++ ) { 408 values = rvalues + i*nmax; 409 for ( j=0; j<lens[i]; j++ ) { 410 lrows[count++] = values[j] - base; 411 } 412 } 413 PetscFree(rvalues); PetscFree(lens); 414 PetscFree(owner); PetscFree(nprocs); 415 416 /* actually zap the local rows */ 417 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418 PLogObjectParent(A,istmp); 419 PetscFree(lrows); 420 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 421 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 422 ierr = ISDestroy(istmp); CHKERRQ(ierr); 423 424 /* wait on sends */ 425 if (nsends) { 426 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 427 CHKPTRQ(send_status); 428 MPI_Waitall(nsends,send_waits,send_status); 429 PetscFree(send_status); 430 } 431 PetscFree(send_waits); PetscFree(svalues); 432 433 return 0; 434 } 435 436 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 437 { 438 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 439 int ierr; 440 441 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 443 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 445 return 0; 446 } 447 448 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449 { 450 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451 int ierr; 452 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 454 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456 return 0; 457 } 458 459 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460 { 461 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462 int ierr; 463 464 /* do nondiagonal part */ 465 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466 /* send it on its way */ 467 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 468 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469 /* do local part */ 470 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471 /* receive remote parts: note this assumes the values are not actually */ 472 /* inserted in yy until the next line, which is true for my implementation*/ 473 /* but is not perhaps always true. */ 474 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 475 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476 return 0; 477 } 478 479 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480 { 481 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482 int ierr; 483 484 /* do nondiagonal part */ 485 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486 /* send it on its way */ 487 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 488 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489 /* do local part */ 490 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491 /* receive remote parts: note this assumes the values are not actually */ 492 /* inserted in yy until the next line, which is true for my implementation*/ 493 /* but is not perhaps always true. */ 494 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 495 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496 return 0; 497 } 498 499 /* 500 This only works correctly for square matrices where the subblock A->A is the 501 diagonal block 502 */ 503 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 504 { 505 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506 return MatGetDiagonal(a->A,v); 507 } 508 509 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 int ierr; 513 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515 return 0; 516 } 517 518 static int MatDestroy_MPIAIJ(PetscObject obj) 519 { 520 Mat mat = (Mat) obj; 521 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 522 int ierr; 523 #if defined(PETSC_LOG) 524 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525 #endif 526 PetscFree(aij->rowners); 527 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 528 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 529 if (aij->colmap) PetscFree(aij->colmap); 530 if (aij->garray) PetscFree(aij->garray); 531 if (aij->lvec) VecDestroy(aij->lvec); 532 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 533 PetscFree(aij); 534 PLogObjectDestroy(mat); 535 PetscHeaderDestroy(mat); 536 return 0; 537 } 538 #include "draw.h" 539 #include "pinclude/pviewer.h" 540 541 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 542 { 543 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544 int ierr; 545 546 if (aij->size == 1) { 547 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548 } 549 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550 return 0; 551 } 552 553 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554 { 555 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557 int ierr, format,shift = C->indexshift,rank; 558 PetscObject vobj = (PetscObject) viewer; 559 FILE *fd; 560 561 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562 ierr = ViewerFileGetFormat_Private(viewer,&format); 563 if (format == FILE_FORMAT_INFO_DETAILED) { 564 int nz,nzalloc,mem; 565 MPI_Comm_rank(mat->comm,&rank); 566 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568 MPIU_Seq_begin(mat->comm,1); 569 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570 nzalloc,mem); 571 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 572 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 573 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 574 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575 fflush(fd); 576 MPIU_Seq_end(mat->comm,1); 577 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 578 return 0; 579 } 580 else if (format == FILE_FORMAT_INFO) { 581 return 0; 582 } 583 } 584 585 if (vobj->type == ASCII_FILE_VIEWER) { 586 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 587 MPIU_Seq_begin(mat->comm,1); 588 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 589 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 590 aij->cend); 591 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 592 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593 fflush(fd); 594 MPIU_Seq_end(mat->comm,1); 595 } 596 else { 597 int size = aij->size; 598 rank = aij->rank; 599 if (size == 1) { 600 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 601 } 602 else { 603 /* assemble the entire matrix onto first processor. */ 604 Mat A; 605 Mat_SeqAIJ *Aloc; 606 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 607 Scalar *a; 608 609 if (!rank) { 610 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611 CHKERRQ(ierr); 612 } 613 else { 614 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615 CHKERRQ(ierr); 616 } 617 PLogObjectParent(mat,A); 618 619 /* copy over the A part */ 620 Aloc = (Mat_SeqAIJ*) aij->A->data; 621 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 622 row = aij->rstart; 623 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 624 for ( i=0; i<m; i++ ) { 625 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 626 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 627 } 628 aj = Aloc->j; 629 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 630 631 /* copy over the B part */ 632 Aloc = (Mat_SeqAIJ*) aij->B->data; 633 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 634 row = aij->rstart; 635 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 637 for ( i=0; i<m; i++ ) { 638 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 639 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 640 } 641 PetscFree(ct); 642 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 644 if (!rank) { 645 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 646 } 647 ierr = MatDestroy(A); CHKERRQ(ierr); 648 } 649 } 650 return 0; 651 } 652 653 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654 { 655 Mat mat = (Mat) obj; 656 int ierr; 657 PetscObject vobj = (PetscObject) viewer; 658 659 if (!viewer) { 660 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661 } 662 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665 } 666 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668 } 669 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671 } 672 else if (vobj->cookie == DRAW_COOKIE) { 673 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674 } 675 else if (vobj->type == BINARY_FILE_VIEWER) { 676 return MatView_MPIAIJ_Binary(mat,viewer); 677 } 678 return 0; 679 } 680 681 extern int MatMarkDiag_SeqAIJ(Mat); 682 /* 683 This has to provide several versions. 684 685 1) per sequential 686 2) a) use only local smoothing updating outer values only once. 687 b) local smoothing updating outer values each inner iteration 688 3) color updating out values betwen colors. 689 */ 690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691 double fshift,int its,Vec xx) 692 { 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694 Mat AA = mat->A, BB = mat->B; 695 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 696 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 697 int ierr,*idx, *diag; 698 int n = mat->n, m = mat->m, i,shift = A->indexshift; 699 Vec tt; 700 701 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702 xs = x + shift; /* shift by one for index start of 1 */ 703 ls = ls + shift; 704 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705 diag = A->diag; 706 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 707 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708 } 709 if (flag & SOR_EISENSTAT) { 710 /* Let A = L + U + D; where L is lower trianglar, 711 U is upper triangular, E is diagonal; This routine applies 712 713 (L + E)^{-1} A (U + E)^{-1} 714 715 to a vector efficiently using Eisenstat's trick. This is for 716 the case of SSOR preconditioner, so E is D/omega where omega 717 is the relaxation factor. 718 */ 719 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720 PLogObjectParent(matin,tt); 721 VecGetArray(tt,&t); 722 scale = (2.0/omega) - 1.0; 723 /* x = (E + U)^{-1} b */ 724 VecSet(&zero,mat->lvec); 725 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 726 mat->Mvctx); CHKERRQ(ierr); 727 for ( i=m-1; i>-1; i-- ) { 728 n = A->i[i+1] - diag[i] - 1; 729 idx = A->j + diag[i] + !shift; 730 v = A->a + diag[i] + !shift; 731 sum = b[i]; 732 SPARSEDENSEMDOT(sum,xs,v,idx,n); 733 d = fshift + A->a[diag[i]+shift]; 734 n = B->i[i+1] - B->i[i]; 735 idx = B->j + B->i[i] + shift; 736 v = B->a + B->i[i] + shift; 737 SPARSEDENSEMDOT(sum,ls,v,idx,n); 738 x[i] = omega*(sum/d); 739 } 740 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 741 mat->Mvctx); CHKERRQ(ierr); 742 743 /* t = b - (2*E - D)x */ 744 v = A->a; 745 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746 747 /* t = (E + L)^{-1}t */ 748 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749 diag = A->diag; 750 VecSet(&zero,mat->lvec); 751 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 752 mat->Mvctx); CHKERRQ(ierr); 753 for ( i=0; i<m; i++ ) { 754 n = diag[i] - A->i[i]; 755 idx = A->j + A->i[i] + shift; 756 v = A->a + A->i[i] + shift; 757 sum = t[i]; 758 SPARSEDENSEMDOT(sum,ts,v,idx,n); 759 d = fshift + A->a[diag[i]+shift]; 760 n = B->i[i+1] - B->i[i]; 761 idx = B->j + B->i[i] + shift; 762 v = B->a + B->i[i] + shift; 763 SPARSEDENSEMDOT(sum,ls,v,idx,n); 764 t[i] = omega*(sum/d); 765 } 766 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 767 mat->Mvctx); CHKERRQ(ierr); 768 /* x = x + t */ 769 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770 VecDestroy(tt); 771 return 0; 772 } 773 774 775 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776 if (flag & SOR_ZERO_INITIAL_GUESS) { 777 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778 } 779 else { 780 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 781 CHKERRQ(ierr); 782 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 783 CHKERRQ(ierr); 784 } 785 while (its--) { 786 /* go down through the rows */ 787 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 788 mat->Mvctx); CHKERRQ(ierr); 789 for ( i=0; i<m; i++ ) { 790 n = A->i[i+1] - A->i[i]; 791 idx = A->j + A->i[i] + shift; 792 v = A->a + A->i[i] + shift; 793 sum = b[i]; 794 SPARSEDENSEMDOT(sum,xs,v,idx,n); 795 d = fshift + A->a[diag[i]+shift]; 796 n = B->i[i+1] - B->i[i]; 797 idx = B->j + B->i[i] + shift; 798 v = B->a + B->i[i] + shift; 799 SPARSEDENSEMDOT(sum,ls,v,idx,n); 800 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801 } 802 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 803 mat->Mvctx); CHKERRQ(ierr); 804 /* come up through the rows */ 805 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 806 mat->Mvctx); CHKERRQ(ierr); 807 for ( i=m-1; i>-1; i-- ) { 808 n = A->i[i+1] - A->i[i]; 809 idx = A->j + A->i[i] + shift; 810 v = A->a + A->i[i] + shift; 811 sum = b[i]; 812 SPARSEDENSEMDOT(sum,xs,v,idx,n); 813 d = fshift + A->a[diag[i]+shift]; 814 n = B->i[i+1] - B->i[i]; 815 idx = B->j + B->i[i] + shift; 816 v = B->a + B->i[i] + shift; 817 SPARSEDENSEMDOT(sum,ls,v,idx,n); 818 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819 } 820 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 821 mat->Mvctx); CHKERRQ(ierr); 822 } 823 } 824 else if (flag & SOR_FORWARD_SWEEP){ 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 VecSet(&zero,mat->lvec); 827 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 828 mat->Mvctx); CHKERRQ(ierr); 829 for ( i=0; i<m; i++ ) { 830 n = diag[i] - A->i[i]; 831 idx = A->j + A->i[i] + shift; 832 v = A->a + A->i[i] + shift; 833 sum = b[i]; 834 SPARSEDENSEMDOT(sum,xs,v,idx,n); 835 d = fshift + A->a[diag[i]+shift]; 836 n = B->i[i+1] - B->i[i]; 837 idx = B->j + B->i[i] + shift; 838 v = B->a + B->i[i] + shift; 839 SPARSEDENSEMDOT(sum,ls,v,idx,n); 840 x[i] = omega*(sum/d); 841 } 842 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 843 mat->Mvctx); CHKERRQ(ierr); 844 its--; 845 } 846 while (its--) { 847 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 848 CHKERRQ(ierr); 849 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 850 CHKERRQ(ierr); 851 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 852 mat->Mvctx); CHKERRQ(ierr); 853 for ( i=0; i<m; i++ ) { 854 n = A->i[i+1] - A->i[i]; 855 idx = A->j + A->i[i] + shift; 856 v = A->a + A->i[i] + shift; 857 sum = b[i]; 858 SPARSEDENSEMDOT(sum,xs,v,idx,n); 859 d = fshift + A->a[diag[i]+shift]; 860 n = B->i[i+1] - B->i[i]; 861 idx = B->j + B->i[i] + shift; 862 v = B->a + B->i[i] + shift; 863 SPARSEDENSEMDOT(sum,ls,v,idx,n); 864 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865 } 866 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 867 mat->Mvctx); CHKERRQ(ierr); 868 } 869 } 870 else if (flag & SOR_BACKWARD_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 VecSet(&zero,mat->lvec); 873 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 874 mat->Mvctx); CHKERRQ(ierr); 875 for ( i=m-1; i>-1; i-- ) { 876 n = A->i[i+1] - diag[i] - 1; 877 idx = A->j + diag[i] + !shift; 878 v = A->a + diag[i] + !shift; 879 sum = b[i]; 880 SPARSEDENSEMDOT(sum,xs,v,idx,n); 881 d = fshift + A->a[diag[i]+shift]; 882 n = B->i[i+1] - B->i[i]; 883 idx = B->j + B->i[i] + shift; 884 v = B->a + B->i[i] + shift; 885 SPARSEDENSEMDOT(sum,ls,v,idx,n); 886 x[i] = omega*(sum/d); 887 } 888 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 889 mat->Mvctx); CHKERRQ(ierr); 890 its--; 891 } 892 while (its--) { 893 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 894 mat->Mvctx); CHKERRQ(ierr); 895 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 896 mat->Mvctx); CHKERRQ(ierr); 897 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 898 mat->Mvctx); CHKERRQ(ierr); 899 for ( i=m-1; i>-1; i-- ) { 900 n = A->i[i+1] - A->i[i]; 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911 } 912 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 913 mat->Mvctx); CHKERRQ(ierr); 914 } 915 } 916 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917 if (flag & SOR_ZERO_INITIAL_GUESS) { 918 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919 } 920 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 921 CHKERRQ(ierr); 922 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 923 CHKERRQ(ierr); 924 while (its--) { 925 /* go down through the rows */ 926 for ( i=0; i<m; i++ ) { 927 n = A->i[i+1] - A->i[i]; 928 idx = A->j + A->i[i] + shift; 929 v = A->a + A->i[i] + shift; 930 sum = b[i]; 931 SPARSEDENSEMDOT(sum,xs,v,idx,n); 932 d = fshift + A->a[diag[i]+shift]; 933 n = B->i[i+1] - B->i[i]; 934 idx = B->j + B->i[i] + shift; 935 v = B->a + B->i[i] + shift; 936 SPARSEDENSEMDOT(sum,ls,v,idx,n); 937 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938 } 939 /* come up through the rows */ 940 for ( i=m-1; i>-1; i-- ) { 941 n = A->i[i+1] - A->i[i]; 942 idx = A->j + A->i[i] + shift; 943 v = A->a + A->i[i] + shift; 944 sum = b[i]; 945 SPARSEDENSEMDOT(sum,xs,v,idx,n); 946 d = fshift + A->a[diag[i]+shift]; 947 n = B->i[i+1] - B->i[i]; 948 idx = B->j + B->i[i] + shift; 949 v = B->a + B->i[i] + shift; 950 SPARSEDENSEMDOT(sum,ls,v,idx,n); 951 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 952 } 953 } 954 } 955 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956 if (flag & SOR_ZERO_INITIAL_GUESS) { 957 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958 } 959 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 960 CHKERRQ(ierr); 961 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 962 CHKERRQ(ierr); 963 while (its--) { 964 for ( i=0; i<m; i++ ) { 965 n = A->i[i+1] - A->i[i]; 966 idx = A->j + A->i[i] + shift; 967 v = A->a + A->i[i] + shift; 968 sum = b[i]; 969 SPARSEDENSEMDOT(sum,xs,v,idx,n); 970 d = fshift + A->a[diag[i]+shift]; 971 n = B->i[i+1] - B->i[i]; 972 idx = B->j + B->i[i] + shift; 973 v = B->a + B->i[i] + shift; 974 SPARSEDENSEMDOT(sum,ls,v,idx,n); 975 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976 } 977 } 978 } 979 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980 if (flag & SOR_ZERO_INITIAL_GUESS) { 981 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982 } 983 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 984 mat->Mvctx); CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 986 mat->Mvctx); CHKERRQ(ierr); 987 while (its--) { 988 for ( i=m-1; i>-1; i-- ) { 989 n = A->i[i+1] - A->i[i]; 990 idx = A->j + A->i[i] + shift; 991 v = A->a + A->i[i] + shift; 992 sum = b[i]; 993 SPARSEDENSEMDOT(sum,xs,v,idx,n); 994 d = fshift + A->a[diag[i]+shift]; 995 n = B->i[i+1] - B->i[i]; 996 idx = B->j + B->i[i] + shift; 997 v = B->a + B->i[i] + shift; 998 SPARSEDENSEMDOT(sum,ls,v,idx,n); 999 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000 } 1001 } 1002 } 1003 return 0; 1004 } 1005 1006 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007 int *nzalloc,int *mem) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010 Mat A = mat->A, B = mat->B; 1011 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012 1013 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1014 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016 if (flag == MAT_LOCAL) { 1017 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018 } else if (flag == MAT_GLOBAL_MAX) { 1019 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021 } else if (flag == MAT_GLOBAL_SUM) { 1022 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024 } 1025 return 0; 1026 } 1027 1028 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036 1037 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038 { 1039 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040 1041 if (op == NO_NEW_NONZERO_LOCATIONS || 1042 op == YES_NEW_NONZERO_LOCATIONS || 1043 op == COLUMNS_SORTED || 1044 op == ROW_ORIENTED) { 1045 MatSetOption(a->A,op); 1046 MatSetOption(a->B,op); 1047 } 1048 else if (op == ROWS_SORTED || 1049 op == SYMMETRIC_MATRIX || 1050 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051 op == YES_NEW_DIAGONALS) 1052 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1053 else if (op == COLUMN_ORIENTED) { 1054 a->roworiented = 0; 1055 MatSetOption(a->A,op); 1056 MatSetOption(a->B,op); 1057 } 1058 else if (op == NO_NEW_DIAGONALS) 1059 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060 else 1061 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062 return 0; 1063 } 1064 1065 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066 { 1067 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068 *m = mat->M; *n = mat->N; 1069 return 0; 1070 } 1071 1072 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073 { 1074 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075 *m = mat->m; *n = mat->N; 1076 return 0; 1077 } 1078 1079 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080 { 1081 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082 *m = mat->rstart; *n = mat->rend; 1083 return 0; 1084 } 1085 1086 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1087 { 1088 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089 Scalar *vworkA, *vworkB, **pvA, **pvB; 1090 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1092 1093 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094 lrow = row - rstart; 1095 1096 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097 if (!v) {pvA = 0; pvB = 0;} 1098 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1099 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1100 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101 nztot = nzA + nzB; 1102 1103 if (v || idx) { 1104 if (nztot) { 1105 /* Sort by increasing column numbers, assuming A and B already sorted */ 1106 int imark; 1107 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108 if (v) { 1109 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1110 for ( i=0; i<nzB; i++ ) { 1111 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112 else break; 1113 } 1114 imark = i; 1115 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117 } 1118 if (idx) { 1119 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121 for ( i=0; i<nzB; i++ ) { 1122 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123 else break; 1124 } 1125 imark = i; 1126 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1128 } 1129 } 1130 else {*idx = 0; *v=0;} 1131 } 1132 *nz = nztot; 1133 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 return 0; 1136 } 1137 1138 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1139 { 1140 if (idx) PetscFree(*idx); 1141 if (v) PetscFree(*v); 1142 return 0; 1143 } 1144 1145 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150 double sum = 0.0; 1151 Scalar *v; 1152 1153 if (aij->size == 1) { 1154 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1155 } else { 1156 if (type == NORM_FROBENIUS) { 1157 v = amat->a; 1158 for (i=0; i<amat->nz; i++ ) { 1159 #if defined(PETSC_COMPLEX) 1160 sum += real(conj(*v)*(*v)); v++; 1161 #else 1162 sum += (*v)*(*v); v++; 1163 #endif 1164 } 1165 v = bmat->a; 1166 for (i=0; i<bmat->nz; i++ ) { 1167 #if defined(PETSC_COMPLEX) 1168 sum += real(conj(*v)*(*v)); v++; 1169 #else 1170 sum += (*v)*(*v); v++; 1171 #endif 1172 } 1173 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1174 *norm = sqrt(*norm); 1175 } 1176 else if (type == NORM_1) { /* max column norm */ 1177 double *tmp, *tmp2; 1178 int *jj, *garray = aij->garray; 1179 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1180 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181 PetscMemzero(tmp,aij->N*sizeof(double)); 1182 *norm = 0.0; 1183 v = amat->a; jj = amat->j; 1184 for ( j=0; j<amat->nz; j++ ) { 1185 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1186 } 1187 v = bmat->a; jj = bmat->j; 1188 for ( j=0; j<bmat->nz; j++ ) { 1189 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1190 } 1191 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1192 for ( j=0; j<aij->N; j++ ) { 1193 if (tmp2[j] > *norm) *norm = tmp2[j]; 1194 } 1195 PetscFree(tmp); PetscFree(tmp2); 1196 } 1197 else if (type == NORM_INFINITY) { /* max row norm */ 1198 double ntemp = 0.0; 1199 for ( j=0; j<amat->m; j++ ) { 1200 v = amat->a + amat->i[j] + shift; 1201 sum = 0.0; 1202 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203 sum += PetscAbsScalar(*v); v++; 1204 } 1205 v = bmat->a + bmat->i[j] + shift; 1206 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207 sum += PetscAbsScalar(*v); v++; 1208 } 1209 if (sum > ntemp) ntemp = sum; 1210 } 1211 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1212 } 1213 else { 1214 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1215 } 1216 } 1217 return 0; 1218 } 1219 1220 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224 int ierr,shift = Aloc->indexshift; 1225 Mat B; 1226 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227 Scalar *array; 1228 1229 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1230 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1231 PETSC_NULL,&B); CHKERRQ(ierr); 1232 1233 /* copy over the A part */ 1234 Aloc = (Mat_SeqAIJ*) a->A->data; 1235 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1236 row = a->rstart; 1237 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1238 for ( i=0; i<m; i++ ) { 1239 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1240 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1241 } 1242 aj = Aloc->j; 1243 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1244 1245 /* copy over the B part */ 1246 Aloc = (Mat_SeqAIJ*) a->B->data; 1247 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1248 row = a->rstart; 1249 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1250 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1251 for ( i=0; i<m; i++ ) { 1252 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1253 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1254 } 1255 PetscFree(ct); 1256 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1257 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1258 if (matout) { 1259 *matout = B; 1260 } else { 1261 /* This isn't really an in-place transpose .... but free data structures from a */ 1262 PetscFree(a->rowners); 1263 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1264 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1265 if (a->colmap) PetscFree(a->colmap); 1266 if (a->garray) PetscFree(a->garray); 1267 if (a->lvec) VecDestroy(a->lvec); 1268 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1269 PetscFree(a); 1270 PetscMemcpy(A,B,sizeof(struct _Mat)); 1271 PetscHeaderDestroy(B); 1272 } 1273 return 0; 1274 } 1275 1276 extern int MatPrintHelp_SeqAIJ(Mat); 1277 static int MatPrintHelp_MPIAIJ(Mat A) 1278 { 1279 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1280 1281 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1282 else return 0; 1283 } 1284 1285 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1286 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1287 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1288 /* -------------------------------------------------------------------*/ 1289 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1290 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1291 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1292 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1293 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1294 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1295 MatLUFactor_MPIAIJ,0, 1296 MatRelax_MPIAIJ, 1297 MatTranspose_MPIAIJ, 1298 MatGetInfo_MPIAIJ,0, 1299 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1300 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1301 0, 1302 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1303 MatGetReordering_MPIAIJ, 1304 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1305 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1306 MatILUFactorSymbolic_MPIAIJ,0, 1307 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1308 0,0,0, 1309 0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1310 MatPrintHelp_MPIAIJ, 1311 MatScale_MPIAIJ}; 1312 1313 /*@C 1314 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1315 (the default parallel PETSc format). For good performance you should 1316 preallocate the matrix storage by setting the parameters d_nz or d_nzz and 1317 o_nz or o_nzz. By setting these parameters accurately performance can 1318 be increased by more then a factor of 50. 1319 1320 Input Parameters: 1321 . comm - MPI communicator 1322 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1323 . n - number of local columns (or PETSC_DECIDE to have calculated 1324 if N is given) 1325 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1326 . N - number of global columns (or PETSC_DECIDE to have calculated 1327 if n is given) 1328 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1329 (same for all local rows) 1330 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1331 or null (possibly different for each row). You must leave room 1332 for the diagonal entry even if it is zero. 1333 . o_nz - number of nonzeros per row in off-diagonal portion of local 1334 submatrix (same for all local rows). 1335 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1336 submatrix or null (possibly different for each row). 1337 1338 Output Parameter: 1339 . newmat - the matrix 1340 1341 Notes: 1342 The AIJ format (also called the Yale sparse matrix format or 1343 compressed row storage), is fully compatible with standard Fortran 77 1344 storage. That is, the stored row and column indices can begin at 1345 either one (as in Fortran) or zero. See the users manual for details. 1346 1347 The user MUST specify either the local or global matrix dimensions 1348 (possibly both). 1349 1350 Storage Information: 1351 For a square global matrix we define each processor's diagonal portion 1352 to be its local rows and the corresponding columns (a square submatrix); 1353 each processor's off-diagonal portion encompasses the remainder of the 1354 local matrix (a rectangular submatrix). 1355 1356 The user can specify preallocated storage for the diagonal part of 1357 the local submatrix with either d_nz or d_nnz (not both). Set 1358 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1359 memory allocation. Likewise, specify preallocated storage for the 1360 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1361 1362 Consider a processor that owns rows 3, 4 and 5 (in this figure we depict 1363 those three rows and all columns (0-11). 1364 1365 $ 0 1 2 3 4 5 6 7 8 9 10 11 1366 $ ------------------- 1367 $ 3 | o o o d d d o o o o o o 1368 $ 4 | o o o d d d o o o o o o 1369 $ 5 | o o o d d d o o o o o o 1370 $ 1371 Thus any entries in the d locations are stored in the d matrix and 1372 any in the o locations are store in the o matrix. (The d and the o matrix 1373 are simply SeqAIJ matrix (that is they store the matrices in compress 1374 row storage)). 1375 1376 Now d_nz should give the number of non zeros per row in the d matrix 1377 and o_nz should give the number of nonzeros per row in the o matrix. 1378 In general for PDE problems where most nonzeros are near the diagonal 1379 one expects d_nz >> o_nz. See the users manual chapter on matrices for 1380 more details. 1381 1382 By default, this format uses inodes (identical nodes) when possible. 1383 We search for consecutive rows with the same nonzero structure, thereby 1384 reusing matrix information to achieve increased efficiency. 1385 1386 Options Database Keys: 1387 $ -mat_aij_no_inode - Do not use inodes 1388 $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1389 1390 .keywords: matrix, aij, compressed row, sparse, parallel 1391 1392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1393 @*/ 1394 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1395 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1396 { 1397 Mat mat; 1398 Mat_MPIAIJ *a; 1399 int ierr, i,sum[2],work[2]; 1400 1401 *newmat = 0; 1402 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1403 PLogObjectCreate(mat); 1404 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1405 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1406 mat->destroy = MatDestroy_MPIAIJ; 1407 mat->view = MatView_MPIAIJ; 1408 mat->factor = 0; 1409 mat->assembled = PETSC_FALSE; 1410 1411 a->insertmode = NOT_SET_VALUES; 1412 MPI_Comm_rank(comm,&a->rank); 1413 MPI_Comm_size(comm,&a->size); 1414 1415 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1416 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1417 1418 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1419 work[0] = m; work[1] = n; 1420 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1421 if (M == PETSC_DECIDE) M = sum[0]; 1422 if (N == PETSC_DECIDE) N = sum[1]; 1423 } 1424 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1425 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1426 a->m = m; 1427 a->n = n; 1428 a->N = N; 1429 a->M = M; 1430 1431 /* build local table of row and column ownerships */ 1432 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1433 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1434 a->cowners = a->rowners + a->size + 1; 1435 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1436 a->rowners[0] = 0; 1437 for ( i=2; i<=a->size; i++ ) { 1438 a->rowners[i] += a->rowners[i-1]; 1439 } 1440 a->rstart = a->rowners[a->rank]; 1441 a->rend = a->rowners[a->rank+1]; 1442 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1443 a->cowners[0] = 0; 1444 for ( i=2; i<=a->size; i++ ) { 1445 a->cowners[i] += a->cowners[i-1]; 1446 } 1447 a->cstart = a->cowners[a->rank]; 1448 a->cend = a->cowners[a->rank+1]; 1449 1450 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1451 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1452 PLogObjectParent(mat,a->A); 1453 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1454 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1455 PLogObjectParent(mat,a->B); 1456 1457 /* build cache for off array entries formed */ 1458 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1459 a->colmap = 0; 1460 a->garray = 0; 1461 a->roworiented = 1; 1462 1463 /* stuff used for matrix vector multiply */ 1464 a->lvec = 0; 1465 a->Mvctx = 0; 1466 1467 *newmat = mat; 1468 return 0; 1469 } 1470 1471 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1472 { 1473 Mat mat; 1474 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1475 int ierr, len,flg; 1476 1477 *newmat = 0; 1478 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1479 PLogObjectCreate(mat); 1480 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1481 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1482 mat->destroy = MatDestroy_MPIAIJ; 1483 mat->view = MatView_MPIAIJ; 1484 mat->factor = matin->factor; 1485 mat->assembled = PETSC_TRUE; 1486 1487 a->m = oldmat->m; 1488 a->n = oldmat->n; 1489 a->M = oldmat->M; 1490 a->N = oldmat->N; 1491 1492 a->rstart = oldmat->rstart; 1493 a->rend = oldmat->rend; 1494 a->cstart = oldmat->cstart; 1495 a->cend = oldmat->cend; 1496 a->size = oldmat->size; 1497 a->rank = oldmat->rank; 1498 a->insertmode = NOT_SET_VALUES; 1499 1500 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1501 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1502 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1503 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1504 if (oldmat->colmap) { 1505 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1506 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1507 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1508 } else a->colmap = 0; 1509 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1510 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1511 PLogObjectMemory(mat,len*sizeof(int)); 1512 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1513 } else a->garray = 0; 1514 1515 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1516 PLogObjectParent(mat,a->lvec); 1517 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1518 PLogObjectParent(mat,a->Mvctx); 1519 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1520 PLogObjectParent(mat,a->A); 1521 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1522 PLogObjectParent(mat,a->B); 1523 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1524 if (flg) { 1525 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1526 } 1527 *newmat = mat; 1528 return 0; 1529 } 1530 1531 #include "sysio.h" 1532 1533 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1534 { 1535 Mat A; 1536 int i, nz, ierr, j,rstart, rend, fd; 1537 Scalar *vals,*svals; 1538 PetscObject vobj = (PetscObject) bview; 1539 MPI_Comm comm = vobj->comm; 1540 MPI_Status status; 1541 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1542 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1543 int tag = ((PetscObject)bview)->tag; 1544 1545 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1546 if (!rank) { 1547 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1548 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1549 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1550 } 1551 1552 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1553 M = header[1]; N = header[2]; 1554 /* determine ownership of all rows */ 1555 m = M/size + ((M % size) > rank); 1556 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1557 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1558 rowners[0] = 0; 1559 for ( i=2; i<=size; i++ ) { 1560 rowners[i] += rowners[i-1]; 1561 } 1562 rstart = rowners[rank]; 1563 rend = rowners[rank+1]; 1564 1565 /* distribute row lengths to all processors */ 1566 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1567 offlens = ourlens + (rend-rstart); 1568 if (!rank) { 1569 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1570 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1571 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1572 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1573 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1574 PetscFree(sndcounts); 1575 } 1576 else { 1577 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1578 } 1579 1580 if (!rank) { 1581 /* calculate the number of nonzeros on each processor */ 1582 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1583 PetscMemzero(procsnz,size*sizeof(int)); 1584 for ( i=0; i<size; i++ ) { 1585 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1586 procsnz[i] += rowlengths[j]; 1587 } 1588 } 1589 PetscFree(rowlengths); 1590 1591 /* determine max buffer needed and allocate it */ 1592 maxnz = 0; 1593 for ( i=0; i<size; i++ ) { 1594 maxnz = PetscMax(maxnz,procsnz[i]); 1595 } 1596 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1597 1598 /* read in my part of the matrix column indices */ 1599 nz = procsnz[0]; 1600 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1601 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1602 1603 /* read in every one elses and ship off */ 1604 for ( i=1; i<size; i++ ) { 1605 nz = procsnz[i]; 1606 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1607 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1608 } 1609 PetscFree(cols); 1610 } 1611 else { 1612 /* determine buffer space needed for message */ 1613 nz = 0; 1614 for ( i=0; i<m; i++ ) { 1615 nz += ourlens[i]; 1616 } 1617 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1618 1619 /* receive message of column indices*/ 1620 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1621 MPI_Get_count(&status,MPI_INT,&maxnz); 1622 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1623 } 1624 1625 /* loop over local rows, determining number of off diagonal entries */ 1626 PetscMemzero(offlens,m*sizeof(int)); 1627 jj = 0; 1628 for ( i=0; i<m; i++ ) { 1629 for ( j=0; j<ourlens[i]; j++ ) { 1630 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1631 jj++; 1632 } 1633 } 1634 1635 /* create our matrix */ 1636 for ( i=0; i<m; i++ ) { 1637 ourlens[i] -= offlens[i]; 1638 } 1639 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1640 A = *newmat; 1641 MatSetOption(A,COLUMNS_SORTED); 1642 for ( i=0; i<m; i++ ) { 1643 ourlens[i] += offlens[i]; 1644 } 1645 1646 if (!rank) { 1647 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1648 1649 /* read in my part of the matrix numerical values */ 1650 nz = procsnz[0]; 1651 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1652 1653 /* insert into matrix */ 1654 jj = rstart; 1655 smycols = mycols; 1656 svals = vals; 1657 for ( i=0; i<m; i++ ) { 1658 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1659 smycols += ourlens[i]; 1660 svals += ourlens[i]; 1661 jj++; 1662 } 1663 1664 /* read in other processors and ship out */ 1665 for ( i=1; i<size; i++ ) { 1666 nz = procsnz[i]; 1667 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1668 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1669 } 1670 PetscFree(procsnz); 1671 } 1672 else { 1673 /* receive numeric values */ 1674 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1675 1676 /* receive message of values*/ 1677 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1678 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1679 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1680 1681 /* insert into matrix */ 1682 jj = rstart; 1683 smycols = mycols; 1684 svals = vals; 1685 for ( i=0; i<m; i++ ) { 1686 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1687 smycols += ourlens[i]; 1688 svals += ourlens[i]; 1689 jj++; 1690 } 1691 } 1692 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1693 1694 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1695 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1696 return 0; 1697 } 1698