1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.125 1996/02/09 15:00:36 curfman Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 return 0; 296 } 297 298 static int MatZeroEntries_MPIAIJ(Mat A) 299 { 300 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301 int ierr; 302 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 303 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 304 return 0; 305 } 306 307 /* the code does not do the diagonal entries correctly unless the 308 matrix is square and the column and row owerships are identical. 309 This is a BUG. The only way to fix it seems to be to access 310 aij->A and aij->B directly and not through the MatZeroRows() 311 routine. 312 */ 313 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 314 { 315 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 316 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 317 int *procs,*nprocs,j,found,idx,nsends,*work; 318 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 319 int *rvalues,tag = A->tag,count,base,slen,n,*source; 320 int *lens,imdex,*lrows,*values; 321 MPI_Comm comm = A->comm; 322 MPI_Request *send_waits,*recv_waits; 323 MPI_Status recv_status,*send_status; 324 IS istmp; 325 326 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 327 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 328 329 /* first count number of contributors to each processor */ 330 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 332 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 333 for ( i=0; i<N; i++ ) { 334 idx = rows[i]; 335 found = 0; 336 for ( j=0; j<size; j++ ) { 337 if (idx >= owners[j] && idx < owners[j+1]) { 338 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 339 } 340 } 341 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 342 } 343 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 344 345 /* inform other processors of number of messages and max length*/ 346 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 347 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 348 nrecvs = work[rank]; 349 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 350 nmax = work[rank]; 351 PetscFree(work); 352 353 /* post receives: */ 354 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 355 CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 357 CHKPTRQ(recv_waits); 358 for ( i=0; i<nrecvs; i++ ) { 359 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 368 CHKPTRQ(send_waits); 369 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 370 starts[0] = 0; 371 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 372 for ( i=0; i<N; i++ ) { 373 svalues[starts[owner[i]]++] = rows[i]; 374 } 375 ISRestoreIndices(is,&rows); 376 377 starts[0] = 0; 378 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); 386 387 base = owners[rank]; 388 389 /* wait on receives */ 390 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 391 source = lens + nrecvs; 392 count = nrecvs; slen = 0; 393 while (count) { 394 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 395 /* unpack receives into our local space */ 396 MPI_Get_count(&recv_status,MPI_INT,&n); 397 source[imdex] = recv_status.MPI_SOURCE; 398 lens[imdex] = n; 399 slen += n; 400 count--; 401 } 402 PetscFree(recv_waits); 403 404 /* move the data into the send scatter */ 405 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 406 count = 0; 407 for ( i=0; i<nrecvs; i++ ) { 408 values = rvalues + i*nmax; 409 for ( j=0; j<lens[i]; j++ ) { 410 lrows[count++] = values[j] - base; 411 } 412 } 413 PetscFree(rvalues); PetscFree(lens); 414 PetscFree(owner); PetscFree(nprocs); 415 416 /* actually zap the local rows */ 417 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418 PLogObjectParent(A,istmp); 419 PetscFree(lrows); 420 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 421 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 422 ierr = ISDestroy(istmp); CHKERRQ(ierr); 423 424 /* wait on sends */ 425 if (nsends) { 426 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 427 CHKPTRQ(send_status); 428 MPI_Waitall(nsends,send_waits,send_status); 429 PetscFree(send_status); 430 } 431 PetscFree(send_waits); PetscFree(svalues); 432 433 return 0; 434 } 435 436 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 437 { 438 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 439 int ierr; 440 441 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 443 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 445 return 0; 446 } 447 448 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449 { 450 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451 int ierr; 452 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 454 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456 return 0; 457 } 458 459 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460 { 461 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462 int ierr; 463 464 /* do nondiagonal part */ 465 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466 /* send it on its way */ 467 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 468 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469 /* do local part */ 470 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471 /* receive remote parts: note this assumes the values are not actually */ 472 /* inserted in yy until the next line, which is true for my implementation*/ 473 /* but is not perhaps always true. */ 474 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 475 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476 return 0; 477 } 478 479 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480 { 481 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482 int ierr; 483 484 /* do nondiagonal part */ 485 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486 /* send it on its way */ 487 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 488 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489 /* do local part */ 490 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491 /* receive remote parts: note this assumes the values are not actually */ 492 /* inserted in yy until the next line, which is true for my implementation*/ 493 /* but is not perhaps always true. */ 494 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 495 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496 return 0; 497 } 498 499 /* 500 This only works correctly for square matrices where the subblock A->A is the 501 diagonal block 502 */ 503 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 504 { 505 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506 return MatGetDiagonal(a->A,v); 507 } 508 509 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 int ierr; 513 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515 return 0; 516 } 517 518 static int MatDestroy_MPIAIJ(PetscObject obj) 519 { 520 Mat mat = (Mat) obj; 521 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 522 int ierr; 523 #if defined(PETSC_LOG) 524 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525 #endif 526 PetscFree(aij->rowners); 527 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 528 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 529 if (aij->colmap) PetscFree(aij->colmap); 530 if (aij->garray) PetscFree(aij->garray); 531 if (aij->lvec) VecDestroy(aij->lvec); 532 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 533 PetscFree(aij); 534 PLogObjectDestroy(mat); 535 PetscHeaderDestroy(mat); 536 return 0; 537 } 538 #include "draw.h" 539 #include "pinclude/pviewer.h" 540 541 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 542 { 543 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544 int ierr; 545 546 if (aij->size == 1) { 547 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548 } 549 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550 return 0; 551 } 552 553 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554 { 555 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557 int ierr, format,shift = C->indexshift,rank; 558 PetscObject vobj = (PetscObject) viewer; 559 FILE *fd; 560 561 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562 ierr = ViewerFileGetFormat_Private(viewer,&format); 563 if (format == FILE_FORMAT_INFO_DETAILED) { 564 int nz,nzalloc,mem; 565 MPI_Comm_rank(mat->comm,&rank); 566 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568 MPIU_Seq_begin(mat->comm,1); 569 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570 nzalloc,mem); 571 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 572 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 573 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 574 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575 fflush(fd); 576 MPIU_Seq_end(mat->comm,1); 577 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 578 return 0; 579 } 580 else if (format == FILE_FORMAT_INFO) { 581 return 0; 582 } 583 } 584 585 if (vobj->type == ASCII_FILE_VIEWER) { 586 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 587 MPIU_Seq_begin(mat->comm,1); 588 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 589 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 590 aij->cend); 591 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 592 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593 fflush(fd); 594 MPIU_Seq_end(mat->comm,1); 595 } 596 else { 597 int size = aij->size; 598 rank = aij->rank; 599 if (size == 1) { 600 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 601 } 602 else { 603 /* assemble the entire matrix onto first processor. */ 604 Mat A; 605 Mat_SeqAIJ *Aloc; 606 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 607 Scalar *a; 608 609 if (!rank) { 610 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611 CHKERRQ(ierr); 612 } 613 else { 614 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615 CHKERRQ(ierr); 616 } 617 PLogObjectParent(mat,A); 618 619 /* copy over the A part */ 620 Aloc = (Mat_SeqAIJ*) aij->A->data; 621 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 622 row = aij->rstart; 623 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 624 for ( i=0; i<m; i++ ) { 625 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 626 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 627 } 628 aj = Aloc->j; 629 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 630 631 /* copy over the B part */ 632 Aloc = (Mat_SeqAIJ*) aij->B->data; 633 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 634 row = aij->rstart; 635 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 637 for ( i=0; i<m; i++ ) { 638 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 639 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 640 } 641 PetscFree(ct); 642 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 644 if (!rank) { 645 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 646 } 647 ierr = MatDestroy(A); CHKERRQ(ierr); 648 } 649 } 650 return 0; 651 } 652 653 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654 { 655 Mat mat = (Mat) obj; 656 int ierr; 657 PetscObject vobj = (PetscObject) viewer; 658 659 if (!viewer) { 660 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661 } 662 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665 } 666 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668 } 669 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671 } 672 else if (vobj->cookie == DRAW_COOKIE) { 673 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674 } 675 else if (vobj->type == BINARY_FILE_VIEWER) { 676 return MatView_MPIAIJ_Binary(mat,viewer); 677 } 678 return 0; 679 } 680 681 extern int MatMarkDiag_SeqAIJ(Mat); 682 /* 683 This has to provide several versions. 684 685 1) per sequential 686 2) a) use only local smoothing updating outer values only once. 687 b) local smoothing updating outer values each inner iteration 688 3) color updating out values betwen colors. 689 */ 690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691 double fshift,int its,Vec xx) 692 { 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694 Mat AA = mat->A, BB = mat->B; 695 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 696 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 697 int ierr,*idx, *diag; 698 int n = mat->n, m = mat->m, i,shift = A->indexshift; 699 Vec tt; 700 701 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702 xs = x + shift; /* shift by one for index start of 1 */ 703 ls = ls + shift; 704 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705 diag = A->diag; 706 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 707 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708 } 709 if (flag & SOR_EISENSTAT) { 710 /* Let A = L + U + D; where L is lower trianglar, 711 U is upper triangular, E is diagonal; This routine applies 712 713 (L + E)^{-1} A (U + E)^{-1} 714 715 to a vector efficiently using Eisenstat's trick. This is for 716 the case of SSOR preconditioner, so E is D/omega where omega 717 is the relaxation factor. 718 */ 719 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720 PLogObjectParent(matin,tt); 721 VecGetArray(tt,&t); 722 scale = (2.0/omega) - 1.0; 723 /* x = (E + U)^{-1} b */ 724 VecSet(&zero,mat->lvec); 725 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 726 mat->Mvctx); CHKERRQ(ierr); 727 for ( i=m-1; i>-1; i-- ) { 728 n = A->i[i+1] - diag[i] - 1; 729 idx = A->j + diag[i] + !shift; 730 v = A->a + diag[i] + !shift; 731 sum = b[i]; 732 SPARSEDENSEMDOT(sum,xs,v,idx,n); 733 d = fshift + A->a[diag[i]+shift]; 734 n = B->i[i+1] - B->i[i]; 735 idx = B->j + B->i[i] + shift; 736 v = B->a + B->i[i] + shift; 737 SPARSEDENSEMDOT(sum,ls,v,idx,n); 738 x[i] = omega*(sum/d); 739 } 740 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 741 mat->Mvctx); CHKERRQ(ierr); 742 743 /* t = b - (2*E - D)x */ 744 v = A->a; 745 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746 747 /* t = (E + L)^{-1}t */ 748 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749 diag = A->diag; 750 VecSet(&zero,mat->lvec); 751 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 752 mat->Mvctx); CHKERRQ(ierr); 753 for ( i=0; i<m; i++ ) { 754 n = diag[i] - A->i[i]; 755 idx = A->j + A->i[i] + shift; 756 v = A->a + A->i[i] + shift; 757 sum = t[i]; 758 SPARSEDENSEMDOT(sum,ts,v,idx,n); 759 d = fshift + A->a[diag[i]+shift]; 760 n = B->i[i+1] - B->i[i]; 761 idx = B->j + B->i[i] + shift; 762 v = B->a + B->i[i] + shift; 763 SPARSEDENSEMDOT(sum,ls,v,idx,n); 764 t[i] = omega*(sum/d); 765 } 766 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 767 mat->Mvctx); CHKERRQ(ierr); 768 /* x = x + t */ 769 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770 VecDestroy(tt); 771 return 0; 772 } 773 774 775 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776 if (flag & SOR_ZERO_INITIAL_GUESS) { 777 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778 } 779 else { 780 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 781 CHKERRQ(ierr); 782 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 783 CHKERRQ(ierr); 784 } 785 while (its--) { 786 /* go down through the rows */ 787 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 788 mat->Mvctx); CHKERRQ(ierr); 789 for ( i=0; i<m; i++ ) { 790 n = A->i[i+1] - A->i[i]; 791 idx = A->j + A->i[i] + shift; 792 v = A->a + A->i[i] + shift; 793 sum = b[i]; 794 SPARSEDENSEMDOT(sum,xs,v,idx,n); 795 d = fshift + A->a[diag[i]+shift]; 796 n = B->i[i+1] - B->i[i]; 797 idx = B->j + B->i[i] + shift; 798 v = B->a + B->i[i] + shift; 799 SPARSEDENSEMDOT(sum,ls,v,idx,n); 800 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801 } 802 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 803 mat->Mvctx); CHKERRQ(ierr); 804 /* come up through the rows */ 805 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 806 mat->Mvctx); CHKERRQ(ierr); 807 for ( i=m-1; i>-1; i-- ) { 808 n = A->i[i+1] - A->i[i]; 809 idx = A->j + A->i[i] + shift; 810 v = A->a + A->i[i] + shift; 811 sum = b[i]; 812 SPARSEDENSEMDOT(sum,xs,v,idx,n); 813 d = fshift + A->a[diag[i]+shift]; 814 n = B->i[i+1] - B->i[i]; 815 idx = B->j + B->i[i] + shift; 816 v = B->a + B->i[i] + shift; 817 SPARSEDENSEMDOT(sum,ls,v,idx,n); 818 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819 } 820 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 821 mat->Mvctx); CHKERRQ(ierr); 822 } 823 } 824 else if (flag & SOR_FORWARD_SWEEP){ 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 VecSet(&zero,mat->lvec); 827 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 828 mat->Mvctx); CHKERRQ(ierr); 829 for ( i=0; i<m; i++ ) { 830 n = diag[i] - A->i[i]; 831 idx = A->j + A->i[i] + shift; 832 v = A->a + A->i[i] + shift; 833 sum = b[i]; 834 SPARSEDENSEMDOT(sum,xs,v,idx,n); 835 d = fshift + A->a[diag[i]+shift]; 836 n = B->i[i+1] - B->i[i]; 837 idx = B->j + B->i[i] + shift; 838 v = B->a + B->i[i] + shift; 839 SPARSEDENSEMDOT(sum,ls,v,idx,n); 840 x[i] = omega*(sum/d); 841 } 842 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 843 mat->Mvctx); CHKERRQ(ierr); 844 its--; 845 } 846 while (its--) { 847 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 848 CHKERRQ(ierr); 849 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 850 CHKERRQ(ierr); 851 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 852 mat->Mvctx); CHKERRQ(ierr); 853 for ( i=0; i<m; i++ ) { 854 n = A->i[i+1] - A->i[i]; 855 idx = A->j + A->i[i] + shift; 856 v = A->a + A->i[i] + shift; 857 sum = b[i]; 858 SPARSEDENSEMDOT(sum,xs,v,idx,n); 859 d = fshift + A->a[diag[i]+shift]; 860 n = B->i[i+1] - B->i[i]; 861 idx = B->j + B->i[i] + shift; 862 v = B->a + B->i[i] + shift; 863 SPARSEDENSEMDOT(sum,ls,v,idx,n); 864 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865 } 866 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 867 mat->Mvctx); CHKERRQ(ierr); 868 } 869 } 870 else if (flag & SOR_BACKWARD_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 VecSet(&zero,mat->lvec); 873 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 874 mat->Mvctx); CHKERRQ(ierr); 875 for ( i=m-1; i>-1; i-- ) { 876 n = A->i[i+1] - diag[i] - 1; 877 idx = A->j + diag[i] + !shift; 878 v = A->a + diag[i] + !shift; 879 sum = b[i]; 880 SPARSEDENSEMDOT(sum,xs,v,idx,n); 881 d = fshift + A->a[diag[i]+shift]; 882 n = B->i[i+1] - B->i[i]; 883 idx = B->j + B->i[i] + shift; 884 v = B->a + B->i[i] + shift; 885 SPARSEDENSEMDOT(sum,ls,v,idx,n); 886 x[i] = omega*(sum/d); 887 } 888 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 889 mat->Mvctx); CHKERRQ(ierr); 890 its--; 891 } 892 while (its--) { 893 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 894 mat->Mvctx); CHKERRQ(ierr); 895 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 896 mat->Mvctx); CHKERRQ(ierr); 897 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 898 mat->Mvctx); CHKERRQ(ierr); 899 for ( i=m-1; i>-1; i-- ) { 900 n = A->i[i+1] - A->i[i]; 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911 } 912 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 913 mat->Mvctx); CHKERRQ(ierr); 914 } 915 } 916 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917 if (flag & SOR_ZERO_INITIAL_GUESS) { 918 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919 } 920 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 921 CHKERRQ(ierr); 922 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 923 CHKERRQ(ierr); 924 while (its--) { 925 /* go down through the rows */ 926 for ( i=0; i<m; i++ ) { 927 n = A->i[i+1] - A->i[i]; 928 idx = A->j + A->i[i] + shift; 929 v = A->a + A->i[i] + shift; 930 sum = b[i]; 931 SPARSEDENSEMDOT(sum,xs,v,idx,n); 932 d = fshift + A->a[diag[i]+shift]; 933 n = B->i[i+1] - B->i[i]; 934 idx = B->j + B->i[i] + shift; 935 v = B->a + B->i[i] + shift; 936 SPARSEDENSEMDOT(sum,ls,v,idx,n); 937 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938 } 939 /* come up through the rows */ 940 for ( i=m-1; i>-1; i-- ) { 941 n = A->i[i+1] - A->i[i]; 942 idx = A->j + A->i[i] + shift; 943 v = A->a + A->i[i] + shift; 944 sum = b[i]; 945 SPARSEDENSEMDOT(sum,xs,v,idx,n); 946 d = fshift + A->a[diag[i]+shift]; 947 n = B->i[i+1] - B->i[i]; 948 idx = B->j + B->i[i] + shift; 949 v = B->a + B->i[i] + shift; 950 SPARSEDENSEMDOT(sum,ls,v,idx,n); 951 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 952 } 953 } 954 } 955 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956 if (flag & SOR_ZERO_INITIAL_GUESS) { 957 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958 } 959 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 960 CHKERRQ(ierr); 961 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 962 CHKERRQ(ierr); 963 while (its--) { 964 for ( i=0; i<m; i++ ) { 965 n = A->i[i+1] - A->i[i]; 966 idx = A->j + A->i[i] + shift; 967 v = A->a + A->i[i] + shift; 968 sum = b[i]; 969 SPARSEDENSEMDOT(sum,xs,v,idx,n); 970 d = fshift + A->a[diag[i]+shift]; 971 n = B->i[i+1] - B->i[i]; 972 idx = B->j + B->i[i] + shift; 973 v = B->a + B->i[i] + shift; 974 SPARSEDENSEMDOT(sum,ls,v,idx,n); 975 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976 } 977 } 978 } 979 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980 if (flag & SOR_ZERO_INITIAL_GUESS) { 981 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982 } 983 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 984 mat->Mvctx); CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 986 mat->Mvctx); CHKERRQ(ierr); 987 while (its--) { 988 for ( i=m-1; i>-1; i-- ) { 989 n = A->i[i+1] - A->i[i]; 990 idx = A->j + A->i[i] + shift; 991 v = A->a + A->i[i] + shift; 992 sum = b[i]; 993 SPARSEDENSEMDOT(sum,xs,v,idx,n); 994 d = fshift + A->a[diag[i]+shift]; 995 n = B->i[i+1] - B->i[i]; 996 idx = B->j + B->i[i] + shift; 997 v = B->a + B->i[i] + shift; 998 SPARSEDENSEMDOT(sum,ls,v,idx,n); 999 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000 } 1001 } 1002 } 1003 return 0; 1004 } 1005 1006 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007 int *nzalloc,int *mem) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010 Mat A = mat->A, B = mat->B; 1011 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012 1013 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1014 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016 if (flag == MAT_LOCAL) { 1017 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018 } else if (flag == MAT_GLOBAL_MAX) { 1019 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021 } else if (flag == MAT_GLOBAL_SUM) { 1022 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024 } 1025 return 0; 1026 } 1027 1028 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036 1037 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038 { 1039 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040 1041 if (op == NO_NEW_NONZERO_LOCATIONS || 1042 op == YES_NEW_NONZERO_LOCATIONS || 1043 op == COLUMNS_SORTED || 1044 op == ROW_ORIENTED) { 1045 MatSetOption(a->A,op); 1046 MatSetOption(a->B,op); 1047 } 1048 else if (op == ROWS_SORTED || 1049 op == SYMMETRIC_MATRIX || 1050 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051 op == YES_NEW_DIAGONALS) 1052 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1053 else if (op == COLUMN_ORIENTED) { 1054 a->roworiented = 0; 1055 MatSetOption(a->A,op); 1056 MatSetOption(a->B,op); 1057 } 1058 else if (op == NO_NEW_DIAGONALS) 1059 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060 else 1061 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062 return 0; 1063 } 1064 1065 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066 { 1067 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068 *m = mat->M; *n = mat->N; 1069 return 0; 1070 } 1071 1072 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073 { 1074 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075 *m = mat->m; *n = mat->N; 1076 return 0; 1077 } 1078 1079 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080 { 1081 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082 *m = mat->rstart; *n = mat->rend; 1083 return 0; 1084 } 1085 1086 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1087 { 1088 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089 Scalar *vworkA, *vworkB, **pvA, **pvB; 1090 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1092 1093 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094 lrow = row - rstart; 1095 1096 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097 if (!v) {pvA = 0; pvB = 0;} 1098 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1099 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1100 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101 nztot = nzA + nzB; 1102 1103 if (v || idx) { 1104 if (nztot) { 1105 /* Sort by increasing column numbers, assuming A and B already sorted */ 1106 int imark; 1107 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108 if (v) { 1109 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1110 for ( i=0; i<nzB; i++ ) { 1111 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112 else break; 1113 } 1114 imark = i; 1115 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117 } 1118 if (idx) { 1119 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121 for ( i=0; i<nzB; i++ ) { 1122 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123 else break; 1124 } 1125 imark = i; 1126 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1128 } 1129 } 1130 else {*idx = 0; *v=0;} 1131 } 1132 *nz = nztot; 1133 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 return 0; 1136 } 1137 1138 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1139 { 1140 if (idx) PetscFree(*idx); 1141 if (v) PetscFree(*v); 1142 return 0; 1143 } 1144 1145 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150 double sum = 0.0; 1151 Scalar *v; 1152 1153 if (aij->size == 1) { 1154 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1155 } else { 1156 if (type == NORM_FROBENIUS) { 1157 v = amat->a; 1158 for (i=0; i<amat->nz; i++ ) { 1159 #if defined(PETSC_COMPLEX) 1160 sum += real(conj(*v)*(*v)); v++; 1161 #else 1162 sum += (*v)*(*v); v++; 1163 #endif 1164 } 1165 v = bmat->a; 1166 for (i=0; i<bmat->nz; i++ ) { 1167 #if defined(PETSC_COMPLEX) 1168 sum += real(conj(*v)*(*v)); v++; 1169 #else 1170 sum += (*v)*(*v); v++; 1171 #endif 1172 } 1173 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1174 *norm = sqrt(*norm); 1175 } 1176 else if (type == NORM_1) { /* max column norm */ 1177 double *tmp, *tmp2; 1178 int *jj, *garray = aij->garray; 1179 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1180 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181 PetscMemzero(tmp,aij->N*sizeof(double)); 1182 *norm = 0.0; 1183 v = amat->a; jj = amat->j; 1184 for ( j=0; j<amat->nz; j++ ) { 1185 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1186 } 1187 v = bmat->a; jj = bmat->j; 1188 for ( j=0; j<bmat->nz; j++ ) { 1189 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1190 } 1191 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1192 for ( j=0; j<aij->N; j++ ) { 1193 if (tmp2[j] > *norm) *norm = tmp2[j]; 1194 } 1195 PetscFree(tmp); PetscFree(tmp2); 1196 } 1197 else if (type == NORM_INFINITY) { /* max row norm */ 1198 double ntemp = 0.0; 1199 for ( j=0; j<amat->m; j++ ) { 1200 v = amat->a + amat->i[j] + shift; 1201 sum = 0.0; 1202 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203 sum += PetscAbsScalar(*v); v++; 1204 } 1205 v = bmat->a + bmat->i[j] + shift; 1206 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207 sum += PetscAbsScalar(*v); v++; 1208 } 1209 if (sum > ntemp) ntemp = sum; 1210 } 1211 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1212 } 1213 else { 1214 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1215 } 1216 } 1217 return 0; 1218 } 1219 1220 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224 int ierr,shift = Aloc->indexshift; 1225 Mat B; 1226 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227 Scalar *array; 1228 1229 if (matout == PETSC_NULL && M != N) 1230 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1231 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1232 PETSC_NULL,&B); CHKERRQ(ierr); 1233 1234 /* copy over the A part */ 1235 Aloc = (Mat_SeqAIJ*) a->A->data; 1236 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1237 row = a->rstart; 1238 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1239 for ( i=0; i<m; i++ ) { 1240 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1241 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1242 } 1243 aj = Aloc->j; 1244 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1245 1246 /* copy over the B part */ 1247 Aloc = (Mat_SeqAIJ*) a->B->data; 1248 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1249 row = a->rstart; 1250 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1251 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1252 for ( i=0; i<m; i++ ) { 1253 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1254 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1255 } 1256 PetscFree(ct); 1257 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1258 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1259 if (matout != PETSC_NULL) { 1260 *matout = B; 1261 } else { 1262 /* This isn't really an in-place transpose .... but free data structures from a */ 1263 PetscFree(a->rowners); 1264 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1265 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1266 if (a->colmap) PetscFree(a->colmap); 1267 if (a->garray) PetscFree(a->garray); 1268 if (a->lvec) VecDestroy(a->lvec); 1269 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1270 PetscFree(a); 1271 PetscMemcpy(A,B,sizeof(struct _Mat)); 1272 PetscHeaderDestroy(B); 1273 } 1274 return 0; 1275 } 1276 1277 extern int MatPrintHelp_SeqAIJ(Mat); 1278 static int MatPrintHelp_MPIAIJ(Mat A) 1279 { 1280 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1281 1282 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1283 else return 0; 1284 } 1285 1286 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1287 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1288 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1289 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1290 /* -------------------------------------------------------------------*/ 1291 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1292 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1293 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1294 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1295 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1296 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1297 MatLUFactor_MPIAIJ,0, 1298 MatRelax_MPIAIJ, 1299 MatTranspose_MPIAIJ, 1300 MatGetInfo_MPIAIJ,0, 1301 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1302 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1303 0, 1304 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1305 MatGetReordering_MPIAIJ, 1306 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1307 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1308 MatILUFactorSymbolic_MPIAIJ,0, 1309 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1310 0,0,0, 1311 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1312 MatPrintHelp_MPIAIJ, 1313 MatScale_MPIAIJ}; 1314 1315 /*@C 1316 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1317 (the default parallel PETSc format). For good matrix assembly performance 1318 the user should preallocate the matrix storage by setting the parameters 1319 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1320 performance can be increased by more than a factor of 50. 1321 1322 Input Parameters: 1323 . comm - MPI communicator 1324 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1325 . n - number of local columns (or PETSC_DECIDE to have calculated 1326 if N is given) 1327 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1328 . N - number of global columns (or PETSC_DECIDE to have calculated 1329 if n is given) 1330 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1331 (same for all local rows) 1332 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1333 or null (possibly different for each row). You must leave room 1334 for the diagonal entry even if it is zero. 1335 . o_nz - number of nonzeros per row in off-diagonal portion of local 1336 submatrix (same for all local rows). 1337 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1338 submatrix or null (possibly different for each row). 1339 1340 Output Parameter: 1341 . newmat - the matrix 1342 1343 Notes: 1344 The AIJ format (also called the Yale sparse matrix format or 1345 compressed row storage), is fully compatible with standard Fortran 77 1346 storage. That is, the stored row and column indices can begin at 1347 either one (as in Fortran) or zero. See the users manual for details. 1348 1349 The user MUST specify either the local or global matrix dimensions 1350 (possibly both). 1351 1352 By default, this format uses inodes (identical nodes) when possible. 1353 We search for consecutive rows with the same nonzero structure, thereby 1354 reusing matrix information to achieve increased efficiency. 1355 1356 Options Database Keys: 1357 $ -mat_aij_no_inode - Do not use inodes 1358 $ -mat_aij_inode_limit <limit> - Set inode limit. 1359 $ (max limit=5) 1360 $ -mat_aij_oneindex - Internally use indexing starting at 1 1361 $ rather than 0. Note: When calling MatSetValues(), 1362 $ the user still MUST index entries starting at 0! 1363 1364 Storage Information: 1365 For a square global matrix we define each processor's diagonal portion 1366 to be its local rows and the corresponding columns (a square submatrix); 1367 each processor's off-diagonal portion encompasses the remainder of the 1368 local matrix (a rectangular submatrix). 1369 1370 The user can specify preallocated storage for the diagonal part of 1371 the local submatrix with either d_nz or d_nnz (not both). Set 1372 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1373 memory allocation. Likewise, specify preallocated storage for the 1374 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1375 1376 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1377 the figure below we depict these three local rows and all columns (0-11). 1378 1379 $ 0 1 2 3 4 5 6 7 8 9 10 11 1380 $ ------------------- 1381 $ row 3 | o o o d d d o o o o o o 1382 $ row 4 | o o o d d d o o o o o o 1383 $ row 5 | o o o d d d o o o o o o 1384 $ ------------------- 1385 $ 1386 1387 Thus, any entries in the d locations are stored in the d (diagonal) 1388 submatrix, and any entries in the o locations are stored in the 1389 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1390 stored simply in the MATSEQAIJ format for compressed row storage. 1391 1392 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1393 and o_nz should indicate the number of nonzeros per row in the o matrix. 1394 In general, for PDE problems in which most nonzeros are near the diagonal, 1395 one expects d_nz >> o_nz. For additional details, see the users manual 1396 chapter on matrices and the file $(PETSC_DIR)/Performance. 1397 1398 .keywords: matrix, aij, compressed row, sparse, parallel 1399 1400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1401 @*/ 1402 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1403 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1404 { 1405 Mat mat; 1406 Mat_MPIAIJ *a; 1407 int ierr, i,sum[2],work[2]; 1408 1409 *newmat = 0; 1410 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1411 PLogObjectCreate(mat); 1412 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1413 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1414 mat->destroy = MatDestroy_MPIAIJ; 1415 mat->view = MatView_MPIAIJ; 1416 mat->factor = 0; 1417 mat->assembled = PETSC_FALSE; 1418 1419 a->insertmode = NOT_SET_VALUES; 1420 MPI_Comm_rank(comm,&a->rank); 1421 MPI_Comm_size(comm,&a->size); 1422 1423 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1424 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1425 1426 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1427 work[0] = m; work[1] = n; 1428 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1429 if (M == PETSC_DECIDE) M = sum[0]; 1430 if (N == PETSC_DECIDE) N = sum[1]; 1431 } 1432 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1433 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1434 a->m = m; 1435 a->n = n; 1436 a->N = N; 1437 a->M = M; 1438 1439 /* build local table of row and column ownerships */ 1440 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1441 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1442 a->cowners = a->rowners + a->size + 1; 1443 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1444 a->rowners[0] = 0; 1445 for ( i=2; i<=a->size; i++ ) { 1446 a->rowners[i] += a->rowners[i-1]; 1447 } 1448 a->rstart = a->rowners[a->rank]; 1449 a->rend = a->rowners[a->rank+1]; 1450 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1451 a->cowners[0] = 0; 1452 for ( i=2; i<=a->size; i++ ) { 1453 a->cowners[i] += a->cowners[i-1]; 1454 } 1455 a->cstart = a->cowners[a->rank]; 1456 a->cend = a->cowners[a->rank+1]; 1457 1458 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1459 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1460 PLogObjectParent(mat,a->A); 1461 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1462 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1463 PLogObjectParent(mat,a->B); 1464 1465 /* build cache for off array entries formed */ 1466 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1467 a->colmap = 0; 1468 a->garray = 0; 1469 a->roworiented = 1; 1470 1471 /* stuff used for matrix vector multiply */ 1472 a->lvec = 0; 1473 a->Mvctx = 0; 1474 1475 *newmat = mat; 1476 return 0; 1477 } 1478 1479 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1480 { 1481 Mat mat; 1482 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1483 int ierr, len,flg; 1484 1485 *newmat = 0; 1486 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1487 PLogObjectCreate(mat); 1488 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1489 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1490 mat->destroy = MatDestroy_MPIAIJ; 1491 mat->view = MatView_MPIAIJ; 1492 mat->factor = matin->factor; 1493 mat->assembled = PETSC_TRUE; 1494 1495 a->m = oldmat->m; 1496 a->n = oldmat->n; 1497 a->M = oldmat->M; 1498 a->N = oldmat->N; 1499 1500 a->rstart = oldmat->rstart; 1501 a->rend = oldmat->rend; 1502 a->cstart = oldmat->cstart; 1503 a->cend = oldmat->cend; 1504 a->size = oldmat->size; 1505 a->rank = oldmat->rank; 1506 a->insertmode = NOT_SET_VALUES; 1507 1508 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1509 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1510 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1511 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1512 if (oldmat->colmap) { 1513 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1514 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1515 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1516 } else a->colmap = 0; 1517 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1518 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1519 PLogObjectMemory(mat,len*sizeof(int)); 1520 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1521 } else a->garray = 0; 1522 1523 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1524 PLogObjectParent(mat,a->lvec); 1525 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1526 PLogObjectParent(mat,a->Mvctx); 1527 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1528 PLogObjectParent(mat,a->A); 1529 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1530 PLogObjectParent(mat,a->B); 1531 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1532 if (flg) { 1533 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1534 } 1535 *newmat = mat; 1536 return 0; 1537 } 1538 1539 #include "sysio.h" 1540 1541 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1542 { 1543 Mat A; 1544 int i, nz, ierr, j,rstart, rend, fd; 1545 Scalar *vals,*svals; 1546 PetscObject vobj = (PetscObject) bview; 1547 MPI_Comm comm = vobj->comm; 1548 MPI_Status status; 1549 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1550 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1551 int tag = ((PetscObject)bview)->tag; 1552 1553 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1554 if (!rank) { 1555 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1556 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1557 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1558 } 1559 1560 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1561 M = header[1]; N = header[2]; 1562 /* determine ownership of all rows */ 1563 m = M/size + ((M % size) > rank); 1564 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1565 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1566 rowners[0] = 0; 1567 for ( i=2; i<=size; i++ ) { 1568 rowners[i] += rowners[i-1]; 1569 } 1570 rstart = rowners[rank]; 1571 rend = rowners[rank+1]; 1572 1573 /* distribute row lengths to all processors */ 1574 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1575 offlens = ourlens + (rend-rstart); 1576 if (!rank) { 1577 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1578 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1579 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1580 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1581 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1582 PetscFree(sndcounts); 1583 } 1584 else { 1585 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1586 } 1587 1588 if (!rank) { 1589 /* calculate the number of nonzeros on each processor */ 1590 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1591 PetscMemzero(procsnz,size*sizeof(int)); 1592 for ( i=0; i<size; i++ ) { 1593 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1594 procsnz[i] += rowlengths[j]; 1595 } 1596 } 1597 PetscFree(rowlengths); 1598 1599 /* determine max buffer needed and allocate it */ 1600 maxnz = 0; 1601 for ( i=0; i<size; i++ ) { 1602 maxnz = PetscMax(maxnz,procsnz[i]); 1603 } 1604 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1605 1606 /* read in my part of the matrix column indices */ 1607 nz = procsnz[0]; 1608 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1609 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1610 1611 /* read in every one elses and ship off */ 1612 for ( i=1; i<size; i++ ) { 1613 nz = procsnz[i]; 1614 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1615 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1616 } 1617 PetscFree(cols); 1618 } 1619 else { 1620 /* determine buffer space needed for message */ 1621 nz = 0; 1622 for ( i=0; i<m; i++ ) { 1623 nz += ourlens[i]; 1624 } 1625 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1626 1627 /* receive message of column indices*/ 1628 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1629 MPI_Get_count(&status,MPI_INT,&maxnz); 1630 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1631 } 1632 1633 /* loop over local rows, determining number of off diagonal entries */ 1634 PetscMemzero(offlens,m*sizeof(int)); 1635 jj = 0; 1636 for ( i=0; i<m; i++ ) { 1637 for ( j=0; j<ourlens[i]; j++ ) { 1638 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1639 jj++; 1640 } 1641 } 1642 1643 /* create our matrix */ 1644 for ( i=0; i<m; i++ ) { 1645 ourlens[i] -= offlens[i]; 1646 } 1647 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1648 A = *newmat; 1649 MatSetOption(A,COLUMNS_SORTED); 1650 for ( i=0; i<m; i++ ) { 1651 ourlens[i] += offlens[i]; 1652 } 1653 1654 if (!rank) { 1655 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1656 1657 /* read in my part of the matrix numerical values */ 1658 nz = procsnz[0]; 1659 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1660 1661 /* insert into matrix */ 1662 jj = rstart; 1663 smycols = mycols; 1664 svals = vals; 1665 for ( i=0; i<m; i++ ) { 1666 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1667 smycols += ourlens[i]; 1668 svals += ourlens[i]; 1669 jj++; 1670 } 1671 1672 /* read in other processors and ship out */ 1673 for ( i=1; i<size; i++ ) { 1674 nz = procsnz[i]; 1675 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1676 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1677 } 1678 PetscFree(procsnz); 1679 } 1680 else { 1681 /* receive numeric values */ 1682 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1683 1684 /* receive message of values*/ 1685 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1686 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1687 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1688 1689 /* insert into matrix */ 1690 jj = rstart; 1691 smycols = mycols; 1692 svals = vals; 1693 for ( i=0; i<m; i++ ) { 1694 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1695 smycols += ourlens[i]; 1696 svals += ourlens[i]; 1697 jj++; 1698 } 1699 } 1700 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1701 1702 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1703 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1704 return 0; 1705 } 1706