#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.126 1996/02/09 15:10:11 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
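/*
   A small worked example of the colmap/garray relationship (illustrative
   only, not part of the original source): suppose the off-diagonal block B
   has three local columns whose global column numbers are

       garray = {3, 7, 9}

   With one-based internal indexing (indexshift = -1) the loop above yields

       colmap[3] = 1,  colmap[7] = 2,  colmap[9] = 3,

   all other entries remaining 0 from the PetscMemzero(). The lookup
   colmap[global] + shift in MatSetValues_MPIAIJ() then recovers the local
   columns 0, 1, 2, while a global column absent from B produces
   0 + shift = -1, which is what the "col < 0" tests below key on.
*/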
extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv); CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          /* note: this assumes the matrix has been assembled, so that colmap exists */
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}
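/*
   The next routine begins communicating stashed off-processor values. A
   brief illustration of its message-counting scheme (not from the original
   source): with 3 processes, suppose process 0's stash holds two entries
   owned by process 1 and one owned by process 2. Then on process 0

       nprocs = {0, 2, 1}   (number of stashed entries per owner)
       procs  = {0, 1, 1}   (1 if a message will be sent to that owner)

   and nsends = 2. The MPI_Allreduce of procs with MPI_SUM tells each
   process how many messages to expect (work[rank]), and the reduce of
   nprocs with MPI_MAX bounds the longest message, so receive buffers can
   be allocated before any message arrives.
*/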
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); we store the
          indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar)); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled,shift = C->indexshift;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, so that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
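/*
   Why MPI_PROD detects disassembly above (an illustrative note, not part
   of the original source): mat->was_assembled is 0 or 1 on each process,
   so the product over all processes is 1 only when no process has
   disassembled. With flags {1, 0, 1} on three processes, for instance,
   the product is 0, and each process whose own flag is still 1 takes the
   DisAssemble_MPIAIJ() branch, so that all processes reassemble from the
   same state.
*/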
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}
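/*
   How the two multiplies above decompose (an illustrative note, not part
   of the original source): writing the locally owned rows of the global
   matrix as [A | B], where A holds the columns this process owns and B
   holds everything else, the product is

       y_local = A * x_local + B * lvec,

   where lvec is the gathered copy of the off-processor entries of x.
   Overlapping the scatter into lvec with the purely local MatMult(a->A,...)
   is what hides the communication latency.
*/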
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which holds for the current
     implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which holds for the current
     implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
   This only works correctly for square matrices where the subblock
   A->A is the diagonal block.
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      MPIU_Seq_begin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,nz,nzalloc,mem);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 576 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 577 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 578 fflush(fd); 579 MPIU_Seq_end(mat->comm,1); 580 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 581 return 0; 582 } 583 else if (format == FILE_FORMAT_INFO) { 584 return 0; 585 } 586 } 587 588 if (vobj->type == ASCII_FILE_VIEWER) { 589 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 590 MPIU_Seq_begin(mat->comm,1); 591 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 592 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 593 aij->cend); 594 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 595 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 596 fflush(fd); 597 MPIU_Seq_end(mat->comm,1); 598 } 599 else { 600 int size = aij->size; 601 rank = aij->rank; 602 if (size == 1) { 603 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 604 } 605 else { 606 /* assemble the entire matrix onto first processor. */ 607 Mat A; 608 Mat_SeqAIJ *Aloc; 609 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 610 Scalar *a; 611 612 if (!rank) { 613 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 614 CHKERRQ(ierr); 615 } 616 else { 617 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 618 CHKERRQ(ierr); 619 } 620 PLogObjectParent(mat,A); 621 622 /* copy over the A part */ 623 Aloc = (Mat_SeqAIJ*) aij->A->data; 624 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 625 row = aij->rstart; 626 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 627 for ( i=0; i<m; i++ ) { 628 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 629 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 630 } 631 aj = Aloc->j; 632 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 633 634 /* copy over the B part */ 635 Aloc = (Mat_SeqAIJ*) aij->B->data; 636 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 637 row = aij->rstart; 638 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 639 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 640 for ( i=0; i<m; i++ ) { 641 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 642 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 643 } 644 PetscFree(ct); 645 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 646 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 647 if (!rank) { 648 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 649 } 650 ierr = MatDestroy(A); CHKERRQ(ierr); 651 } 652 } 653 return 0; 654 } 655 656 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 657 { 658 Mat mat = (Mat) obj; 659 int ierr; 660 PetscObject vobj = (PetscObject) viewer; 661 662 if (!viewer) { 663 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 664 } 665 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 666 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 667 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668 } 669 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 670 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671 } 672 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 673 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674 } 675 else if (vobj->cookie == DRAW_COOKIE) { 676 ierr = 
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions.

    1) per sequential
    2) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    3) color, updating outer values between colors.
*/
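/*
   A note on the update formula used repeatedly below (illustrative, not
   part of the original source). The SPARSEDENSEMDOT macros subtract the
   row inner product, so after the two macro calls over the full row

       sum = b[i] - sum_j a_ij x_j       (diagonal term included),

   and therefore

       x[i] = (1-omega)*x[i] + omega*(sum/d + x[i])
            = x[i] + omega*(b - A x)_i / d.

   Because x[] is updated in place as the sweep proceeds, entries already
   visited enter the residual with their new values, which is the SOR
   (for omega = 1, Gauss-Seidel) iteration; d carries the optional
   diagonal shift fshift.
*/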
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular, U is
       strictly upper triangular, and D is diagonal. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i] + !shift;
      v   = A->a + diag[i] + !shift;
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one when indices start at 1 */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] + shift;
      v   = A->a + A->i[i] + shift;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = fshift + A->a[diag[i]+shift];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] + shift;
      v   = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i] + !shift;
        v   = A->a + diag[i] + !shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}
ignored\n"); 1056 else if (op == COLUMN_ORIENTED) { 1057 a->roworiented = 0; 1058 MatSetOption(a->A,op); 1059 MatSetOption(a->B,op); 1060 } 1061 else if (op == NO_NEW_DIAGONALS) 1062 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1063 else 1064 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1065 return 0; 1066 } 1067 1068 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1069 { 1070 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1071 *m = mat->M; *n = mat->N; 1072 return 0; 1073 } 1074 1075 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1076 { 1077 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1078 *m = mat->m; *n = mat->N; 1079 return 0; 1080 } 1081 1082 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1083 { 1084 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1085 *m = mat->rstart; *n = mat->rend; 1086 return 0; 1087 } 1088 1089 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1090 { 1091 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1092 Scalar *vworkA, *vworkB, **pvA, **pvB; 1093 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1094 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1095 1096 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1097 lrow = row - rstart; 1098 1099 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1100 if (!v) {pvA = 0; pvB = 0;} 1101 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1102 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1103 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1104 nztot = nzA + nzB; 1105 1106 if (v || idx) { 1107 if (nztot) { 1108 /* Sort by increasing column numbers, assuming A and B already sorted */ 1109 int imark; 1110 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1111 if (v) { 1112 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1113 for ( i=0; i<nzB; i++ ) { 1114 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1115 else break; 1116 } 1117 imark = i; 1118 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1119 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1120 } 1121 if (idx) { 1122 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1123 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1124 for ( i=0; i<nzB; i++ ) { 1125 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1126 else break; 1127 } 1128 imark = i; 1129 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1130 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1131 } 1132 } 1133 else {*idx = 0; *v=0;} 1134 } 1135 *nz = nztot; 1136 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1137 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1138 return 0; 1139 } 1140 1141 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1142 { 1143 if (idx) PetscFree(*idx); 1144 if (v) PetscFree(*v); 1145 return 0; 1146 } 1147 1148 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1149 { 1150 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1151 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1152 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1153 double sum = 0.0; 1154 Scalar *v; 1155 1156 if (aij->size == 1) { 1157 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1158 } else { 1159 if (type == NORM_FROBENIUS) { 1160 v = amat->a; 1161 for (i=0; i<amat->nz; i++ ) { 1162 #if defined(PETSC_COMPLEX) 1163 sum += real(conj(*v)*(*v)); v++; 1164 #else 1165 sum += 
static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
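/*
   The three parallel reductions above match the norm definitions (a
   summary, not part of the original source):

       Frobenius: ||A||_F   = sqrt( sum_ij |a_ij|^2 )  -> MPI_SUM of the
                  local squared entries of both blocks, then one sqrt;
       1-norm:    ||A||_1   = max_j sum_i |a_ij|       -> MPI_SUM of the
                  per-column partial sums, then a local max;
       inf-norm:  ||A||_inf = max_i sum_j |a_ij|       -> local row sums
                  (rows are never split across processes), then MPI_MAX.
*/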
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; instead we free the data
       structures from a and copy B's header over A's. */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
   (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
   or null (possibly different for each row).  You must leave room
   for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
   submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
   submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For additional details, see the users manual
   chapter on matrices and the file $(PETSC_DIR)/Performance.
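
   Example:
   A usage sketch (added here for illustration; not part of the original
   manual page), assembling a matrix with at most 3 nonzeros per local row,
   of which at most 2 fall outside the diagonal block:

$      Mat    A;
$      int    ierr, i, nc, rstart, rend, cols[3];
$      Scalar vals[3];
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                             100,100,3,PETSC_NULL,2,PETSC_NULL,&A);
$      ierr = MatGetOwnershipRange(A,&rstart,&rend);
$      for ( i=rstart; i<rend; i++ ) {
$        /* fill nc, cols, vals for row i of the stencil */
$        ierr = MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES);
$      }
$      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY);

   Rows set on the owning processor require no communication; rows set
   elsewhere are stashed and delivered during assembly, as implemented
   above.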
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = 0;
  mat->assembled = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;

  *newmat = mat;
  return 0;
}
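/*
   A worked example of the ownership tables built above (illustrative, not
   part of the original source): with 3 processes and local row counts
   m = 4, 3, 3, the MPI_Allgather followed by the running sum gives

       rowners = {0, 4, 7, 10},

   so rank 1 has rstart = 4 and rend = 7, i.e. it owns global rows 4..6;
   the tests "idx >= owners[j] && idx < owners[j+1]" used throughout this
   file are lookups in exactly this table. cowners is built the same way
   for the column split.
*/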
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
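/*
   A hedged usage sketch for MatLoad_MPIAIJ (not part of the original
   source; the viewer-creation call follows the PETSc API of this era as
   best recalled -- consult the viewer headers for the exact binary-viewer
   constructor in your tree):

       Viewer bview;
       Mat    A;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",
                                   BINARY_RDONLY,&bview); CHKERRQ(ierr);
       ierr = MatLoad(bview,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(bview); CHKERRQ(ierr);

   Process 0 reads the header, row lengths, column indices, and values and
   ships each piece to its owner, so only one process touches the file.
*/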