1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.123 1996/02/07 23:13:36 balay Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non-scalable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104
if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 
178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 
aij->insertmode = NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselves, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 return 0; 296 } 297 298 static int MatZeroEntries_MPIAIJ(Mat A) 299 { 300 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301 int ierr; 302 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 303 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 304 return 0; 305 } 306 307 /* the code does not do the diagonal entries correctly unless the 308 matrix is square and the column and row ownerships are identical. 309 This is a BUG. The only way to fix it seems to be to access 310 aij->A and aij->B directly and not through the MatZeroRows() 311 routine. 312 */ 313 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 314 { 315 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 316 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 317 int *procs,*nprocs,j,found,idx,nsends,*work; 318 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 319 int *rvalues,tag = A->tag,count,base,slen,n,*source; 320 int *lens,imdex,*lrows,*values; 321 MPI_Comm comm = A->comm; 322 MPI_Request *send_waits,*recv_waits; 323 MPI_Status recv_status,*send_status; 324 IS istmp; 325 326 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 327 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 328 329 /* first count number of contributors to each processor */ 330 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 331 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 332 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 333 for ( i=0; i<N; i++ ) { 334 idx = rows[i]; 335 found = 0; 336 for ( j=0; j<size; j++ ) { 337 if (idx >= owners[j] && idx < owners[j+1]) { 338 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 339 } 340 } 341 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 342 } 343 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 344 345 /* inform other processors of number of messages and max length*/ 346 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 347 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 348 nrecvs = work[rank]; 349 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 350 nmax = work[rank]; 351 PetscFree(work); 352 353 /* post receives: */ 354 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 355 CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 357 CHKPTRQ(recv_waits); 358 for ( i=0; i<nrecvs; i++ ) { 359 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 368 CHKPTRQ(send_waits); 369 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );
CHKPTRQ(starts); 370 starts[0] = 0; 371 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 372 for ( i=0; i<N; i++ ) { 373 svalues[starts[owner[i]]++] = rows[i]; 374 } 375 ISRestoreIndices(is,&rows); 376 377 starts[0] = 0; 378 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); 386 387 base = owners[rank]; 388 389 /* wait on receives */ 390 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 391 source = lens + nrecvs; 392 count = nrecvs; slen = 0; 393 while (count) { 394 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 395 /* unpack receives into our local space */ 396 MPI_Get_count(&recv_status,MPI_INT,&n); 397 source[imdex] = recv_status.MPI_SOURCE; 398 lens[imdex] = n; 399 slen += n; 400 count--; 401 } 402 PetscFree(recv_waits); 403 404 /* move the data into the send scatter */ 405 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 406 count = 0; 407 for ( i=0; i<nrecvs; i++ ) { 408 values = rvalues + i*nmax; 409 for ( j=0; j<lens[i]; j++ ) { 410 lrows[count++] = values[j] - base; 411 } 412 } 413 PetscFree(rvalues); PetscFree(lens); 414 PetscFree(owner); PetscFree(nprocs); 415 416 /* actually zap the local rows */ 417 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 418 PLogObjectParent(A,istmp); 419 PetscFree(lrows); 420 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 421 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 422 ierr = ISDestroy(istmp); CHKERRQ(ierr); 423 424 /* wait on sends */ 425 if (nsends) { 426 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 427 CHKPTRQ(send_status); 428 MPI_Waitall(nsends,send_waits,send_status); 429 PetscFree(send_status); 430 } 431 PetscFree(send_waits); PetscFree(svalues); 432 433 return 0; 434 } 435 436 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 437 { 438 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 439 int ierr; 440 441 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 442 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 443 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 444 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 445 return 0; 446 } 447 448 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 449 { 450 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 451 int ierr; 452 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 453 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 454 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 455 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 456 return 0; 457 } 458 459 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 460 { 461 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 462 int ierr; 463 464 /* do nondiagonal part */ 465 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 466 /* send it on its way */ 467 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 468 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 469 /* do local part */ 470 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 471 /* receive remote parts: note this assumes the values are not actually */ 472 /* inserted in yy until the next line, which is true for my implementation*/ 473 /* but is not perhaps always true. 
*/ 474 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 475 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 476 return 0; 477 } 478 479 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 480 { 481 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 482 int ierr; 483 484 /* do nondiagonal part */ 485 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 486 /* send it on its way */ 487 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 488 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489 /* do local part */ 490 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 491 /* receive remote parts: note this assumes the values are not actually */ 492 /* inserted in yy until the next line, which is true for my implementation*/ 493 /* but is not perhaps always true. */ 494 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 495 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 496 return 0; 497 } 498 499 /* 500 This only works correctly for square matrices where the subblock A->A is the 501 diagonal block 502 */ 503 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 504 { 505 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 506 return MatGetDiagonal(a->A,v); 507 } 508 509 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 int ierr; 513 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 514 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 515 return 0; 516 } 517 518 static int MatDestroy_MPIAIJ(PetscObject obj) 519 { 520 Mat mat = (Mat) obj; 521 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 522 int ierr; 523 #if defined(PETSC_LOG) 524 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 525 #endif 526 PetscFree(aij->rowners); 527 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 528 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 529 if (aij->colmap) PetscFree(aij->colmap); 530 if (aij->garray) PetscFree(aij->garray); 531 if (aij->lvec) VecDestroy(aij->lvec); 532 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 533 PetscFree(aij); 534 PLogObjectDestroy(mat); 535 PetscHeaderDestroy(mat); 536 return 0; 537 } 538 #include "draw.h" 539 #include "pinclude/pviewer.h" 540 541 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 542 { 543 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544 int ierr; 545 546 if (aij->size == 1) { 547 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548 } 549 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 550 return 0; 551 } 552 553 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 554 { 555 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 556 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 557 int ierr, format,shift = C->indexshift,rank; 558 PetscObject vobj = (PetscObject) viewer; 559 FILE *fd; 560 561 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 562 ierr = ViewerFileGetFormat_Private(viewer,&format); 563 if (format == FILE_FORMAT_INFO_DETAILED) { 564 int nz,nzalloc,mem; 565 MPI_Comm_rank(mat->comm,&rank); 566 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 567 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 568 MPIU_Seq_begin(mat->comm,1); 569 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 570 nzalloc,mem); 571 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 572 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 573 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 574 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 575 fflush(fd); 576 MPIU_Seq_end(mat->comm,1); 577 ierr = VecScatterView(aij->Mvctx,viewer); 
CHKERRQ(ierr); 578 return 0; 579 } 580 else if (format == FILE_FORMAT_INFO) { 581 return 0; 582 } 583 } 584 585 if (vobj->type == ASCII_FILE_VIEWER) { 586 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 587 MPIU_Seq_begin(mat->comm,1); 588 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 589 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 590 aij->cend); 591 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 592 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 593 fflush(fd); 594 MPIU_Seq_end(mat->comm,1); 595 } 596 else { 597 int size = aij->size; 598 rank = aij->rank; 599 if (size == 1) { 600 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 601 } 602 else { 603 /* assemble the entire matrix onto first processor. */ 604 Mat A; 605 Mat_SeqAIJ *Aloc; 606 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 607 Scalar *a; 608 609 if (!rank) { 610 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 611 CHKERRQ(ierr); 612 } 613 else { 614 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 615 CHKERRQ(ierr); 616 } 617 PLogObjectParent(mat,A); 618 619 /* copy over the A part */ 620 Aloc = (Mat_SeqAIJ*) aij->A->data; 621 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 622 row = aij->rstart; 623 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 624 for ( i=0; i<m; i++ ) { 625 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 626 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 627 } 628 aj = Aloc->j; 629 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 630 631 /* copy over the B part */ 632 Aloc = (Mat_SeqAIJ*) aij->B->data; 633 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 634 row = aij->rstart; 635 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 636 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 637 for ( i=0; i<m; i++ ) { 638 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 639 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 640 } 641 PetscFree(ct); 642 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 644 if (!rank) { 645 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 646 } 647 ierr = MatDestroy(A); CHKERRQ(ierr); 648 } 649 } 650 return 0; 651 } 652 653 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 654 { 655 Mat mat = (Mat) obj; 656 int ierr; 657 PetscObject vobj = (PetscObject) viewer; 658 659 if (!viewer) { 660 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 661 } 662 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665 } 666 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668 } 669 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671 } 672 else if (vobj->cookie == DRAW_COOKIE) { 673 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674 } 675 else if (vobj->type == BINARY_FILE_VIEWER) { 676 return MatView_MPIAIJ_Binary(mat,viewer); 677 } 678 return 0; 679 } 680 681 extern int MatMarkDiag_SeqAIJ(Mat); 682 /* 683 This has to provide several versions. 
684 685 1) as per the sequential version 686 2) a) use only local smoothing, updating outer values only once. 687 b) local smoothing, updating outer values each inner iteration 688 3) coloring, updating outer values between colors. 689 */ 690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691 double fshift,int its,Vec xx) 692 { 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694 Mat AA = mat->A, BB = mat->B; 695 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 696 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 697 int ierr,*idx, *diag; 698 int n = mat->n, m = mat->m, i,shift = A->indexshift; 699 Vec tt; 700 701 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 702 xs = x + shift; /* shift by one for index start of 1 */ 703 ls = ls + shift; 704 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 705 diag = A->diag; 706 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 707 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 708 } 709 if (flag & SOR_EISENSTAT) { 710 /* Let A = L + U + D; where L is lower triangular, 711 U is upper triangular, D is diagonal; This routine applies 712 713 (L + E)^{-1} A (U + E)^{-1} 714 715 to a vector efficiently using Eisenstat's trick. This is for 716 the case of the SSOR preconditioner, so E is D/omega where omega 717 is the relaxation factor. 718 */ 719 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 720 PLogObjectParent(matin,tt); 721 VecGetArray(tt,&t); 722 scale = (2.0/omega) - 1.0; 723 /* x = (E + U)^{-1} b */ 724 VecSet(&zero,mat->lvec); 725 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 726 mat->Mvctx); CHKERRQ(ierr); 727 for ( i=m-1; i>-1; i-- ) { 728 n = A->i[i+1] - diag[i] - 1; 729 idx = A->j + diag[i] + !shift; 730 v = A->a + diag[i] + !shift; 731 sum = b[i]; 732 SPARSEDENSEMDOT(sum,xs,v,idx,n); 733 d = fshift + A->a[diag[i]+shift]; 734 n = B->i[i+1] - B->i[i]; 735 idx = B->j + B->i[i] + shift; 736 v = B->a + B->i[i] + shift; 737 SPARSEDENSEMDOT(sum,ls,v,idx,n); 738 x[i] = omega*(sum/d); 739 } 740 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 741 mat->Mvctx); CHKERRQ(ierr); 742 743 /* t = b - (2*E - D)x */ 744 v = A->a; 745 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 746 747 /* t = (E + L)^{-1}t */ 748 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 749 diag = A->diag; 750 VecSet(&zero,mat->lvec); 751 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 752 mat->Mvctx); CHKERRQ(ierr); 753 for ( i=0; i<m; i++ ) { 754 n = diag[i] - A->i[i]; 755 idx = A->j + A->i[i] + shift; 756 v = A->a + A->i[i] + shift; 757 sum = t[i]; 758 SPARSEDENSEMDOT(sum,ts,v,idx,n); 759 d = fshift + A->a[diag[i]+shift]; 760 n = B->i[i+1] - B->i[i]; 761 idx = B->j + B->i[i] + shift; 762 v = B->a + B->i[i] + shift; 763 SPARSEDENSEMDOT(sum,ls,v,idx,n); 764 t[i] = omega*(sum/d); 765 } 766 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 767 mat->Mvctx); CHKERRQ(ierr); 768 /* x = x + t */ 769 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 770 VecDestroy(tt); 771 return 0; 772 } 773 774 775 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 776 if (flag & SOR_ZERO_INITIAL_GUESS) { 777 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 778 } 779 else { 780 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 781 CHKERRQ(ierr); 782 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 783 CHKERRQ(ierr); 784 } 785 while (its--) { 786 /* go down through the rows */ 787 ierr =
VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 788 mat->Mvctx); CHKERRQ(ierr); 789 for ( i=0; i<m; i++ ) { 790 n = A->i[i+1] - A->i[i]; 791 idx = A->j + A->i[i] + shift; 792 v = A->a + A->i[i] + shift; 793 sum = b[i]; 794 SPARSEDENSEMDOT(sum,xs,v,idx,n); 795 d = fshift + A->a[diag[i]+shift]; 796 n = B->i[i+1] - B->i[i]; 797 idx = B->j + B->i[i] + shift; 798 v = B->a + B->i[i] + shift; 799 SPARSEDENSEMDOT(sum,ls,v,idx,n); 800 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801 } 802 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 803 mat->Mvctx); CHKERRQ(ierr); 804 /* come up through the rows */ 805 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 806 mat->Mvctx); CHKERRQ(ierr); 807 for ( i=m-1; i>-1; i-- ) { 808 n = A->i[i+1] - A->i[i]; 809 idx = A->j + A->i[i] + shift; 810 v = A->a + A->i[i] + shift; 811 sum = b[i]; 812 SPARSEDENSEMDOT(sum,xs,v,idx,n); 813 d = fshift + A->a[diag[i]+shift]; 814 n = B->i[i+1] - B->i[i]; 815 idx = B->j + B->i[i] + shift; 816 v = B->a + B->i[i] + shift; 817 SPARSEDENSEMDOT(sum,ls,v,idx,n); 818 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819 } 820 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 821 mat->Mvctx); CHKERRQ(ierr); 822 } 823 } 824 else if (flag & SOR_FORWARD_SWEEP){ 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 VecSet(&zero,mat->lvec); 827 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 828 mat->Mvctx); CHKERRQ(ierr); 829 for ( i=0; i<m; i++ ) { 830 n = diag[i] - A->i[i]; 831 idx = A->j + A->i[i] + shift; 832 v = A->a + A->i[i] + shift; 833 sum = b[i]; 834 SPARSEDENSEMDOT(sum,xs,v,idx,n); 835 d = fshift + A->a[diag[i]+shift]; 836 n = B->i[i+1] - B->i[i]; 837 idx = B->j + B->i[i] + shift; 838 v = B->a + B->i[i] + shift; 839 SPARSEDENSEMDOT(sum,ls,v,idx,n); 840 x[i] = omega*(sum/d); 841 } 842 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 843 mat->Mvctx); CHKERRQ(ierr); 844 its--; 845 } 846 while (its--) { 847 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 848 CHKERRQ(ierr); 849 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 850 CHKERRQ(ierr); 851 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 852 mat->Mvctx); CHKERRQ(ierr); 853 for ( i=0; i<m; i++ ) { 854 n = A->i[i+1] - A->i[i]; 855 idx = A->j + A->i[i] + shift; 856 v = A->a + A->i[i] + shift; 857 sum = b[i]; 858 SPARSEDENSEMDOT(sum,xs,v,idx,n); 859 d = fshift + A->a[diag[i]+shift]; 860 n = B->i[i+1] - B->i[i]; 861 idx = B->j + B->i[i] + shift; 862 v = B->a + B->i[i] + shift; 863 SPARSEDENSEMDOT(sum,ls,v,idx,n); 864 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 865 } 866 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 867 mat->Mvctx); CHKERRQ(ierr); 868 } 869 } 870 else if (flag & SOR_BACKWARD_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 VecSet(&zero,mat->lvec); 873 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 874 mat->Mvctx); CHKERRQ(ierr); 875 for ( i=m-1; i>-1; i-- ) { 876 n = A->i[i+1] - diag[i] - 1; 877 idx = A->j + diag[i] + !shift; 878 v = A->a + diag[i] + !shift; 879 sum = b[i]; 880 SPARSEDENSEMDOT(sum,xs,v,idx,n); 881 d = fshift + A->a[diag[i]+shift]; 882 n = B->i[i+1] - B->i[i]; 883 idx = B->j + B->i[i] + shift; 884 v = B->a + B->i[i] + shift; 885 SPARSEDENSEMDOT(sum,ls,v,idx,n); 886 x[i] = omega*(sum/d); 887 } 888 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 889 mat->Mvctx); CHKERRQ(ierr); 890 its--; 891 } 892 while (its--) { 893 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 894 mat->Mvctx); CHKERRQ(ierr); 895 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 896 mat->Mvctx); CHKERRQ(ierr); 897 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 898 mat->Mvctx); CHKERRQ(ierr); 899 for ( i=m-1; i>-1; i-- ) { 900 n = A->i[i+1] - A->i[i]; 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 911 } 912 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 913 mat->Mvctx); CHKERRQ(ierr); 914 } 915 } 916 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 917 if (flag & SOR_ZERO_INITIAL_GUESS) { 918 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 919 } 920 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 921 CHKERRQ(ierr); 922 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 923 CHKERRQ(ierr); 924 while (its--) { 925 /* go down through the rows */ 926 for ( i=0; i<m; i++ ) { 927 n = A->i[i+1] - A->i[i]; 928 idx = A->j + A->i[i] + shift; 929 v = A->a + A->i[i] + shift; 930 sum = b[i]; 931 SPARSEDENSEMDOT(sum,xs,v,idx,n); 932 d = fshift + A->a[diag[i]+shift]; 933 n = B->i[i+1] - B->i[i]; 934 idx = B->j + B->i[i] + shift; 935 v = B->a + B->i[i] + shift; 936 SPARSEDENSEMDOT(sum,ls,v,idx,n); 937 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 938 } 939 /* come up through the rows */ 940 for ( i=m-1; i>-1; i-- ) { 941 n = A->i[i+1] - A->i[i]; 942 idx = A->j + A->i[i] + shift; 943 v = A->a + A->i[i] + shift; 944 sum = b[i]; 945 SPARSEDENSEMDOT(sum,xs,v,idx,n); 946 d = fshift + A->a[diag[i]+shift]; 947 n = B->i[i+1] - B->i[i]; 948 idx = B->j + B->i[i] + shift; 949 v = B->a + B->i[i] + shift; 950 SPARSEDENSEMDOT(sum,ls,v,idx,n); 951 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 952 } 953 } 954 } 955 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 956 if (flag & SOR_ZERO_INITIAL_GUESS) { 957 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 958 } 959 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 960 CHKERRQ(ierr); 961 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 962 CHKERRQ(ierr); 963 while (its--) { 964 for ( i=0; i<m; i++ ) { 965 n = A->i[i+1] - A->i[i]; 966 idx = A->j + A->i[i] + shift; 967 v = A->a + A->i[i] + shift; 968 sum = b[i]; 969 SPARSEDENSEMDOT(sum,xs,v,idx,n); 970 d = fshift + A->a[diag[i]+shift]; 971 n = B->i[i+1] - B->i[i]; 972 idx = B->j + B->i[i] + shift; 973 v = B->a + B->i[i] + shift; 974 SPARSEDENSEMDOT(sum,ls,v,idx,n); 975 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 976 } 977 } 978 } 979 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980 if (flag & SOR_ZERO_INITIAL_GUESS) { 981 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 982 } 983 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 984 mat->Mvctx); CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 986 mat->Mvctx); CHKERRQ(ierr); 987 while (its--) { 988 for ( i=m-1; i>-1; i-- ) { 989 n = A->i[i+1] - A->i[i]; 990 idx = A->j + A->i[i] + shift; 991 v = A->a + A->i[i] + shift; 992 sum = b[i]; 993 SPARSEDENSEMDOT(sum,xs,v,idx,n); 994 d = fshift + A->a[diag[i]+shift]; 995 n = B->i[i+1] - B->i[i]; 996 idx = B->j + B->i[i] + shift; 997 v = B->a + B->i[i] + shift; 998 SPARSEDENSEMDOT(sum,ls,v,idx,n); 999 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1000 } 1001 } 1002 } 1003 return 0; 1004 } 1005 1006 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1007 int *nzalloc,int *mem) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010 Mat A = mat->A, B = mat->B; 1011 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1012 1013 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1014 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1015 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1016 if (flag == MAT_LOCAL) { 1017 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1018 } else if (flag == MAT_GLOBAL_MAX) { 1019 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1020 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1021 } else if (flag == MAT_GLOBAL_SUM) { 1022 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1023 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1024 } 1025 return 0; 1026 } 1027 1028 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1029 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1030 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1031 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1032 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1033 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1034 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1035 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036 1037 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038 { 1039 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040 1041 if (op == NO_NEW_NONZERO_LOCATIONS || 1042 op == YES_NEW_NONZERO_LOCATIONS || 1043 op == COLUMNS_SORTED || 1044 op == ROW_ORIENTED) { 1045 MatSetOption(a->A,op); 1046 MatSetOption(a->B,op); 1047 } 1048 else if (op == ROWS_SORTED || 1049 op == SYMMETRIC_MATRIX || 1050 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1051 op == YES_NEW_DIAGONALS) 1052 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option 
ignored\n"); 1053 else if (op == COLUMN_ORIENTED) { 1054 a->roworiented = 0; 1055 MatSetOption(a->A,op); 1056 MatSetOption(a->B,op); 1057 } 1058 else if (op == NO_NEW_DIAGONALS) 1059 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1060 else 1061 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1062 return 0; 1063 } 1064 1065 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1066 { 1067 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1068 *m = mat->M; *n = mat->N; 1069 return 0; 1070 } 1071 1072 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1073 { 1074 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1075 *m = mat->m; *n = mat->N; 1076 return 0; 1077 } 1078 1079 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1080 { 1081 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082 *m = mat->rstart; *n = mat->rend; 1083 return 0; 1084 } 1085 1086 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1087 { 1088 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089 Scalar *vworkA, *vworkB, **pvA, **pvB; 1090 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1091 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1092 1093 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1094 lrow = row - rstart; 1095 1096 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1097 if (!v) {pvA = 0; pvB = 0;} 1098 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1099 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1100 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1101 nztot = nzA + nzB; 1102 1103 if (v || idx) { 1104 if (nztot) { 1105 /* Sort by increasing column numbers, assuming A and B already sorted */ 1106 int imark; 1107 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1108 if (v) { 1109 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1110 for ( i=0; i<nzB; i++ ) { 1111 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1112 else break; 1113 } 1114 imark = i; 1115 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1116 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1117 } 1118 if (idx) { 1119 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1120 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1121 for ( i=0; i<nzB; i++ ) { 1122 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1123 else break; 1124 } 1125 imark = i; 1126 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1127 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1128 } 1129 } 1130 else {*idx = 0; *v=0;} 1131 } 1132 *nz = nztot; 1133 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 return 0; 1136 } 1137 1138 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1139 { 1140 if (idx) PetscFree(*idx); 1141 if (v) PetscFree(*v); 1142 return 0; 1143 } 1144 1145 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1148 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1149 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1150 double sum = 0.0; 1151 Scalar *v; 1152 1153 if (aij->size == 1) { 1154 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1155 } else { 1156 if (type == NORM_FROBENIUS) { 1157 v = amat->a; 1158 for (i=0; i<amat->nz; i++ ) { 1159 #if defined(PETSC_COMPLEX) 1160 sum += real(conj(*v)*(*v)); v++; 1161 #else 1162 sum += 
(*v)*(*v); v++; 1163 #endif 1164 } 1165 v = bmat->a; 1166 for (i=0; i<bmat->nz; i++ ) { 1167 #if defined(PETSC_COMPLEX) 1168 sum += real(conj(*v)*(*v)); v++; 1169 #else 1170 sum += (*v)*(*v); v++; 1171 #endif 1172 } 1173 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1174 *norm = sqrt(*norm); 1175 } 1176 else if (type == NORM_1) { /* max column norm */ 1177 double *tmp, *tmp2; 1178 int *jj, *garray = aij->garray; 1179 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1180 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1181 PetscMemzero(tmp,aij->N*sizeof(double)); 1182 *norm = 0.0; 1183 v = amat->a; jj = amat->j; 1184 for ( j=0; j<amat->nz; j++ ) { 1185 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1186 } 1187 v = bmat->a; jj = bmat->j; 1188 for ( j=0; j<bmat->nz; j++ ) { 1189 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1190 } 1191 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1192 for ( j=0; j<aij->N; j++ ) { 1193 if (tmp2[j] > *norm) *norm = tmp2[j]; 1194 } 1195 PetscFree(tmp); PetscFree(tmp2); 1196 } 1197 else if (type == NORM_INFINITY) { /* max row norm */ 1198 double ntemp = 0.0; 1199 for ( j=0; j<amat->m; j++ ) { 1200 v = amat->a + amat->i[j] + shift; 1201 sum = 0.0; 1202 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1203 sum += PetscAbsScalar(*v); v++; 1204 } 1205 v = bmat->a + bmat->i[j] + shift; 1206 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1207 sum += PetscAbsScalar(*v); v++; 1208 } 1209 if (sum > ntemp) ntemp = sum; 1210 } 1211 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1212 } 1213 else { 1214 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1215 } 1216 } 1217 return 0; 1218 } 1219 1220 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1223 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1224 int ierr,shift = Aloc->indexshift; 1225 Mat B; 1226 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1227 Scalar *array; 1228 1229 if (matout == PETSC_NULL && M != N) 1230 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1231 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1232 PETSC_NULL,&B); CHKERRQ(ierr); 1233 1234 /* copy over the A part */ 1235 Aloc = (Mat_SeqAIJ*) a->A->data; 1236 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1237 row = a->rstart; 1238 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1239 for ( i=0; i<m; i++ ) { 1240 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1241 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1242 } 1243 aj = Aloc->j; 1244 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1245 1246 /* copy over the B part */ 1247 Aloc = (Mat_SeqAIJ*) a->B->data; 1248 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1249 row = a->rstart; 1250 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1251 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1252 for ( i=0; i<m; i++ ) { 1253 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1254 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1255 } 1256 PetscFree(ct); 1257 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1258 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1259 if (matout != PETSC_NULL) { 1260 *matout = B; 1261 } else { 1262 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1263 PetscFree(a->rowners); 1264 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1265 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1266 if (a->colmap) PetscFree(a->colmap); 1267 if (a->garray) PetscFree(a->garray); 1268 if (a->lvec) VecDestroy(a->lvec); 1269 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1270 PetscFree(a); 1271 PetscMemcpy(A,B,sizeof(struct _Mat)); 1272 PetscHeaderDestroy(B); 1273 } 1274 return 0; 1275 } 1276 1277 extern int MatPrintHelp_SeqAIJ(Mat); 1278 static int MatPrintHelp_MPIAIJ(Mat A) 1279 { 1280 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1281 1282 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1283 else return 0; 1284 } 1285 1286 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1287 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1288 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1289 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1290 /* -------------------------------------------------------------------*/ 1291 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1292 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1293 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1294 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1295 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1296 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1297 MatLUFactor_MPIAIJ,0, 1298 MatRelax_MPIAIJ, 1299 MatTranspose_MPIAIJ, 1300 MatGetInfo_MPIAIJ,0, 1301 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1302 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1303 0, 1304 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1305 MatGetReordering_MPIAIJ, 1306 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1307 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1308 MatILUFactorSymbolic_MPIAIJ,0, 1309 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1310 0,0,0, 1311 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1312 MatPrintHelp_MPIAIJ, 1313 MatScale_MPIAIJ}; 1314 1315 /*@C 1316 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1317 (the default parallel PETSc format). For good matrix assembly performance 1318 the user should preallocate the matrix storage by setting the parameters 1319 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1320 performance can be increased by more than a factor of 50. 1321 1322 Input Parameters: 1323 . comm - MPI communicator 1324 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 1325 . n - number of local columns (or PETSC_DECIDE to have it calculated 1326 if N is given) 1327 . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given) 1328 . N - number of global columns (or PETSC_DECIDE to have it calculated 1329 if n is given) 1330 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1331 (same for all local rows) 1332 . d_nnz - number of nonzeros per row in diagonal portion of local submatrix 1333 or PETSC_NULL (possibly different for each row). You must leave room 1334 for the diagonal entry even if it is zero. 1335 . o_nz - number of nonzeros per row in off-diagonal portion of local 1336 submatrix (same for all local rows). 1337 . o_nnz - number of nonzeros per row in off-diagonal portion of local 1338 submatrix or PETSC_NULL (possibly different for each row). 1339 1340 Output Parameter: 1341 . newmat - the matrix 1342 1343 Notes: 1344 The AIJ format (also called the Yale sparse matrix format or 1345 compressed row storage) is fully compatible with standard Fortran 77 1346 storage.
That is, the stored row and column indices can begin at 1347 either one (as in Fortran) or zero. See the users manual for details. 1348 1349 The user MUST specify either the local or global matrix dimensions 1350 (possibly both). 1351 1352 By default, this format uses inodes (identical nodes) when possible. 1353 We search for consecutive rows with the same nonzero structure, thereby 1354 reusing matrix information to achieve increased efficiency. 1355 1356 Options Database Keys: 1357 $ -mat_aij_no_inode - Do not use inodes 1358 $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1359 1360 Storage Information: 1361 For a square global matrix we define each processor's diagonal portion 1362 to be its local rows and the corresponding columns (a square submatrix); 1363 each processor's off-diagonal portion encompasses the remainder of the 1364 local matrix (a rectangular submatrix). 1365 1366 The user can specify preallocated storage for the diagonal part of 1367 the local submatrix with either d_nz or d_nnz (not both). Set 1368 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1369 memory allocation. Likewise, specify preallocated storage for the 1370 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1371 1372 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1373 the figure below we depict these three local rows and all columns (0-11). 1374 1375 $ 0 1 2 3 4 5 6 7 8 9 10 11 1376 $ ------------------- 1377 $ row 3 | o o o d d d o o o o o o 1378 $ row 4 | o o o d d d o o o o o o 1379 $ row 5 | o o o d d d o o o o o o 1380 $ ------------------- 1381 $ 1382 1383 Thus, any entries in the d locations are stored in the d (diagonal) 1384 submatrix, and any entries in the o locations are stored in the 1385 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1386 stored simply in the MATSEQAIJ format for compressed row storage. 1387 1388 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1389 and o_nz should indicate the number of nonzeros per row in the o matrix. 1390 In general, for PDE problems in which most nonzeros are near the diagonal, 1391 one expects d_nz >> o_nz. For additional details, see the users manual 1392 chapter on matrices and the file $(PETSC_DIR)/Performance. 
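   Example (a minimal usage sketch; the local sizes and the preallocation
   values 5 and 2 below are illustrative only and should be chosen to match
   the actual nonzero pattern of the application):
$      Mat A;
$      int ierr, m = 8, n = 8;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$      /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatMult() ... */
$      ierr = MatDestroy(A); CHKERRQ(ierr);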
1393 1394 1395 .keywords: matrix, aij, compressed row, sparse, parallel 1396 1397 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1398 @*/ 1399 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1400 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1401 { 1402 Mat mat; 1403 Mat_MPIAIJ *a; 1404 int ierr, i,sum[2],work[2]; 1405 1406 *newmat = 0; 1407 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1408 PLogObjectCreate(mat); 1409 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1410 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1411 mat->destroy = MatDestroy_MPIAIJ; 1412 mat->view = MatView_MPIAIJ; 1413 mat->factor = 0; 1414 mat->assembled = PETSC_FALSE; 1415 1416 a->insertmode = NOT_SET_VALUES; 1417 MPI_Comm_rank(comm,&a->rank); 1418 MPI_Comm_size(comm,&a->size); 1419 1420 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1421 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1422 1423 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1424 work[0] = m; work[1] = n; 1425 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1426 if (M == PETSC_DECIDE) M = sum[0]; 1427 if (N == PETSC_DECIDE) N = sum[1]; 1428 } 1429 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1430 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1431 a->m = m; 1432 a->n = n; 1433 a->N = N; 1434 a->M = M; 1435 1436 /* build local table of row and column ownerships */ 1437 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1438 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1439 a->cowners = a->rowners + a->size + 1; 1440 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1441 a->rowners[0] = 0; 1442 for ( i=2; i<=a->size; i++ ) { 1443 a->rowners[i] += a->rowners[i-1]; 1444 } 1445 a->rstart = a->rowners[a->rank]; 1446 a->rend = a->rowners[a->rank+1]; 1447 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1448 a->cowners[0] = 0; 1449 for ( i=2; i<=a->size; i++ ) { 1450 a->cowners[i] += a->cowners[i-1]; 1451 } 1452 a->cstart = a->cowners[a->rank]; 1453 a->cend = a->cowners[a->rank+1]; 1454 1455 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1456 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1457 PLogObjectParent(mat,a->A); 1458 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1459 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1460 PLogObjectParent(mat,a->B); 1461 1462 /* build cache for off array entries formed */ 1463 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1464 a->colmap = 0; 1465 a->garray = 0; 1466 a->roworiented = 1; 1467 1468 /* stuff used for matrix vector multiply */ 1469 a->lvec = 0; 1470 a->Mvctx = 0; 1471 1472 *newmat = mat; 1473 return 0; 1474 } 1475 1476 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1477 { 1478 Mat mat; 1479 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1480 int ierr, len,flg; 1481 1482 *newmat = 0; 1483 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1484 PLogObjectCreate(mat); 1485 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1486 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1487 mat->destroy = MatDestroy_MPIAIJ; 1488 mat->view = MatView_MPIAIJ; 1489 mat->factor = matin->factor; 1490 mat->assembled = PETSC_TRUE; 1491 1492 a->m = oldmat->m; 1493 a->n = oldmat->n; 1494 a->M = oldmat->M; 1495 a->N = oldmat->N; 1496 1497 a->rstart = oldmat->rstart; 1498 a->rend = 
oldmat->rend; 1499 a->cstart = oldmat->cstart; 1500 a->cend = oldmat->cend; 1501 a->size = oldmat->size; 1502 a->rank = oldmat->rank; 1503 a->insertmode = NOT_SET_VALUES; 1504 1505 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1506 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1507 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1508 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1509 if (oldmat->colmap) { 1510 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1511 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1512 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1513 } else a->colmap = 0; 1514 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1515 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1516 PLogObjectMemory(mat,len*sizeof(int)); 1517 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1518 } else a->garray = 0; 1519 1520 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1521 PLogObjectParent(mat,a->lvec); 1522 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1523 PLogObjectParent(mat,a->Mvctx); 1524 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1525 PLogObjectParent(mat,a->A); 1526 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1527 PLogObjectParent(mat,a->B); 1528 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1529 if (flg) { 1530 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1531 } 1532 *newmat = mat; 1533 return 0; 1534 } 1535 1536 #include "sysio.h" 1537 1538 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1539 { 1540 Mat A; 1541 int i, nz, ierr, j,rstart, rend, fd; 1542 Scalar *vals,*svals; 1543 PetscObject vobj = (PetscObject) bview; 1544 MPI_Comm comm = vobj->comm; 1545 MPI_Status status; 1546 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1547 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1548 int tag = ((PetscObject)bview)->tag; 1549 1550 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1551 if (!rank) { 1552 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1553 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1554 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1555 } 1556 1557 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1558 M = header[1]; N = header[2]; 1559 /* determine ownership of all rows */ 1560 m = M/size + ((M % size) > rank); 1561 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1562 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1563 rowners[0] = 0; 1564 for ( i=2; i<=size; i++ ) { 1565 rowners[i] += rowners[i-1]; 1566 } 1567 rstart = rowners[rank]; 1568 rend = rowners[rank+1]; 1569 1570 /* distribute row lengths to all processors */ 1571 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1572 offlens = ourlens + (rend-rstart); 1573 if (!rank) { 1574 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1575 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1576 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1577 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1578 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1579 PetscFree(sndcounts); 1580 } 1581 else { 1582 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1583 } 1584 1585 if (!rank) { 1586 /* 
calculate the number of nonzeros on each processor */ 1587 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1588 PetscMemzero(procsnz,size*sizeof(int)); 1589 for ( i=0; i<size; i++ ) { 1590 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1591 procsnz[i] += rowlengths[j]; 1592 } 1593 } 1594 PetscFree(rowlengths); 1595 1596 /* determine max buffer needed and allocate it */ 1597 maxnz = 0; 1598 for ( i=0; i<size; i++ ) { 1599 maxnz = PetscMax(maxnz,procsnz[i]); 1600 } 1601 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1602 1603 /* read in my part of the matrix column indices */ 1604 nz = procsnz[0]; 1605 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1606 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1607 1608 /* read in every one elses and ship off */ 1609 for ( i=1; i<size; i++ ) { 1610 nz = procsnz[i]; 1611 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1612 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1613 } 1614 PetscFree(cols); 1615 } 1616 else { 1617 /* determine buffer space needed for message */ 1618 nz = 0; 1619 for ( i=0; i<m; i++ ) { 1620 nz += ourlens[i]; 1621 } 1622 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1623 1624 /* receive message of column indices*/ 1625 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1626 MPI_Get_count(&status,MPI_INT,&maxnz); 1627 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1628 } 1629 1630 /* loop over local rows, determining number of off diagonal entries */ 1631 PetscMemzero(offlens,m*sizeof(int)); 1632 jj = 0; 1633 for ( i=0; i<m; i++ ) { 1634 for ( j=0; j<ourlens[i]; j++ ) { 1635 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1636 jj++; 1637 } 1638 } 1639 1640 /* create our matrix */ 1641 for ( i=0; i<m; i++ ) { 1642 ourlens[i] -= offlens[i]; 1643 } 1644 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1645 A = *newmat; 1646 MatSetOption(A,COLUMNS_SORTED); 1647 for ( i=0; i<m; i++ ) { 1648 ourlens[i] += offlens[i]; 1649 } 1650 1651 if (!rank) { 1652 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1653 1654 /* read in my part of the matrix numerical values */ 1655 nz = procsnz[0]; 1656 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1657 1658 /* insert into matrix */ 1659 jj = rstart; 1660 smycols = mycols; 1661 svals = vals; 1662 for ( i=0; i<m; i++ ) { 1663 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1664 smycols += ourlens[i]; 1665 svals += ourlens[i]; 1666 jj++; 1667 } 1668 1669 /* read in other processors and ship out */ 1670 for ( i=1; i<size; i++ ) { 1671 nz = procsnz[i]; 1672 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1673 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1674 } 1675 PetscFree(procsnz); 1676 } 1677 else { 1678 /* receive numeric values */ 1679 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1680 1681 /* receive message of values*/ 1682 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1683 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1684 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1685 1686 /* insert into matrix */ 1687 jj = rstart; 1688 smycols = mycols; 1689 svals = vals; 1690 for ( i=0; i<m; i++ ) { 1691 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1692 smycols += ourlens[i]; 1693 svals += ourlens[i]; 1694 jj++; 1695 } 1696 } 1697 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1698 1699 
ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1701 return 0; 1702 } 1703
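
/*
   Illustrative usage sketch (editor's note, not part of the library source;
   the global size 100, the preallocation values, and the row-building step
   are assumptions for the example only). A typical create/assemble/multiply
   cycle with a parallel AIJ matrix, using routines implemented or called in
   this file, looks like the following; x and y are assumed to be conforming
   parallel vectors:

      Mat    A;
      Vec    x, y;
      int    ierr, i, nc, rstart, rend, cols[3];
      Scalar vals[3];

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ... fill cols[], vals[] and nc for global row i; entries for rows or
            columns owned elsewhere are stashed by MatSetValues_MPIAIJ() and
            exchanged during MatAssemblyBegin/End() ...
        ierr = MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatMult(A,x,y); CHKERRQ(ierr);
*/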