1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.109 1996/01/12 22:07:09 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too 
large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (aij->assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix"); 104 for ( i=0; i<m; i++ ) { 105 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 106 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 107 if (idxm[i] >= rstart && idxm[i] < rend) { 108 row = idxm[i] - rstart; 109 for ( j=0; j<n; j++ ) { 110 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 111 if (idxn[j] >= aij->N) 
SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 112 if (idxn[j] >= cstart && idxn[j] < cend){ 113 col = idxn[j] - cstart; 114 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 115 } 116 else { 117 col = aij->colmap[idxn[j]] + shift; 118 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 119 } 120 } 121 } 122 else { 123 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 124 } 125 } 126 return 0; 127 } 128 129 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 130 { 131 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 132 MPI_Comm comm = mat->comm; 133 int size = aij->size, *owners = aij->rowners; 134 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 135 MPI_Request *send_waits,*recv_waits; 136 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 137 InsertMode addv; 138 Scalar *rvalues,*svalues; 139 140 /* make sure all processors are either in INSERTMODE or ADDMODE */ 141 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 142 if (addv == (ADD_VALUES|INSERT_VALUES)) { 143 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 144 } 145 aij->insertmode = addv; /* in case this processor had no cache */ 146 147 /* first count number of contributors to each processor */ 148 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 149 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 150 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 151 for ( i=0; i<aij->stash.n; i++ ) { 152 idx = aij->stash.idx[i]; 153 for ( j=0; j<size; j++ ) { 154 if (idx >= owners[j] && idx < owners[j+1]) { 155 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 156 } 157 } 158 } 159 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 160 161 /* inform other processors of number of messages and max length*/ 162 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 163 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 
164 nreceives = work[rank]; 165 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 166 nmax = work[rank]; 167 PetscFree(work); 168 169 /* post receives: 170 1) each message will consist of ordered pairs 171 (global index,value) we store the global index as a double 172 to simplify the message passing. 173 2) since we don't know how long each individual message is we 174 allocate the largest needed buffer for each receive. Potentially 175 this is a lot of wasted space. 176 177 178 This could be done better. 179 */ 180 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 181 CHKPTRQ(rvalues); 182 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 183 CHKPTRQ(recv_waits); 184 for ( i=0; i<nreceives; i++ ) { 185 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 186 comm,recv_waits+i); 187 } 188 189 /* do sends: 190 1) starts[i] gives the starting index in svalues for stuff going to 191 the ith processor 192 */ 193 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 194 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 195 CHKPTRQ(send_waits); 196 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 197 starts[0] = 0; 198 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 199 for ( i=0; i<aij->stash.n; i++ ) { 200 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 201 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 202 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 203 } 204 PetscFree(owner); 205 starts[0] = 0; 206 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 207 count = 0; 208 for ( i=0; i<size; i++ ) { 209 if (procs[i]) { 210 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 211 comm,send_waits+count++); 212 } 213 } 214 PetscFree(starts); PetscFree(nprocs); 215 216 /* Free cache space */ 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 218 
219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 247 col = (int) PETSCREAL(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (aij->assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PETSCREAL(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = NOT_SET_VALUES; 
279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (aij->assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 aij->assembled = 1; 292 } 293 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 294 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 295 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 
313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 328 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 329 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 330 331 /* first count number of contributors to each processor */ 332 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 333 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 334 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 335 for ( i=0; i<N; i++ ) { 336 idx = rows[i]; 337 found = 0; 338 for ( j=0; j<size; j++ ) { 339 if (idx >= owners[j] && idx < owners[j+1]) { 340 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 341 } 342 } 343 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 344 } 345 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 346 347 /* inform other processors of number of messages and max length*/ 348 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 349 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 350 nrecvs = work[rank]; 351 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 352 nmax = work[rank]; 353 PetscFree(work); 354 355 /* post receives: */ 356 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 357 CHKPTRQ(rvalues); 358 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 359 CHKPTRQ(recv_waits); 360 for ( i=0; i<nrecvs; i++ ) { 361 
MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 362 } 363 364 /* do sends: 365 1) starts[i] gives the starting index in svalues for stuff going to 366 the ith processor 367 */ 368 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 369 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 370 CHKPTRQ(send_waits); 371 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 372 starts[0] = 0; 373 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 374 for ( i=0; i<N; i++ ) { 375 svalues[starts[owner[i]]++] = rows[i]; 376 } 377 ISRestoreIndices(is,&rows); 378 379 starts[0] = 0; 380 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 381 count = 0; 382 for ( i=0; i<size; i++ ) { 383 if (procs[i]) { 384 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 385 } 386 } 387 PetscFree(starts); 388 389 base = owners[rank]; 390 391 /* wait on receives */ 392 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 393 source = lens + nrecvs; 394 count = nrecvs; slen = 0; 395 while (count) { 396 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 397 /* unpack receives into our local space */ 398 MPI_Get_count(&recv_status,MPI_INT,&n); 399 source[imdex] = recv_status.MPI_SOURCE; 400 lens[imdex] = n; 401 slen += n; 402 count--; 403 } 404 PetscFree(recv_waits); 405 406 /* move the data into the send scatter */ 407 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 408 count = 0; 409 for ( i=0; i<nrecvs; i++ ) { 410 values = rvalues + i*nmax; 411 for ( j=0; j<lens[i]; j++ ) { 412 lrows[count++] = values[j] - base; 413 } 414 } 415 PetscFree(rvalues); PetscFree(lens); 416 PetscFree(owner); PetscFree(nprocs); 417 418 /* actually zap the local rows */ 419 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 420 PLogObjectParent(A,istmp); 421 PetscFree(lrows); 422 ierr = MatZeroRows(l->A,istmp,diag); 
CHKERRQ(ierr); 423 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 424 ierr = ISDestroy(istmp); CHKERRQ(ierr); 425 426 /* wait on sends */ 427 if (nsends) { 428 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 429 CHKPTRQ(send_status); 430 MPI_Waitall(nsends,send_waits,send_status); 431 PetscFree(send_status); 432 } 433 PetscFree(send_waits); PetscFree(svalues); 434 435 return 0; 436 } 437 438 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 439 { 440 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 441 int ierr; 442 443 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 444 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 446 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 447 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 448 return 0; 449 } 450 451 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 452 { 453 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 454 int ierr; 455 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 456 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 457 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 458 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 459 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 460 return 0; 461 } 462 463 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 464 { 465 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 466 int ierr; 467 468 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 469 /* do nondiagonal part */ 470 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 471 /* send it on its way */ 472 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 473 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 474 /* do local part */ 475 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 476 /* receive remote parts: note this assumes the values are 
not actually */ 477 /* inserted in yy until the next line, which is true for my implementation*/ 478 /* but is not perhaps always true. */ 479 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 480 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 481 return 0; 482 } 483 484 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 487 int ierr; 488 489 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 490 /* do nondiagonal part */ 491 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 492 /* send it on its way */ 493 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 494 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 495 /* do local part */ 496 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 497 /* receive remote parts: note this assumes the values are not actually */ 498 /* inserted in yy until the next line, which is true for my implementation*/ 499 /* but is not perhaps always true. 
*/ 500 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 501 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 502 return 0; 503 } 504 505 /* 506 This only works correctly for square matrices where the subblock A->A is the 507 diagonal block 508 */ 509 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 510 { 511 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 512 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 513 return MatGetDiagonal(a->A,v); 514 } 515 516 static int MatDestroy_MPIAIJ(PetscObject obj) 517 { 518 Mat mat = (Mat) obj; 519 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 520 int ierr; 521 #if defined(PETSC_LOG) 522 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 523 #endif 524 PetscFree(aij->rowners); 525 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 526 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 527 if (aij->colmap) PetscFree(aij->colmap); 528 if (aij->garray) PetscFree(aij->garray); 529 if (aij->lvec) VecDestroy(aij->lvec); 530 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 531 PetscFree(aij); 532 PLogObjectDestroy(mat); 533 PetscHeaderDestroy(mat); 534 return 0; 535 } 536 #include "draw.h" 537 #include "pinclude/pviewer.h" 538 539 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 540 { 541 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 542 int ierr; 543 544 if (aij->size == 1) { 545 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 546 } 547 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 548 return 0; 549 } 550 551 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 552 { 553 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 554 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 555 int ierr, format,shift = C->indexshift,rank; 556 PetscObject vobj = (PetscObject) viewer; 557 FILE *fd; 558 559 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 560 ierr = ViewerFileGetFormat_Private(viewer,&format); 561 if (format == FILE_FORMAT_INFO_DETAILED) { 562 int nz,nzalloc,mem; 563 
MPI_Comm_rank(mat->comm,&rank); 564 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 565 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 566 MPIU_Seq_begin(mat->comm,1); 567 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 568 nzalloc,mem); 569 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 570 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 571 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 572 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 573 fflush(fd); 574 MPIU_Seq_end(mat->comm,1); 575 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 576 return 0; 577 } 578 else if (format == FILE_FORMAT_INFO) { 579 return 0; 580 } 581 } 582 583 if (vobj->type == ASCII_FILE_VIEWER) { 584 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 585 MPIU_Seq_begin(mat->comm,1); 586 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 587 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 588 aij->cend); 589 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 590 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 591 fflush(fd); 592 MPIU_Seq_end(mat->comm,1); 593 } 594 else { 595 int size = aij->size; 596 rank = aij->rank; 597 if (size == 1) { 598 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 599 } 600 else { 601 /* assemble the entire matrix onto first processor. 
*/ 602 Mat A; 603 Mat_SeqAIJ *Aloc; 604 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 605 Scalar *a; 606 607 if (!rank) { 608 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 609 CHKERRQ(ierr); 610 } 611 else { 612 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 613 CHKERRQ(ierr); 614 } 615 PLogObjectParent(mat,A); 616 617 /* copy over the A part */ 618 Aloc = (Mat_SeqAIJ*) aij->A->data; 619 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 620 row = aij->rstart; 621 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 622 for ( i=0; i<m; i++ ) { 623 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 624 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 625 } 626 aj = Aloc->j; 627 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 628 629 /* copy over the B part */ 630 Aloc = (Mat_SeqAIJ*) aij->B->data; 631 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 632 row = aij->rstart; 633 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 634 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 635 for ( i=0; i<m; i++ ) { 636 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 637 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 638 } 639 PetscFree(ct); 640 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 641 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 642 if (!rank) { 643 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 644 } 645 ierr = MatDestroy(A); CHKERRQ(ierr); 646 } 647 } 648 return 0; 649 } 650 651 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 652 { 653 Mat mat = (Mat) obj; 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 655 int ierr; 656 PetscObject vobj = (PetscObject) viewer; 657 658 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 659 if (!viewer) { 660 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 
661 } 662 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 663 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 664 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 665 } 666 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 667 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 668 } 669 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 670 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 671 } 672 else if (vobj->cookie == DRAW_COOKIE) { 673 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 674 } 675 else if (vobj->type == BINARY_FILE_VIEWER) { 676 return MatView_MPIAIJ_Binary(mat,viewer); 677 } 678 return 0; 679 } 680 681 extern int MatMarkDiag_SeqAIJ(Mat); 682 /* 683 This has to provide several versions. 684 685 1) per sequential 686 2) a) use only local smoothing updating outer values only once. 687 b) local smoothing updating outer values each inner iteration 688 3) color updating out values betwen colors. 
689 */ 690 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 691 double fshift,int its,Vec xx) 692 { 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 694 Mat AA = mat->A, BB = mat->B; 695 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 696 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 697 int ierr,*idx, *diag; 698 int n = mat->n, m = mat->m, i,shift = A->indexshift; 699 Vec tt; 700 701 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 702 703 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 704 xs = x + shift; /* shift by one for index start of 1 */ 705 ls = ls + shift; 706 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 707 diag = A->diag; 708 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 709 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 710 } 711 if (flag & SOR_EISENSTAT) { 712 /* Let A = L + U + D; where L is lower trianglar, 713 U is upper triangular, E is diagonal; This routine applies 714 715 (L + E)^{-1} A (U + E)^{-1} 716 717 to a vector efficiently using Eisenstat's trick. This is for 718 the case of SSOR preconditioner, so E is D/omega where omega 719 is the relaxation factor. 
720 */ 721 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 722 PLogObjectParent(matin,tt); 723 VecGetArray(tt,&t); 724 scale = (2.0/omega) - 1.0; 725 /* x = (E + U)^{-1} b */ 726 VecSet(&zero,mat->lvec); 727 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 728 mat->Mvctx); CHKERRQ(ierr); 729 for ( i=m-1; i>-1; i-- ) { 730 n = A->i[i+1] - diag[i] - 1; 731 idx = A->j + diag[i] + !shift; 732 v = A->a + diag[i] + !shift; 733 sum = b[i]; 734 SPARSEDENSEMDOT(sum,xs,v,idx,n); 735 d = fshift + A->a[diag[i]+shift]; 736 n = B->i[i+1] - B->i[i]; 737 idx = B->j + B->i[i] + shift; 738 v = B->a + B->i[i] + shift; 739 SPARSEDENSEMDOT(sum,ls,v,idx,n); 740 x[i] = omega*(sum/d); 741 } 742 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 743 mat->Mvctx); CHKERRQ(ierr); 744 745 /* t = b - (2*E - D)x */ 746 v = A->a; 747 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 748 749 /* t = (E + L)^{-1}t */ 750 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 751 diag = A->diag; 752 VecSet(&zero,mat->lvec); 753 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 754 mat->Mvctx); CHKERRQ(ierr); 755 for ( i=0; i<m; i++ ) { 756 n = diag[i] - A->i[i]; 757 idx = A->j + A->i[i] + shift; 758 v = A->a + A->i[i] + shift; 759 sum = t[i]; 760 SPARSEDENSEMDOT(sum,ts,v,idx,n); 761 d = fshift + A->a[diag[i]+shift]; 762 n = B->i[i+1] - B->i[i]; 763 idx = B->j + B->i[i] + shift; 764 v = B->a + B->i[i] + shift; 765 SPARSEDENSEMDOT(sum,ls,v,idx,n); 766 t[i] = omega*(sum/d); 767 } 768 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 769 mat->Mvctx); CHKERRQ(ierr); 770 /* x = x + t */ 771 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 772 VecDestroy(tt); 773 return 0; 774 } 775 776 777 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 778 if (flag & SOR_ZERO_INITIAL_GUESS) { 779 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 780 } 781 else { 782 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 783 
CHKERRQ(ierr); 784 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 785 CHKERRQ(ierr); 786 } 787 while (its--) { 788 /* go down through the rows */ 789 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 790 mat->Mvctx); CHKERRQ(ierr); 791 for ( i=0; i<m; i++ ) { 792 n = A->i[i+1] - A->i[i]; 793 idx = A->j + A->i[i] + shift; 794 v = A->a + A->i[i] + shift; 795 sum = b[i]; 796 SPARSEDENSEMDOT(sum,xs,v,idx,n); 797 d = fshift + A->a[diag[i]+shift]; 798 n = B->i[i+1] - B->i[i]; 799 idx = B->j + B->i[i] + shift; 800 v = B->a + B->i[i] + shift; 801 SPARSEDENSEMDOT(sum,ls,v,idx,n); 802 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 803 } 804 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 805 mat->Mvctx); CHKERRQ(ierr); 806 /* come up through the rows */ 807 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 808 mat->Mvctx); CHKERRQ(ierr); 809 for ( i=m-1; i>-1; i-- ) { 810 n = A->i[i+1] - A->i[i]; 811 idx = A->j + A->i[i] + shift; 812 v = A->a + A->i[i] + shift; 813 sum = b[i]; 814 SPARSEDENSEMDOT(sum,xs,v,idx,n); 815 d = fshift + A->a[diag[i]+shift]; 816 n = B->i[i+1] - B->i[i]; 817 idx = B->j + B->i[i] + shift; 818 v = B->a + B->i[i] + shift; 819 SPARSEDENSEMDOT(sum,ls,v,idx,n); 820 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 821 } 822 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 823 mat->Mvctx); CHKERRQ(ierr); 824 } 825 } 826 else if (flag & SOR_FORWARD_SWEEP){ 827 if (flag & SOR_ZERO_INITIAL_GUESS) { 828 VecSet(&zero,mat->lvec); 829 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 830 mat->Mvctx); CHKERRQ(ierr); 831 for ( i=0; i<m; i++ ) { 832 n = diag[i] - A->i[i]; 833 idx = A->j + A->i[i] + shift; 834 v = A->a + A->i[i] + shift; 835 sum = b[i]; 836 SPARSEDENSEMDOT(sum,xs,v,idx,n); 837 d = fshift + A->a[diag[i]+shift]; 838 n = B->i[i+1] - B->i[i]; 839 idx = B->j + B->i[i] + shift; 840 v = B->a + B->i[i] + shift; 841 SPARSEDENSEMDOT(sum,ls,v,idx,n); 842 x[i] = omega*(sum/d); 843 } 844 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 845 mat->Mvctx); CHKERRQ(ierr); 846 its--; 847 } 848 while (its--) { 849 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 850 CHKERRQ(ierr); 851 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 852 CHKERRQ(ierr); 853 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 854 mat->Mvctx); CHKERRQ(ierr); 855 for ( i=0; i<m; i++ ) { 856 n = A->i[i+1] - A->i[i]; 857 idx = A->j + A->i[i] + shift; 858 v = A->a + A->i[i] + shift; 859 sum = b[i]; 860 SPARSEDENSEMDOT(sum,xs,v,idx,n); 861 d = fshift + A->a[diag[i]+shift]; 862 n = B->i[i+1] - B->i[i]; 863 idx = B->j + B->i[i] + shift; 864 v = B->a + B->i[i] + shift; 865 SPARSEDENSEMDOT(sum,ls,v,idx,n); 866 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 867 } 868 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 869 mat->Mvctx); CHKERRQ(ierr); 870 } 871 } 872 else if (flag & SOR_BACKWARD_SWEEP){ 873 if (flag & SOR_ZERO_INITIAL_GUESS) { 874 VecSet(&zero,mat->lvec); 875 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 876 mat->Mvctx); CHKERRQ(ierr); 877 for ( i=m-1; i>-1; i-- ) { 878 n = A->i[i+1] - diag[i] - 1; 879 idx = A->j + diag[i] + !shift; 880 v = A->a + diag[i] + !shift; 881 sum = b[i]; 882 SPARSEDENSEMDOT(sum,xs,v,idx,n); 883 d = fshift + A->a[diag[i]+shift]; 884 n = B->i[i+1] - B->i[i]; 885 idx = B->j + B->i[i] + shift; 886 v = B->a + B->i[i] + shift; 887 SPARSEDENSEMDOT(sum,ls,v,idx,n); 888 x[i] = omega*(sum/d); 889 } 890 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 891 mat->Mvctx); CHKERRQ(ierr); 892 its--; 893 } 894 while (its--) { 895 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 896 mat->Mvctx); CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 898 mat->Mvctx); CHKERRQ(ierr); 899 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 900 mat->Mvctx); CHKERRQ(ierr); 901 for ( i=m-1; i>-1; i-- ) { 902 n = A->i[i+1] - A->i[i]; 903 idx = A->j + A->i[i] + shift; 904 v = A->a + A->i[i] + shift; 905 sum = b[i]; 906 SPARSEDENSEMDOT(sum,xs,v,idx,n); 907 d = fshift + A->a[diag[i]+shift]; 908 n = B->i[i+1] - B->i[i]; 909 idx = B->j + B->i[i] + shift; 910 v = B->a + B->i[i] + shift; 911 SPARSEDENSEMDOT(sum,ls,v,idx,n); 912 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 913 } 914 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 915 mat->Mvctx); CHKERRQ(ierr); 916 } 917 } 918 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 919 if (flag & SOR_ZERO_INITIAL_GUESS) { 920 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 921 } 922 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 923 CHKERRQ(ierr); 924 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 925 CHKERRQ(ierr); 926 while (its--) { 927 /* go down through the rows */ 928 for ( i=0; i<m; i++ ) { 929 n = A->i[i+1] - A->i[i]; 930 idx = A->j + A->i[i] + shift; 931 v = A->a + A->i[i] + shift; 932 sum = b[i]; 933 SPARSEDENSEMDOT(sum,xs,v,idx,n); 934 d = fshift + A->a[diag[i]+shift]; 935 n = B->i[i+1] - B->i[i]; 936 idx = B->j + B->i[i] + shift; 937 v = B->a + B->i[i] + shift; 938 SPARSEDENSEMDOT(sum,ls,v,idx,n); 939 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 940 } 941 /* come up through the rows */ 942 for ( i=m-1; i>-1; i-- ) { 943 n = A->i[i+1] - A->i[i]; 944 idx = A->j + A->i[i] + shift; 945 v = A->a + A->i[i] + shift; 946 sum = b[i]; 947 SPARSEDENSEMDOT(sum,xs,v,idx,n); 948 d = fshift + A->a[diag[i]+shift]; 949 n = B->i[i+1] - B->i[i]; 950 idx = B->j + B->i[i] + shift; 951 v = B->a + B->i[i] + shift; 952 SPARSEDENSEMDOT(sum,ls,v,idx,n); 953 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 954 } 955 } 956 } 957 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 958 if (flag & SOR_ZERO_INITIAL_GUESS) { 959 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 960 } 961 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 962 CHKERRQ(ierr); 963 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 964 CHKERRQ(ierr); 965 while (its--) { 966 for ( i=0; i<m; i++ ) { 967 n = A->i[i+1] - A->i[i]; 968 idx = A->j + A->i[i] + shift; 969 v = A->a + A->i[i] + shift; 970 sum = b[i]; 971 SPARSEDENSEMDOT(sum,xs,v,idx,n); 972 d = fshift + A->a[diag[i]+shift]; 973 n = B->i[i+1] - B->i[i]; 974 idx = B->j + B->i[i] + shift; 975 v = B->a + B->i[i] + shift; 976 SPARSEDENSEMDOT(sum,ls,v,idx,n); 977 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 978 } 979 } 980 } 981 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 982 if (flag & SOR_ZERO_INITIAL_GUESS) { 983 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 984 } 985 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 986 mat->Mvctx); CHKERRQ(ierr); 987 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 988 mat->Mvctx); CHKERRQ(ierr); 989 while (its--) { 990 for ( i=m-1; i>-1; i-- ) { 991 n = A->i[i+1] - A->i[i]; 992 idx = A->j + A->i[i] + shift; 993 v = A->a + A->i[i] + shift; 994 sum = b[i]; 995 SPARSEDENSEMDOT(sum,xs,v,idx,n); 996 d = fshift + A->a[diag[i]+shift]; 997 n = B->i[i+1] - B->i[i]; 998 idx = B->j + B->i[i] + shift; 999 v = B->a + B->i[i] + shift; 1000 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1001 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 1002 } 1003 } 1004 } 1005 return 0; 1006 } 1007 1008 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1009 int *nzalloc,int *mem) 1010 { 1011 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1012 Mat A = mat->A, B = mat->B; 1013 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1014 1015 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1016 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1017 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1018 if (flag == MAT_LOCAL) { 1019 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1020 } else if (flag == MAT_GLOBAL_MAX) { 1021 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1022 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1023 } else if (flag == MAT_GLOBAL_SUM) { 1024 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1025 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1026 } 1027 return 0; 1028 } 1029 1030 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1031 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1032 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1033 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1034 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1035 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1036 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1037 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1038 1039 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1040 { 1041 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1042 1043 if (op == NO_NEW_NONZERO_LOCATIONS || 1044 op == YES_NEW_NONZERO_LOCATIONS || 1045 op == COLUMNS_SORTED || 1046 op == ROW_ORIENTED) { 1047 MatSetOption(a->A,op); 1048 MatSetOption(a->B,op); 1049 } 1050 else if (op == ROWS_SORTED || 1051 op == SYMMETRIC_MATRIX || 1052 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1053 op == YES_NEW_DIAGONALS) 1054 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 
1055 else if (op == COLUMN_ORIENTED) { 1056 a->roworiented = 0; 1057 MatSetOption(a->A,op); 1058 MatSetOption(a->B,op); 1059 } 1060 else if (op == NO_NEW_DIAGONALS) 1061 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1062 else 1063 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1064 return 0; 1065 } 1066 1067 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1068 { 1069 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1070 *m = mat->M; *n = mat->N; 1071 return 0; 1072 } 1073 1074 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1075 { 1076 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1077 *m = mat->m; *n = mat->N; 1078 return 0; 1079 } 1080 1081 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1084 *m = mat->rstart; *n = mat->rend; 1085 return 0; 1086 } 1087 1088 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1089 { 1090 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1091 Scalar *vworkA, *vworkB, **pvA, **pvB; 1092 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1093 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1094 1095 if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix"); 1096 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1097 lrow = row - rstart; 1098 1099 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1100 if (!v) {pvA = 0; pvB = 0;} 1101 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1102 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1103 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1104 nztot = nzA + nzB; 1105 1106 if (v || idx) { 1107 if (nztot) { 1108 /* Sort by increasing column numbers, assuming A and B already sorted */ 1109 int imark; 1110 if (mat->assembled) { 1111 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1112 } 1113 if (v) { 1114 *v = (Scalar *) PetscMalloc( 
(nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1115 for ( i=0; i<nzB; i++ ) { 1116 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1117 else break; 1118 } 1119 imark = i; 1120 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1121 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1122 } 1123 if (idx) { 1124 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1125 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1126 for ( i=0; i<nzB; i++ ) { 1127 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1128 else break; 1129 } 1130 imark = i; 1131 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1132 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1133 } 1134 } 1135 else {*idx = 0; *v=0;} 1136 } 1137 *nz = nztot; 1138 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1139 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1140 return 0; 1141 } 1142 1143 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1144 { 1145 if (idx) PetscFree(*idx); 1146 if (v) PetscFree(*v); 1147 return 0; 1148 } 1149 1150 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1151 { 1152 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1153 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1154 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1155 double sum = 0.0; 1156 Scalar *v; 1157 1158 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1159 if (aij->size == 1) { 1160 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1161 } else { 1162 if (type == NORM_FROBENIUS) { 1163 v = amat->a; 1164 for (i=0; i<amat->nz; i++ ) { 1165 #if defined(PETSC_COMPLEX) 1166 sum += real(conj(*v)*(*v)); v++; 1167 #else 1168 sum += (*v)*(*v); v++; 1169 #endif 1170 } 1171 v = bmat->a; 1172 for (i=0; i<bmat->nz; i++ ) { 1173 #if defined(PETSC_COMPLEX) 1174 sum += real(conj(*v)*(*v)); v++; 1175 #else 1176 sum += (*v)*(*v); v++; 1177 #endif 1178 } 1179 
MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1180 *norm = sqrt(*norm); 1181 } 1182 else if (type == NORM_1) { /* max column norm */ 1183 double *tmp, *tmp2; 1184 int *jj, *garray = aij->garray; 1185 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1186 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1187 PetscMemzero(tmp,aij->N*sizeof(double)); 1188 *norm = 0.0; 1189 v = amat->a; jj = amat->j; 1190 for ( j=0; j<amat->nz; j++ ) { 1191 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1192 } 1193 v = bmat->a; jj = bmat->j; 1194 for ( j=0; j<bmat->nz; j++ ) { 1195 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1196 } 1197 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1198 for ( j=0; j<aij->N; j++ ) { 1199 if (tmp2[j] > *norm) *norm = tmp2[j]; 1200 } 1201 PetscFree(tmp); PetscFree(tmp2); 1202 } 1203 else if (type == NORM_INFINITY) { /* max row norm */ 1204 double ntemp = 0.0; 1205 for ( j=0; j<amat->m; j++ ) { 1206 v = amat->a + amat->i[j] + shift; 1207 sum = 0.0; 1208 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1209 sum += PetscAbsScalar(*v); v++; 1210 } 1211 v = bmat->a + bmat->i[j] + shift; 1212 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1213 sum += PetscAbsScalar(*v); v++; 1214 } 1215 if (sum > ntemp) ntemp = sum; 1216 } 1217 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1218 } 1219 else { 1220 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1221 } 1222 } 1223 return 0; 1224 } 1225 1226 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1227 { 1228 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1229 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1230 int ierr,shift = Aloc->indexshift; 1231 Mat B; 1232 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1233 Scalar *array; 1234 1235 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1236 ierr = 
MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1237 PETSC_NULL,&B); CHKERRQ(ierr); 1238 1239 /* copy over the A part */ 1240 Aloc = (Mat_SeqAIJ*) a->A->data; 1241 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1242 row = a->rstart; 1243 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1244 for ( i=0; i<m; i++ ) { 1245 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1246 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1247 } 1248 aj = Aloc->j; 1249 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1250 1251 /* copy over the B part */ 1252 Aloc = (Mat_SeqAIJ*) a->B->data; 1253 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1254 row = a->rstart; 1255 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1256 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1257 for ( i=0; i<m; i++ ) { 1258 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1259 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1260 } 1261 PetscFree(ct); 1262 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1263 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1264 if (matout) { 1265 *matout = B; 1266 } else { 1267 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1268 PetscFree(a->rowners); 1269 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1270 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1271 if (a->colmap) PetscFree(a->colmap); 1272 if (a->garray) PetscFree(a->garray); 1273 if (a->lvec) VecDestroy(a->lvec); 1274 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1275 PetscFree(a); 1276 PetscMemcpy(A,B,sizeof(struct _Mat)); 1277 PetscHeaderDestroy(B); 1278 } 1279 return 0; 1280 } 1281 1282 extern int MatPrintHelp_SeqAIJ(Mat); 1283 static int MatPrintHelp_MPIAIJ(Mat A) 1284 { 1285 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1286 1287 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1288 else return 0; 1289 } 1290 1291 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1292 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1293 1294 /* -------------------------------------------------------------------*/ 1295 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1296 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1297 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1298 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1299 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1300 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1301 MatLUFactor_MPIAIJ,0, 1302 MatRelax_MPIAIJ, 1303 MatTranspose_MPIAIJ, 1304 MatGetInfo_MPIAIJ,0, 1305 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1306 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1307 0, 1308 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1309 MatGetReordering_MPIAIJ, 1310 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1311 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1312 MatILUFactorSymbolic_MPIAIJ,0, 1313 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1314 0,0,0, 1315 0,0,MatGetValues_MPIAIJ,0, 1316 MatPrintHelp_MPIAIJ}; 1317 1318 /*@C 1319 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1320 (the default parallel PETSc format). 1321 1322 Input Parameters: 1323 . comm - MPI communicator 1324 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1325 . n - number of local columns (or PETSC_DECIDE to have calculated 1326 if N is given) 1327 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1328 . N - number of global columns (or PETSC_DECIDE to have calculated 1329 if n is given) 1330 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1331 (same for all local rows) 1332 . d_nnz - number of nonzeros per row in diagonal portion of local submatrix 1333 or null (possibly different for each row). You must leave room 1334 for the diagonal entry even if it is zero. 1335 . o_nz - number of nonzeros per row in off-diagonal portion of local 1336 submatrix (same for all local rows). 1337 . o_nnz - number of nonzeros per row in off-diagonal portion of local 1338 submatrix or null (possibly different for each row). 1339 1340 Output Parameter: 1341 . newmat - the matrix 1342 1343 Notes: 1344 The AIJ format (also called the Yale sparse matrix format or 1345 compressed row storage), is fully compatible with standard Fortran 77 1346 storage. That is, the stored row and column indices can begin at 1347 either one (as in Fortran) or zero. See the users manual for details. 1348 1349 The user MUST specify either the local or global matrix dimensions 1350 (possibly both). 1351 1352 Storage Information: 1353 For a square global matrix we define each processor's diagonal portion 1354 to be its local rows and the corresponding columns (a square submatrix); 1355 each processor's off-diagonal portion encompasses the remainder of the 1356 local matrix (a rectangular submatrix). 1357 1358 The user can specify preallocated storage for the diagonal part of 1359 the local submatrix with either d_nz or d_nnz (not both). Set 1360 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1361 memory allocation. 
Likewise, specify preallocated storage for the 1362 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1363 1364 By default, this format uses inodes (identical nodes) when possible. 1365 We search for consecutive rows with the same nonzero structure, thereby 1366 reusing matrix information to achieve increased efficiency. 1367 1368 Options Database Keys: 1369 $ -mat_aij_no_inode - Do not use inodes 1370 $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1371 1372 .keywords: matrix, aij, compressed row, sparse, parallel 1373 1374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1375 @*/ 1376 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1377 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1378 { 1379 Mat mat; 1380 Mat_MPIAIJ *a; 1381 int ierr, i,sum[2],work[2]; 1382 1383 *newmat = 0; 1384 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1385 PLogObjectCreate(mat); 1386 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1387 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1388 mat->destroy = MatDestroy_MPIAIJ; 1389 mat->view = MatView_MPIAIJ; 1390 mat->factor = 0; 1391 1392 a->insertmode = NOT_SET_VALUES; 1393 MPI_Comm_rank(comm,&a->rank); 1394 MPI_Comm_size(comm,&a->size); 1395 1396 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1397 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1398 1399 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1400 work[0] = m; work[1] = n; 1401 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1402 if (M == PETSC_DECIDE) M = sum[0]; 1403 if (N == PETSC_DECIDE) N = sum[1]; 1404 } 1405 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1406 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1407 a->m = m; 1408 a->n = n; 1409 a->N = N; 1410 a->M = M; 1411 1412 /* build local table of row and column ownerships */ 1413 a->rowners = (int *) 
PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1414 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1415 a->cowners = a->rowners + a->size + 1; 1416 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1417 a->rowners[0] = 0; 1418 for ( i=2; i<=a->size; i++ ) { 1419 a->rowners[i] += a->rowners[i-1]; 1420 } 1421 a->rstart = a->rowners[a->rank]; 1422 a->rend = a->rowners[a->rank+1]; 1423 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1424 a->cowners[0] = 0; 1425 for ( i=2; i<=a->size; i++ ) { 1426 a->cowners[i] += a->cowners[i-1]; 1427 } 1428 a->cstart = a->cowners[a->rank]; 1429 a->cend = a->cowners[a->rank+1]; 1430 1431 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1432 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1433 PLogObjectParent(mat,a->A); 1434 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1435 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1436 PLogObjectParent(mat,a->B); 1437 1438 /* build cache for off array entries formed */ 1439 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1440 a->colmap = 0; 1441 a->garray = 0; 1442 a->roworiented = 1; 1443 1444 /* stuff used for matrix vector multiply */ 1445 a->lvec = 0; 1446 a->Mvctx = 0; 1447 a->assembled = 0; 1448 1449 *newmat = mat; 1450 return 0; 1451 } 1452 1453 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1454 { 1455 Mat mat; 1456 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1457 int ierr, len,flg; 1458 1459 if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix"); 1460 *newmat = 0; 1461 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1462 PLogObjectCreate(mat); 1463 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1464 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1465 mat->destroy = MatDestroy_MPIAIJ; 1466 mat->view = MatView_MPIAIJ; 1467 mat->factor = matin->factor; 1468 1469 a->m = oldmat->m; 1470 
a->n = oldmat->n; 1471 a->M = oldmat->M; 1472 a->N = oldmat->N; 1473 1474 a->assembled = 1; 1475 a->rstart = oldmat->rstart; 1476 a->rend = oldmat->rend; 1477 a->cstart = oldmat->cstart; 1478 a->cend = oldmat->cend; 1479 a->size = oldmat->size; 1480 a->rank = oldmat->rank; 1481 a->insertmode = NOT_SET_VALUES; 1482 1483 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1484 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1485 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1486 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1487 if (oldmat->colmap) { 1488 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1489 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1490 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1491 } else a->colmap = 0; 1492 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1493 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1494 PLogObjectMemory(mat,len*sizeof(int)); 1495 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1496 } else a->garray = 0; 1497 1498 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1499 PLogObjectParent(mat,a->lvec); 1500 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1501 PLogObjectParent(mat,a->Mvctx); 1502 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1503 PLogObjectParent(mat,a->A); 1504 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1505 PLogObjectParent(mat,a->B); 1506 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1507 if (flg) { 1508 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1509 } 1510 *newmat = mat; 1511 return 0; 1512 } 1513 1514 #include "sysio.h" 1515 1516 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1517 { 1518 Mat A; 1519 int i, nz, ierr, j,rstart, rend, fd; 1520 Scalar *vals,*svals; 1521 PetscObject vobj = (PetscObject) bview; 1522 MPI_Comm comm = 
vobj->comm; 1523 MPI_Status status; 1524 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1525 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1526 int tag = ((PetscObject)bview)->tag; 1527 1528 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1529 if (!rank) { 1530 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1531 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1532 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1533 } 1534 1535 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1536 M = header[1]; N = header[2]; 1537 /* determine ownership of all rows */ 1538 m = M/size + ((M % size) > rank); 1539 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1540 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1541 rowners[0] = 0; 1542 for ( i=2; i<=size; i++ ) { 1543 rowners[i] += rowners[i-1]; 1544 } 1545 rstart = rowners[rank]; 1546 rend = rowners[rank+1]; 1547 1548 /* distribute row lengths to all processors */ 1549 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1550 offlens = ourlens + (rend-rstart); 1551 if (!rank) { 1552 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1553 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1554 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1555 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1556 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1557 PetscFree(sndcounts); 1558 } 1559 else { 1560 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1561 } 1562 1563 if (!rank) { 1564 /* calculate the number of nonzeros on each processor */ 1565 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1566 PetscMemzero(procsnz,size*sizeof(int)); 1567 for ( i=0; i<size; i++ ) { 1568 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1569 procsnz[i] += 
rowlengths[j]; 1570 } 1571 } 1572 PetscFree(rowlengths); 1573 1574 /* determine max buffer needed and allocate it */ 1575 maxnz = 0; 1576 for ( i=0; i<size; i++ ) { 1577 maxnz = PetscMax(maxnz,procsnz[i]); 1578 } 1579 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1580 1581 /* read in my part of the matrix column indices */ 1582 nz = procsnz[0]; 1583 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1584 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1585 1586 /* read in every one elses and ship off */ 1587 for ( i=1; i<size; i++ ) { 1588 nz = procsnz[i]; 1589 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1590 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1591 } 1592 PetscFree(cols); 1593 } 1594 else { 1595 /* determine buffer space needed for message */ 1596 nz = 0; 1597 for ( i=0; i<m; i++ ) { 1598 nz += ourlens[i]; 1599 } 1600 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1601 1602 /* receive message of column indices*/ 1603 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1604 MPI_Get_count(&status,MPI_INT,&maxnz); 1605 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1606 } 1607 1608 /* loop over local rows, determining number of off diagonal entries */ 1609 PetscMemzero(offlens,m*sizeof(int)); 1610 jj = 0; 1611 for ( i=0; i<m; i++ ) { 1612 for ( j=0; j<ourlens[i]; j++ ) { 1613 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1614 jj++; 1615 } 1616 } 1617 1618 /* create our matrix */ 1619 for ( i=0; i<m; i++ ) { 1620 ourlens[i] -= offlens[i]; 1621 } 1622 if (type == MATMPIAIJ) { 1623 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1624 } 1625 else if (type == MATMPIROW) { 1626 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1627 } 1628 A = *newmat; 1629 MatSetOption(A,COLUMNS_SORTED); 1630 for ( i=0; i<m; i++ ) { 1631 ourlens[i] += offlens[i]; 1632 } 1633 1634 if (!rank) { 1635 vals = 
(Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1636 1637 /* read in my part of the matrix numerical values */ 1638 nz = procsnz[0]; 1639 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1640 1641 /* insert into matrix */ 1642 jj = rstart; 1643 smycols = mycols; 1644 svals = vals; 1645 for ( i=0; i<m; i++ ) { 1646 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1647 smycols += ourlens[i]; 1648 svals += ourlens[i]; 1649 jj++; 1650 } 1651 1652 /* read in other processors and ship out */ 1653 for ( i=1; i<size; i++ ) { 1654 nz = procsnz[i]; 1655 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1656 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1657 } 1658 PetscFree(procsnz); 1659 } 1660 else { 1661 /* receive numeric values */ 1662 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1663 1664 /* receive message of values*/ 1665 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1666 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1667 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1668 1669 /* insert into matrix */ 1670 jj = rstart; 1671 smycols = mycols; 1672 svals = vals; 1673 for ( i=0; i<m; i++ ) { 1674 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1675 smycols += ourlens[i]; 1676 svals += ourlens[i]; 1677 jj++; 1678 } 1679 } 1680 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1681 1682 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1684 return 0; 1685 } 1686