
#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.163 1996/08/15 12:47:27 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M)
      SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == ASCII_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,nz,nzalloc,mem);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,nz,nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == ASCII_FORMAT_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
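  /* free the work array of global column numbers built above for the off-diagonal part */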
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.
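
   Example:
   A minimal calling sketch (not part of the original source); the local sizes
   and the preallocation values d_nz=5 and o_nz=2 below are illustrative
   assumptions that depend entirely on the problem being solved:
$      Mat A;
$      int m = 100, n = 100;    /* local sizes */
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);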

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2];

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
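    /* sndcounts[i] = number of rows owned by processor i; the row lengths are
       scattered below so each processor receives the lengths of its own rows */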
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}