#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.170 1996/09/24 20:14:40 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global number of columns.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
      1) each message will consist of ordered triples
         (global row index, global column index, value); the indices are stored
         as Scalars to simplify the message passing.
      2) since we don't know how long each individual message is we
         allocate the largest needed buffer for each receive. Potentially
         this is a lot of wasted space.

      This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into yy until the next line, which is true for this
     implementation but may not always be the case. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until the next line, which is true for this
     implementation but may not always be the case. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
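/*
   Sketch of the per-row update performed by the local sweeps below (illustrative,
   not part of the original source).  For local row i, with xs the local values of
   x, ls the ghost values scattered into lvec, a_ii the diagonal entry of A, and
   d = fshift + a_ii, each pass computes

      sum  = b[i]  combined via SPARSEDENSEMDOT() with row i of A against xs
                   and row i of B against ls
      x[i] = (1 - omega)*x[i] + omega*(sum + a_ii*x[i])/d

   Assuming SPARSEDENSEMDOT() subtracts the sparse dot product from its first
   argument (its conventional meaning in spops.h), this is the standard SOR /
   Gauss-Seidel update restricted to locally owned rows, with the off-processor
   coupling frozen at the values scattered before the sweeps begin.
*/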
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

int MatDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIAIJ *) A->data)->A;
  Mat b = ((Mat_MPIAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  if (ll) {
    ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIAIJ:non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
  }
  if (rr) SETERRQ(1,"MatDiagonalScale_MPIAIJ:not supported for right vector");
  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ};


/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.
   Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
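  /* Layout of the binary file as this routine consumes it (description inferred
     from the reads below, not taken from separate documentation): a 4-int header
     (MAT_COOKIE, M, N, and a fourth entry that is not used here), followed by the
     M row lengths, then all column indices, then all numerical values.  Process 0
     reads each block and ships the other processors their portions. */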
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
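/*
   Illustrative usage sketch (not part of the original source): assembling a simple
   tridiagonal matrix with MatCreateMPIAIJ() and the MatSetValues()/assembly routines
   implemented in this file.  The problem size n and the preallocation numbers are
   placeholders chosen only for the example.

      Mat    C;
      int    ierr,i,Istart,Iend,col[3],n = 100;
      Scalar v[3];

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,n,n,
                             3,PETSC_NULL,1,PETSC_NULL,&C); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(C,&Istart,&Iend); CHKERRQ(ierr);
      for ( i=Istart; i<Iend; i++ ) {
        v[0] = -1.0; v[1] = 2.0; v[2] = -1.0;
        col[0] = i-1; col[1] = i; col[2] = i+1;
        if (i == 0)        {ierr = MatSetValues(C,1,&i,2,col+1,v+1,INSERT_VALUES); CHKERRQ(ierr);}
        else if (i == n-1) {ierr = MatSetValues(C,1,&i,2,col,v,INSERT_VALUES); CHKERRQ(ierr);}
        else               {ierr = MatSetValues(C,1,&i,3,col,v,INSERT_VALUES); CHKERRQ(ierr);}
      }
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatDestroy(C); CHKERRQ(ierr);

   Because each process sets only rows it owns, no values are stashed for other
   processors and assembly requires no off-processor communication of entries.
*/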