#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.180 1996/12/18 18:47:24 curfman Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCTION__
#define __FUNCTION__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNCTION__
#define __FUNCTION__ "MatGetRowIJ_MPIAIJ"
static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ"
static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNCTION__
#define __FUNCTION__ "MatSetValues_MPIAIJ"
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
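              /* a negative col means this global column is not yet present in the
                 off-diagonal block's column map, so (unless new nonzeros are
                 disallowed) convert B back to global column numbering before
                 inserting the new entry */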
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetValues_MPIAIJ"
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyBegin_MPIAIJ"
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
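  /* the 0/1 flags in procs[] summed over all processes tell each process how
     many messages it will receive; the max over nprocs[] bounds the length of
     any single message it will receive */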
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index,global column index,value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyEnd_MPIAIJ"
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatZeroEntries_MPIAIJ"
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatZeroRows_MPIAIJ"
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
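  /* flatten the received buffers into a single list of local row indices;
     base is the first row owned by this process */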
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMult_MPIAIJ"
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultAdd_MPIAIJ"
static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTrans_MPIAIJ"
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTransAdd_MPIAIJ"
static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatGetDiagonal_MPIAIJ"
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNCTION__
#define __FUNCTION__ "MatScale_MPIAIJ"
static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatDestroy_MPIAIJ"
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ_Binary"
static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
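      /* also break the local nonzero count down into the on-diagonal (A) and
         off-diagonal (B) blocks */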
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ"
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}
/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatRelax_MPIAIJ"
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
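        /* at this point sum should hold b[i] minus the product of row i (both
           the diagonal and off-diagonal blocks) with the current iterate;
           adding the diagonal term back and dividing by d = fshift + a_ii
           gives the SOR update below */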
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetInfo_MPIAIJ"
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNCTION__
#define __FUNCTION__ "MatSetOption_MPIAIJ"
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED) {
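    /* these options describe the local storage, so pass them on to both the
       diagonal (A) and off-diagonal (B) blocks */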
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetSize_MPIAIJ"
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetLocalSize_MPIAIJ"
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ"
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNCTION__
#define __FUNCTION__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatNorm_MPIAIJ"
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatTranspose_MPIAIJ"
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNCTION__
#define __FUNCTION__ "MatPrintHelp_MPIAIJ"
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetBlockSize_MPIAIJ"
static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ};


#undef __FUNCTION__
#define __FUNCTION__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  |  o o o d d d o o o o o o
$   row 4  |  o o o d d d o o o o o o
$   row 5  |  o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
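
   Example (a rough sketch; the sizes and nonzero estimates below are only
   placeholders): to create a 1000 by 1000 matrix distributed across the
   processes of PETSC_COMM_WORLD, expecting about 5 nonzeros per row in the
   diagonal portion and 2 per row in the off-diagonal portion, one might call
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,1000,1000,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);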

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
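
  /* repeat the gather and prefix sum for the column ownership ranges */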
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatConvertSameType_MPIAIJ"
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }
  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}