#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.207 1997/05/23 18:38:33 balay Exp balay $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.

   colmap[global col] = local off-diagonal col + 1; zero (from the
   PetscMemzero below) therefore means "column not present locally",
   which is why lookups subtract 1 and test for < 0.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  /* N+1 entries allocated so even a global column index of N-1 (or an
     index-shifted access) stays in bounds; only N entries are zeroed */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* garray[i] is the global column of local off-diagonal column i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/* Delegates to the sequential MatGetRowIJ on the diagonal block; only
   meaningful on one processor, where aij->A holds the entire matrix. */
#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                       PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

/* Counterpart of MatGetRowIJ_MPIAIJ; same uniprocessor-only restriction. */
#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                           PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

/* number of extra nonzero slots added to a row when it overflows */
#define CHUNKSIZE 15

/* Inlined insertion of one (row,col,value) into the diagonal block aij->A.
   This macro is an unrolled copy of MatSetValues_SeqAIJ (see the
   commented-out calls in MatSetValues_MPIAIJ below) and relies on the
   caller having the locals rp,ap,rmax,nrow,col1,_i,ii,N and the aliases
   a,aa,ai,aj,aimax,ailen,nonew,shift in scope.  If the row is full it
   reallocates the whole CSR storage of A in a single malloc and updates
   the caller's aliases in place. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/* Same as MatSetValues_SeqAIJ_A_Private but for the off-diagonal block
   aij->B, using the b,ba,bi,bj,bimax,bilen aliases. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

/* Inserts/adds an m x n logically-dense block of values.
   Rows owned locally (rstart <= im[i] < rend) go directly into the
   diagonal block A (columns in [cstart,cend)) or the off-diagonal
   block B (all other columns, translated through colmap once the
   matrix has been assembled); rows owned by other processors are
   stashed and communicated at assembly time. */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1;
  /* NOTE(review): nonew/shift are taken from A and reused for the B
     macro too -- assumes A and B share nonew and indexshift settings */
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          /* v is row-major when roworiented, else column-major */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local index + 1; 0 means "new column" */
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* a brand-new off-diagonal column: fall back to global
                 numbering in B until the matrix is reassembled */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* off-processor row: stash for communication in MatAssemblyBegin */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

/* Retrieves an m x n block of values; only rows owned by this
   processor may be requested.  Off-diagonal columns not present in
   the local B block are returned as 0.0. */
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* the garray check guards against a stale colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

/* Starts assembly: ships all stashed off-processor values to their
   owners.  Each value travels as a (row,col,value) triple of Scalars
   (indices are encoded as Scalars to keep the message homogeneous).
   Sends/receives are posted here; MatAssemblyEnd completes them. */
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    /* pack (row,col,value); note starts[owner[i]] advances per triple */
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts, which the packing loop above consumed */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* outstanding communication state, consumed by MatAssemblyEnd */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/* Completes assembly: drains the receives posted in MatAssemblyBegin,
   inserts the received (row,col,value) triples locally, waits on the
   sends, then assembles the A and B blocks and (on final assembly)
   builds the scatter context for MatMult. */
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;   /* n Scalars received => n/3 (row,col,value) triples */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal column: revert B to global numbering */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* rowvalues is the MatGetRow scratch buffer; sizes may change */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

/* Zeros all entries of both local blocks; sparsity pattern is kept. */
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/* Zeros the (possibly off-processor) rows listed in is, optionally
   placing *diag on the diagonal of each zeroed row.  The row indices
   are first routed to their owning processors with the same
   count/Allreduce/Irecv/Isend pattern used in MatAssemblyBegin, then
   each processor zaps its local rows via the sequential MatZeroRows. */
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts (consumed above) before issuing the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row numbers */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  /* diagonal entries live in A only; see the BUG note above for the
     non-square / mismatched-ownership case */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

/* yy = A*xx: overlaps the forward scatter of ghost values with the
   diagonal-block product, then adds in the off-diagonal contribution. */
#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

/* zz = A*xx + yy: same communication/computation overlap as MatMult. */
#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

/* yy = A'*xx: the off-diagonal transpose product is computed into the
   ghost vector and reverse-scattered (with ADD) back to the owners. */
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/* zz = A'*xx + yy: transpose-multiply-add analogue of MatMultTrans. */
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition"); }
  return MatGetDiagonal(a->A,v);
}

/* Scales every entry of both local blocks by *aa. */
#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

/* Destroys the parallel matrix: both sequential blocks, the ghost
   scatter machinery, and all auxiliary arrays. */
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

/* Binary output; only implemented for the uniprocessor case, where the
   whole matrix lives in the diagonal block. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

/* ASCII/Draw/Matlab output.  INFO formats print per-processor
   statistics; otherwise the entire matrix is gathered onto processor 0
   (via an auxiliary MPIAIJ matrix with all rows on rank 0) and viewed
   there. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: temporarily shift the local column
         indices of A to global numbering, then restore them */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: translate local columns through garray */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

/* Viewer dispatch: routes to the ASCII/Draw/Matlab or binary viewer. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values betwen colors.
*/
/* Local SOR/Gauss-Seidel relaxation.  Ghost values are scattered in
   once per call (not per inner iteration) and held fixed in ls while
   the local sweeps run, so only SOR_LOCAL_* variants are supported. */
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the ghost values contribute nothing; the
         sequential relaxation on the diagonal block suffices */
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        /* sum -= (row of A) . xs ; SPARSEDENSEMDOT subtracts */
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        /* the diagonal term was subtracted in the dot product; add it back */
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  /* NOTE(review): columns_local is set from the GLOBAL column count N,
     not the local n -- looks suspicious; confirm against MatInfo docs */
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated =
isend[1]; 1040 info->nz_unneeded = isend[2]; 1041 info->memory = isend[3]; 1042 info->mallocs = isend[4]; 1043 } else if (flag == MAT_GLOBAL_MAX) { 1044 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1045 info->nz_used = irecv[0]; 1046 info->nz_allocated = irecv[1]; 1047 info->nz_unneeded = irecv[2]; 1048 info->memory = irecv[3]; 1049 info->mallocs = irecv[4]; 1050 } else if (flag == MAT_GLOBAL_SUM) { 1051 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1052 info->nz_used = irecv[0]; 1053 info->nz_allocated = irecv[1]; 1054 info->nz_unneeded = irecv[2]; 1055 info->memory = irecv[3]; 1056 info->mallocs = irecv[4]; 1057 } 1058 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1059 info->fill_ratio_needed = 0; 1060 info->factor_mallocs = 0; 1061 1062 return 0; 1063 } 1064 1065 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1066 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1067 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1068 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1069 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1070 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1071 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1072 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1073 1074 #undef __FUNC__ 1075 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 1076 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1077 { 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1079 1080 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1081 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1082 op == MAT_COLUMNS_UNSORTED || 1083 op == MAT_COLUMNS_SORTED || 1084 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1085 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1086 MatSetOption(a->A,op); 1087 MatSetOption(a->B,op); 1088 } else if (op == MAT_ROW_ORIENTED) { 1089 a->roworiented = 1; 1090 MatSetOption(a->A,op); 1091 MatSetOption(a->B,op); 1092 } else if (op == MAT_ROWS_SORTED || 1093 op == MAT_ROWS_UNSORTED || 1094 op == MAT_SYMMETRIC || 
1095 op == MAT_STRUCTURALLY_SYMMETRIC || 1096 op == MAT_YES_NEW_DIAGONALS) 1097 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1098 else if (op == MAT_COLUMN_ORIENTED) { 1099 a->roworiented = 0; 1100 MatSetOption(a->A,op); 1101 MatSetOption(a->B,op); 1102 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1103 a->donotstash = 1; 1104 } else if (op == MAT_NO_NEW_DIAGONALS) 1105 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1106 else 1107 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1108 return 0; 1109 } 1110 1111 #undef __FUNC__ 1112 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 1113 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1114 { 1115 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1116 *m = mat->M; *n = mat->N; 1117 return 0; 1118 } 1119 1120 #undef __FUNC__ 1121 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 1122 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1123 { 1124 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1125 *m = mat->m; *n = mat->N; 1126 return 0; 1127 } 1128 1129 #undef __FUNC__ 1130 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 1131 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1132 { 1133 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1134 *m = mat->rstart; *n = mat->rend; 1135 return 0; 1136 } 1137 1138 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1139 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1140 1141 #undef __FUNC__ 1142 #define __FUNC__ "MatGetRow_MPIAIJ" 1143 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1144 { 1145 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1146 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1147 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1148 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1149 int *cmap, *idx_p; 1150 1151 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1152 mat->getrowactive = PETSC_TRUE; 1153 1154 if 
(!mat->rowvalues && (idx || v)) { 1155 /* 1156 allocate enough space to hold information from the longest row. 1157 */ 1158 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1159 int max = 1,m = mat->m,tmp; 1160 for ( i=0; i<m; i++ ) { 1161 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1162 if (max < tmp) { max = tmp; } 1163 } 1164 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1165 CHKPTRQ(mat->rowvalues); 1166 mat->rowindices = (int *) (mat->rowvalues + max); 1167 } 1168 1169 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1170 lrow = row - rstart; 1171 1172 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1173 if (!v) {pvA = 0; pvB = 0;} 1174 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1175 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1176 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1177 nztot = nzA + nzB; 1178 1179 cmap = mat->garray; 1180 if (v || idx) { 1181 if (nztot) { 1182 /* Sort by increasing column numbers, assuming A and B already sorted */ 1183 int imark = -1; 1184 if (v) { 1185 *v = v_p = mat->rowvalues; 1186 for ( i=0; i<nzB; i++ ) { 1187 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1188 else break; 1189 } 1190 imark = i; 1191 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1192 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1193 } 1194 if (idx) { 1195 *idx = idx_p = mat->rowindices; 1196 if (imark > -1) { 1197 for ( i=0; i<imark; i++ ) { 1198 idx_p[i] = cmap[cworkB[i]]; 1199 } 1200 } else { 1201 for ( i=0; i<nzB; i++ ) { 1202 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1203 else break; 1204 } 1205 imark = i; 1206 } 1207 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1208 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1209 } 1210 } 1211 else { 1212 if (idx) *idx = 0; 1213 if (v) *v = 0; 1214 } 1215 } 1216 *nz = nztot; 1217 ierr = 
(*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1218 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1219 return 0; 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 1224 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1225 { 1226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227 if (aij->getrowactive == PETSC_FALSE) { 1228 SETERRQ(1,0,"MatGetRow not called"); 1229 } 1230 aij->getrowactive = PETSC_FALSE; 1231 return 0; 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatNorm_MPIAIJ" 1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1239 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1240 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1241 double sum = 0.0; 1242 Scalar *v; 1243 1244 if (aij->size == 1) { 1245 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1246 } else { 1247 if (type == NORM_FROBENIUS) { 1248 v = amat->a; 1249 for (i=0; i<amat->nz; i++ ) { 1250 #if defined(PETSC_COMPLEX) 1251 sum += real(conj(*v)*(*v)); v++; 1252 #else 1253 sum += (*v)*(*v); v++; 1254 #endif 1255 } 1256 v = bmat->a; 1257 for (i=0; i<bmat->nz; i++ ) { 1258 #if defined(PETSC_COMPLEX) 1259 sum += real(conj(*v)*(*v)); v++; 1260 #else 1261 sum += (*v)*(*v); v++; 1262 #endif 1263 } 1264 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1265 *norm = sqrt(*norm); 1266 } 1267 else if (type == NORM_1) { /* max column norm */ 1268 double *tmp, *tmp2; 1269 int *jj, *garray = aij->garray; 1270 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1271 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1272 PetscMemzero(tmp,aij->N*sizeof(double)); 1273 *norm = 0.0; 1274 v = amat->a; jj = amat->j; 1275 for ( j=0; j<amat->nz; j++ ) { 1276 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1277 } 1278 v = 
bmat->a; jj = bmat->j; 1279 for ( j=0; j<bmat->nz; j++ ) { 1280 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1281 } 1282 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1283 for ( j=0; j<aij->N; j++ ) { 1284 if (tmp2[j] > *norm) *norm = tmp2[j]; 1285 } 1286 PetscFree(tmp); PetscFree(tmp2); 1287 } 1288 else if (type == NORM_INFINITY) { /* max row norm */ 1289 double ntemp = 0.0; 1290 for ( j=0; j<amat->m; j++ ) { 1291 v = amat->a + amat->i[j] + shift; 1292 sum = 0.0; 1293 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1294 sum += PetscAbsScalar(*v); v++; 1295 } 1296 v = bmat->a + bmat->i[j] + shift; 1297 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1298 sum += PetscAbsScalar(*v); v++; 1299 } 1300 if (sum > ntemp) ntemp = sum; 1301 } 1302 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1303 } 1304 else { 1305 SETERRQ(1,0,"No support for two norm"); 1306 } 1307 } 1308 return 0; 1309 } 1310 1311 #undef __FUNC__ 1312 #define __FUNC__ "MatTranspose_MPIAIJ" 1313 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1314 { 1315 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1316 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1317 int ierr,shift = Aloc->indexshift; 1318 Mat B; 1319 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1320 Scalar *array; 1321 1322 if (matout == PETSC_NULL && M != N) 1323 SETERRQ(1,0,"Square matrix only for in-place"); 1324 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1325 PETSC_NULL,&B); CHKERRQ(ierr); 1326 1327 /* copy over the A part */ 1328 Aloc = (Mat_SeqAIJ*) a->A->data; 1329 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1330 row = a->rstart; 1331 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1332 for ( i=0; i<m; i++ ) { 1333 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1334 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1335 } 1336 aj = Aloc->j; 1337 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1338 1339 /* 
copy over the B part */ 1340 Aloc = (Mat_SeqAIJ*) a->B->data; 1341 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1342 row = a->rstart; 1343 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1344 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1345 for ( i=0; i<m; i++ ) { 1346 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1347 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1348 } 1349 PetscFree(ct); 1350 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1351 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1352 if (matout != PETSC_NULL) { 1353 *matout = B; 1354 } else { 1355 /* This isn't really an in-place transpose .... but free data structures from a */ 1356 PetscFree(a->rowners); 1357 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1358 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1359 if (a->colmap) PetscFree(a->colmap); 1360 if (a->garray) PetscFree(a->garray); 1361 if (a->lvec) VecDestroy(a->lvec); 1362 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1363 PetscFree(a); 1364 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1365 PetscHeaderDestroy(B); 1366 } 1367 return 0; 1368 } 1369 1370 #undef __FUNC__ 1371 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1372 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1373 { 1374 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1375 Mat a = aij->A, b = aij->B; 1376 int ierr,s1,s2,s3; 1377 1378 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1379 if (rr) { 1380 s3 = aij->n; 1381 VecGetLocalSize_Fast(rr,s1); 1382 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1383 /* Overlap communication with computation. 
*/ 1384 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1385 } 1386 if (ll) { 1387 VecGetLocalSize_Fast(ll,s1); 1388 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1389 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1390 } 1391 /* scale the diagonal block */ 1392 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1393 1394 if (rr) { 1395 /* Do a scatter end and then right scale the off-diagonal block */ 1396 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1397 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1398 } 1399 1400 return 0; 1401 } 1402 1403 1404 extern int MatPrintHelp_SeqAIJ(Mat); 1405 #undef __FUNC__ 1406 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 1407 int MatPrintHelp_MPIAIJ(Mat A) 1408 { 1409 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1410 1411 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1412 else return 0; 1413 } 1414 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 1417 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1418 { 1419 *bs = 1; 1420 return 0; 1421 } 1422 #undef __FUNC__ 1423 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 1424 int MatSetUnfactored_MPIAIJ(Mat A) 1425 { 1426 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1427 int ierr; 1428 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1429 return 0; 1430 } 1431 1432 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1433 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1434 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1435 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1436 /* -------------------------------------------------------------------*/ 1437 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1438 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1439 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1440 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 
1441 0,0, 1442 0,0, 1443 0,0, 1444 MatRelax_MPIAIJ, 1445 MatTranspose_MPIAIJ, 1446 MatGetInfo_MPIAIJ,0, 1447 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1448 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1449 0, 1450 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1451 0,0,0,0, 1452 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1453 0,0, 1454 0,0,MatConvertSameType_MPIAIJ,0,0, 1455 0,0,0, 1456 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1457 MatPrintHelp_MPIAIJ, 1458 MatScale_MPIAIJ,0,0,0, 1459 MatGetBlockSize_MPIAIJ,0,0,0,0, 1460 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 1461 1462 1463 #undef __FUNC__ 1464 #define __FUNC__ "MatCreateMPIAIJ" 1465 /*@C 1466 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1467 (the default parallel PETSc format). For good matrix assembly performance 1468 the user should preallocate the matrix storage by setting the parameters 1469 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1470 performance can be increased by more than a factor of 50. 1471 1472 Input Parameters: 1473 . comm - MPI communicator 1474 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1475 This value should be the same as the local size used in creating the 1476 y vector for the matrix-vector product y = Ax. 1477 . n - This value should be the same as the local size used in creating the 1478 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1479 calculated if N is given) For square matrices n is almost always m. 1480 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1481 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1482 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1483 (same for all local rows) 1484 . 
d_nzz - array containing the number of nonzeros in the various rows of the 1485 diagonal portion of the local submatrix (possibly different for each row) 1486 or PETSC_NULL. You must leave room for the diagonal entry even if 1487 it is zero. 1488 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1489 submatrix (same for all local rows). 1490 . o_nzz - array containing the number of nonzeros in the various rows of the 1491 off-diagonal portion of the local submatrix (possibly different for 1492 each row) or PETSC_NULL. 1493 1494 Output Parameter: 1495 . A - the matrix 1496 1497 Notes: 1498 The AIJ format (also called the Yale sparse matrix format or 1499 compressed row storage), is fully compatible with standard Fortran 77 1500 storage. That is, the stored row and column indices can begin at 1501 either one (as in Fortran) or zero. See the users manual for details. 1502 1503 The user MUST specify either the local or global matrix dimensions 1504 (possibly both). 1505 1506 By default, this format uses inodes (identical nodes) when possible. 1507 We search for consecutive rows with the same nonzero structure, thereby 1508 reusing matrix information to achieve increased efficiency. 1509 1510 Options Database Keys: 1511 $ -mat_aij_no_inode - Do not use inodes 1512 $ -mat_aij_inode_limit <limit> - Set inode limit. 1513 $ (max limit=5) 1514 $ -mat_aij_oneindex - Internally use indexing starting at 1 1515 $ rather than 0. Note: When calling MatSetValues(), 1516 $ the user still MUST index entries starting at 0! 1517 1518 Storage Information: 1519 For a square global matrix we define each processor's diagonal portion 1520 to be its local rows and the corresponding columns (a square submatrix); 1521 each processor's off-diagonal portion encompasses the remainder of the 1522 local matrix (a rectangular submatrix). 1523 1524 The user can specify preallocated storage for the diagonal part of 1525 the local submatrix with either d_nz or d_nnz (not both). 
Set 1526 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1527 memory allocation. Likewise, specify preallocated storage for the 1528 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1529 1530 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1531 the figure below we depict these three local rows and all columns (0-11). 1532 1533 $ 0 1 2 3 4 5 6 7 8 9 10 11 1534 $ ------------------- 1535 $ row 3 | o o o d d d o o o o o o 1536 $ row 4 | o o o d d d o o o o o o 1537 $ row 5 | o o o d d d o o o o o o 1538 $ ------------------- 1539 $ 1540 1541 Thus, any entries in the d locations are stored in the d (diagonal) 1542 submatrix, and any entries in the o locations are stored in the 1543 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1544 stored simply in the MATSEQAIJ format for compressed row storage. 1545 1546 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1547 and o_nz should indicate the number of nonzeros per row in the o matrix. 1548 In general, for PDE problems in which most nonzeros are near the diagonal, 1549 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1550 or you will get TERRIBLE performance; see the users' manual chapter on 1551 matrices. 
1552 1553 .keywords: matrix, aij, compressed row, sparse, parallel 1554 1555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1556 @*/ 1557 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1558 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1559 { 1560 Mat B; 1561 Mat_MPIAIJ *b; 1562 int ierr, i,sum[2],work[2],size; 1563 1564 *A = 0; 1565 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1566 PLogObjectCreate(B); 1567 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1568 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1569 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1570 MPI_Comm_size(comm,&size); 1571 if (size == 1) { 1572 B->ops.getrowij = MatGetRowIJ_MPIAIJ; 1573 B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 1574 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 1575 B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 1576 B->ops.lufactor = MatLUFactor_MPIAIJ; 1577 B->ops.solve = MatSolve_MPIAIJ; 1578 B->ops.solveadd = MatSolveAdd_MPIAIJ; 1579 B->ops.solvetrans = MatSolveTrans_MPIAIJ; 1580 B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 1581 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 1582 } 1583 B->destroy = MatDestroy_MPIAIJ; 1584 B->view = MatView_MPIAIJ; 1585 B->factor = 0; 1586 B->assembled = PETSC_FALSE; 1587 B->mapping = 0; 1588 1589 B->insertmode = NOT_SET_VALUES; 1590 MPI_Comm_rank(comm,&b->rank); 1591 MPI_Comm_size(comm,&b->size); 1592 1593 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1594 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1595 1596 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1597 work[0] = m; work[1] = n; 1598 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1599 if (M == PETSC_DECIDE) M = sum[0]; 1600 if (N == PETSC_DECIDE) N = sum[1]; 1601 } 1602 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1603 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1604 b->m = m; B->m = m; 1605 b->n = n; B->n = n; 1606 
b->N = N; B->N = N; 1607 b->M = M; B->M = M; 1608 1609 /* build local table of row and column ownerships */ 1610 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1611 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1612 b->cowners = b->rowners + b->size + 2; 1613 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1614 b->rowners[0] = 0; 1615 for ( i=2; i<=b->size; i++ ) { 1616 b->rowners[i] += b->rowners[i-1]; 1617 } 1618 b->rstart = b->rowners[b->rank]; 1619 b->rend = b->rowners[b->rank+1]; 1620 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1621 b->cowners[0] = 0; 1622 for ( i=2; i<=b->size; i++ ) { 1623 b->cowners[i] += b->cowners[i-1]; 1624 } 1625 b->cstart = b->cowners[b->rank]; 1626 b->cend = b->cowners[b->rank+1]; 1627 1628 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1629 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1630 PLogObjectParent(B,b->A); 1631 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1632 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1633 PLogObjectParent(B,b->B); 1634 1635 /* build cache for off array entries formed */ 1636 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1637 b->donotstash = 0; 1638 b->colmap = 0; 1639 b->garray = 0; 1640 b->roworiented = 1; 1641 1642 /* stuff used for matrix vector multiply */ 1643 b->lvec = 0; 1644 b->Mvctx = 0; 1645 1646 /* stuff for MatGetRow() */ 1647 b->rowindices = 0; 1648 b->rowvalues = 0; 1649 b->getrowactive = PETSC_FALSE; 1650 1651 *A = B; 1652 return 0; 1653 } 1654 1655 #undef __FUNC__ 1656 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1657 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1658 { 1659 Mat mat; 1660 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1661 int ierr, len=0, flg; 1662 1663 *newmat = 0; 1664 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1665 PLogObjectCreate(mat); 1666 mat->data = (void *) (a = 
PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1667 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1668 mat->destroy = MatDestroy_MPIAIJ; 1669 mat->view = MatView_MPIAIJ; 1670 mat->factor = matin->factor; 1671 mat->assembled = PETSC_TRUE; 1672 1673 a->m = mat->m = oldmat->m; 1674 a->n = mat->n = oldmat->n; 1675 a->M = mat->M = oldmat->M; 1676 a->N = mat->N = oldmat->N; 1677 1678 a->rstart = oldmat->rstart; 1679 a->rend = oldmat->rend; 1680 a->cstart = oldmat->cstart; 1681 a->cend = oldmat->cend; 1682 a->size = oldmat->size; 1683 a->rank = oldmat->rank; 1684 mat->insertmode = NOT_SET_VALUES; 1685 a->rowvalues = 0; 1686 a->getrowactive = PETSC_FALSE; 1687 1688 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1689 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1690 a->cowners = a->rowners + a->size + 2; 1691 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1692 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1693 if (oldmat->colmap) { 1694 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1695 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1696 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1697 } else a->colmap = 0; 1698 if (oldmat->garray) { 1699 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1700 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1701 PLogObjectMemory(mat,len*sizeof(int)); 1702 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1703 } else a->garray = 0; 1704 1705 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1706 PLogObjectParent(mat,a->lvec); 1707 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1708 PLogObjectParent(mat,a->Mvctx); 1709 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1710 PLogObjectParent(mat,a->A); 1711 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1712 PLogObjectParent(mat,a->B); 1713 ierr = 
OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1714 if (flg) { 1715 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1716 } 1717 *newmat = mat; 1718 return 0; 1719 } 1720 1721 #include "sys.h" 1722 1723 #undef __FUNC__ 1724 #define __FUNC__ "MatLoad_MPIAIJ" 1725 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1726 { 1727 Mat A; 1728 int i, nz, ierr, j,rstart, rend, fd; 1729 Scalar *vals,*svals; 1730 MPI_Comm comm = ((PetscObject)viewer)->comm; 1731 MPI_Status status; 1732 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1733 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1734 int tag = ((PetscObject)viewer)->tag; 1735 1736 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1737 if (!rank) { 1738 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1740 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1741 } 1742 1743 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1744 M = header[1]; N = header[2]; 1745 /* determine ownership of all rows */ 1746 m = M/size + ((M % size) > rank); 1747 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1748 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1749 rowners[0] = 0; 1750 for ( i=2; i<=size; i++ ) { 1751 rowners[i] += rowners[i-1]; 1752 } 1753 rstart = rowners[rank]; 1754 rend = rowners[rank+1]; 1755 1756 /* distribute row lengths to all processors */ 1757 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1758 offlens = ourlens + (rend-rstart); 1759 if (!rank) { 1760 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1761 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1762 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1763 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1764 
MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1765 PetscFree(sndcounts); 1766 } 1767 else { 1768 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1769 } 1770 1771 if (!rank) { 1772 /* calculate the number of nonzeros on each processor */ 1773 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1774 PetscMemzero(procsnz,size*sizeof(int)); 1775 for ( i=0; i<size; i++ ) { 1776 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1777 procsnz[i] += rowlengths[j]; 1778 } 1779 } 1780 PetscFree(rowlengths); 1781 1782 /* determine max buffer needed and allocate it */ 1783 maxnz = 0; 1784 for ( i=0; i<size; i++ ) { 1785 maxnz = PetscMax(maxnz,procsnz[i]); 1786 } 1787 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1788 1789 /* read in my part of the matrix column indices */ 1790 nz = procsnz[0]; 1791 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1792 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1793 1794 /* read in every one elses and ship off */ 1795 for ( i=1; i<size; i++ ) { 1796 nz = procsnz[i]; 1797 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1798 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1799 } 1800 PetscFree(cols); 1801 } 1802 else { 1803 /* determine buffer space needed for message */ 1804 nz = 0; 1805 for ( i=0; i<m; i++ ) { 1806 nz += ourlens[i]; 1807 } 1808 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1809 1810 /* receive message of column indices*/ 1811 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1812 MPI_Get_count(&status,MPI_INT,&maxnz); 1813 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1814 } 1815 1816 /* loop over local rows, determining number of off diagonal entries */ 1817 PetscMemzero(offlens,m*sizeof(int)); 1818 jj = 0; 1819 for ( i=0; i<m; i++ ) { 1820 for ( j=0; j<ourlens[i]; j++ ) { 1821 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1822 jj++; 1823 } 1824 } 1825 
1826 /* create our matrix */ 1827 for ( i=0; i<m; i++ ) { 1828 ourlens[i] -= offlens[i]; 1829 } 1830 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1831 A = *newmat; 1832 MatSetOption(A,MAT_COLUMNS_SORTED); 1833 for ( i=0; i<m; i++ ) { 1834 ourlens[i] += offlens[i]; 1835 } 1836 1837 if (!rank) { 1838 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1839 1840 /* read in my part of the matrix numerical values */ 1841 nz = procsnz[0]; 1842 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1843 1844 /* insert into matrix */ 1845 jj = rstart; 1846 smycols = mycols; 1847 svals = vals; 1848 for ( i=0; i<m; i++ ) { 1849 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1850 smycols += ourlens[i]; 1851 svals += ourlens[i]; 1852 jj++; 1853 } 1854 1855 /* read in other processors and ship out */ 1856 for ( i=1; i<size; i++ ) { 1857 nz = procsnz[i]; 1858 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1859 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1860 } 1861 PetscFree(procsnz); 1862 } 1863 else { 1864 /* receive numeric values */ 1865 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1866 1867 /* receive message of values*/ 1868 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1869 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1870 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1871 1872 /* insert into matrix */ 1873 jj = rstart; 1874 smycols = mycols; 1875 svals = vals; 1876 for ( i=0; i<m; i++ ) { 1877 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1878 smycols += ourlens[i]; 1879 svals += ourlens[i]; 1880 jj++; 1881 } 1882 } 1883 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1884 1885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1887 return 0; 1888 } 1889