1 #ifndef lint 2 static char vcid[] = "$Id: mpibaij.c,v 1.3 1996/06/19 23:03:32 balay Exp balay $"; 3 #endif 4 5 #include "mpibaij.h" 6 7 8 #include "draw.h" 9 #include "pinclude/pviewer.h" 10 11 extern int MatSetUpMultiply_MPIBAIJ(Mat); 12 extern int DisAssemble_MPIBAIJ(Mat); 13 14 /* local utility routine that creates a mapping from the global column 15 number to the local number in the off-diagonal part of the local 16 storage of the matrix. This is done in a non scable way since the 17 length of colmap equals the global matrix length. 18 */ 19 static int CreateColmap_MPIBAIJ_Private(Mat mat) 20 { 21 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 23 int nbs = B->nbs,i; 24 25 baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 26 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 27 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 28 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 29 return 0; 30 } 31 32 33 static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 34 { 35 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 36 int ierr; 37 if (baij->size == 1) { 38 ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 39 } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 40 return 0; 41 } 42 43 static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 44 { 45 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 46 Scalar value; 47 int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 48 int cstart = baij->cstart, cend = baij->cend,row,col; 49 int roworiented = baij->roworiented,rstart_orig,rend_orig; 50 int cstart_orig,cend_orig,bs=baij->bs; 51 52 if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 53 SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 54 } 55 baij->insertmode = addv; 56 rstart_orig = rstart*bs; 57 rend_orig = rend*bs; 58 cstart_orig = cstart*bs; 59 cend_orig = cend*bs; 60 for ( i=0; i<m; i++ ) { 61 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 62 if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 63 if (im[i] >= rstart_orig && im[i] < rend_orig) { 64 row = im[i] - rstart_orig; 65 for ( j=0; j<n; j++ ) { 66 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 67 if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 68 if (in[j] >= cstart_orig && in[j] < cend_orig){ 69 col = in[j] - cstart_orig; 70 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 71 ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 72 } 73 else { 74 if (mat->was_assembled) { 75 if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 76 col = baij->colmap[in[j]]; 77 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 78 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 79 col = in[j]; 80 } 81 } 82 else col = in[j]; 83 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 84 ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 85 } 86 } 87 } 88 else { 89 if (roworiented) { 90 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 91 } 92 else { 93 row = im[i]; 94 for ( j=0; j<n; j++ ) { 95 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 96 } 97 } 98 } 99 } 100 return 0; 101 } 102 103 104 static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 105 { 106 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 107 MPI_Comm comm = mat->comm; 108 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 109 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 110 MPI_Request *send_waits,*recv_waits; 111 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 112 InsertMode addv; 113 Scalar *rvalues,*svalues; 114 115 /* make sure all processors are either in INSERTMODE or ADDMODE */ 116 MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 117 if (addv == (ADD_VALUES|INSERT_VALUES)) { 118 SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 119 } 120 baij->insertmode = addv; /* in case this processor had no cache */ 121 122 /* first count number of contributors to each processor */ 123 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 124 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 125 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 126 for ( i=0; i<baij->stash.n; i++ ) { 127 idx = baij->stash.idx[i]; 128 for ( j=0; j<size; j++ ) { 129 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 130 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 131 } 132 } 133 } 134 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 135 136 /* inform other processors of number of messages and max length*/ 137 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 138 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 139 nreceives = work[rank]; 140 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 141 nmax = work[rank]; 142 PetscFree(work); 143 144 /* post receives: 145 1) each message will consist of ordered pairs 146 (global index,value) we store the global index as a double 147 to simplify the message passing. 148 2) since we don't know how long each individual message is we 149 allocate the largest needed buffer for each receive. Potentially 150 this is a lot of wasted space. 151 152 153 This could be done better. 154 */ 155 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 156 CHKPTRQ(rvalues); 157 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 158 CHKPTRQ(recv_waits); 159 for ( i=0; i<nreceives; i++ ) { 160 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 161 comm,recv_waits+i); 162 } 163 164 /* do sends: 165 1) starts[i] gives the starting index in svalues for stuff going to 166 the ith processor 167 */ 168 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 169 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 170 CHKPTRQ(send_waits); 171 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 172 starts[0] = 0; 173 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 174 for ( i=0; i<baij->stash.n; i++ ) { 175 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 176 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 177 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 178 } 179 PetscFree(owner); 180 starts[0] = 0; 181 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 182 count = 0; 183 for ( i=0; i<size; i++ ) { 184 if (procs[i]) { 185 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 186 comm,send_waits+count++); 187 } 188 } 189 PetscFree(starts); PetscFree(nprocs); 190 191 /* Free cache space */ 192 PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 193 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 194 195 baij->svalues = svalues; baij->rvalues = rvalues; 196 baij->nsends = nsends; baij->nrecvs = nreceives; 197 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 198 baij->rmax = nmax; 199 200 return 0; 201 } 202 203 204 static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 205 { 206 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 207 MPI_Status *send_status,recv_status; 208 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 209 int bs=baij->bs,row,col,other_disassembled; 210 Scalar *values,val; 211 InsertMode addv = baij->insertmode; 212 213 /* wait on receives */ 214 while (count) { 215 MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 216 /* unpack receives into our local space */ 217 values = baij->rvalues + 3*imdex*baij->rmax; 218 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 219 n = n/3; 220 for ( i=0; i<n; i++ ) { 221 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 222 col = (int) PetscReal(values[3*i+1]); 223 val = values[3*i+2]; 224 if (col >= baij->cstart*bs && col < baij->cend*bs) { 225 col -= baij->cstart*bs; 226 MatSetValues(baij->A,1,&row,1,&col,&val,addv); 227 } 228 else { 229 if (mat->was_assembled) { 230 if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 231 col = baij->colmap[col/bs]*bs + col%bs; 232 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 233 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 234 col = (int) PetscReal(values[3*i+1]); 235 } 236 } 237 MatSetValues(baij->B,1,&row,1,&col,&val,addv); 238 } 239 } 240 count--; 241 } 242 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 243 244 /* wait on sends */ 245 if (baij->nsends) { 246 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 247 CHKPTRQ(send_status); 248 MPI_Waitall(baij->nsends,baij->send_waits,send_status); 249 PetscFree(send_status); 250 } 251 PetscFree(baij->send_waits); PetscFree(baij->svalues); 252 253 baij->insertmode = NOT_SET_VALUES; 254 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 255 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 256 257 /* determine if any processor has disassembled, if so we must 258 also disassemble ourselfs, in order that we may reassemble. */ 259 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 260 if (mat->was_assembled && !other_disassembled) { 261 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 262 } 263 264 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 265 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 269 270 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 271 return 0; 272 } 273 274 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 int ierr; 278 279 if (baij->size == 1) { 280 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 281 } 282 else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 283 return 0; 284 } 285 286 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 287 { 288 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 289 int ierr, format,rank,bs=baij->bs; 290 FILE *fd; 291 ViewerType vtype; 292 293 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 294 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 295 ierr = ViewerGetFormat(viewer,&format); 296 if (format == ASCII_FORMAT_INFO_DETAILED) { 297 int nz, nzalloc, mem; 298 MPI_Comm_rank(mat->comm,&rank); 299 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 300 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 301 PetscSequentialPhaseBegin(mat->comm,1); 302 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 303 rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 304 ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 305 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 306 ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 307 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 308 fflush(fd); 309 PetscSequentialPhaseEnd(mat->comm,1); 310 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 311 return 0; 312 } 313 else if (format == ASCII_FORMAT_INFO) { 314 return 0; 315 } 316 } 317 318 if (vtype == DRAW_VIEWER) { 319 Draw draw; 320 PetscTruth isnull; 321 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 322 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 323 } 324 325 if (vtype == ASCII_FILE_VIEWER) { 326 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 327 PetscSequentialPhaseBegin(mat->comm,1); 328 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 329 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 330 baij->cstart*bs,baij->cend*bs); 331 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 332 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 333 fflush(fd); 334 PetscSequentialPhaseEnd(mat->comm,1); 335 } 336 else { 337 int size = baij->size; 338 rank = baij->rank; 339 if (size == 1) { 340 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 341 } 342 else { 343 /* assemble the entire matrix onto first processor. */ 344 Mat A; 345 Mat_SeqBAIJ *Aloc; 346 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 347 int mbs=baij->mbs; 348 Scalar *a; 349 350 if (!rank) { 351 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 352 CHKERRQ(ierr); 353 } 354 else { 355 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 356 CHKERRQ(ierr); 357 } 358 PLogObjectParent(mat,A); 359 360 /* copy over the A part */ 361 Aloc = (Mat_SeqBAIJ*) baij->A->data; 362 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 363 row = baij->rstart; 364 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 365 366 for ( i=0; i<mbs; i++ ) { 367 rvals[0] = bs*(baij->rstart + i); 368 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 369 for ( j=ai[i]; j<ai[i+1]; j++ ) { 370 col = (baij->cstart+aj[j])*bs; 371 for (k=0; k<bs; k++ ) { 372 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 373 col++; a += bs; 374 } 375 } 376 } 377 /* copy over the B part */ 378 Aloc = (Mat_SeqBAIJ*) baij->B->data; 379 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 380 row = baij->rstart*bs; 381 for ( i=0; i<mbs; i++ ) { 382 rvals[0] = bs*(baij->rstart + i); 383 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 384 for ( j=ai[i]; j<ai[i+1]; j++ ) { 385 col = baij->garray[aj[j]]*bs; 386 for (k=0; k<bs; k++ ) { 387 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 388 col++; a += bs; 389 } 390 } 391 } 392 PetscFree(rvals); 393 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 395 if (!rank) { 396 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 397 } 398 ierr = MatDestroy(A); CHKERRQ(ierr); 399 } 400 } 401 return 0; 402 } 403 404 405 406 static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 407 { 408 Mat mat = (Mat) obj; 409 int ierr; 410 ViewerType vtype; 411 412 if (!viewer) { 413 viewer = STDOUT_VIEWER_SELF; 414 } 415 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 416 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 417 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 418 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 419 } 420 else if (vtype == BINARY_FILE_VIEWER) { 421 return MatView_MPIBAIJ_Binary(mat,viewer); 422 } 423 return 0; 424 } 425 426 static int MatDestroy_MPIBAIJ(PetscObject obj) 427 { 428 Mat mat = (Mat) obj; 429 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 430 int ierr; 431 432 #if defined(PETSC_LOG) 433 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 434 #endif 435 436 PetscFree(baij->rowners); 437 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 438 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 439 if (baij->colmap) PetscFree(baij->colmap); 440 if (baij->garray) PetscFree(baij->garray); 441 if (baij->lvec) VecDestroy(baij->lvec); 442 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 443 if (baij->rowvalues) PetscFree(baij->rowvalues); 444 PetscFree(baij); 445 PLogObjectDestroy(mat); 446 PetscHeaderDestroy(mat); 447 return 0; 448 } 449 450 static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 451 { 452 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 453 int ierr; 454 455 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 456 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 457 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 458 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 459 return 0; 460 } 461 462 static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 463 { 464 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 465 int ierr; 466 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 467 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 468 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 469 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 470 return 0; 471 } 472 473 static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 474 { 475 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 476 int ierr; 477 478 /* do nondiagonal part */ 479 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 480 /* send it on its way */ 481 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 482 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 483 /* do local part */ 484 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 485 /* receive remote parts: note this assumes the values are not actually */ 486 /* inserted in yy until the next line, which is true for my implementation*/ 487 /* but is not perhaps always true. */ 488 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 return 0; 491 } 492 493 static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 494 { 495 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 496 int ierr; 497 498 /* do nondiagonal part */ 499 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 500 /* send it on its way */ 501 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 502 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 503 /* do local part */ 504 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 505 /* receive remote parts: note this assumes the values are not actually */ 506 /* inserted in yy until the next line, which is true for my implementation*/ 507 /* but is not perhaps always true. */ 508 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 509 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 510 return 0; 511 } 512 513 /* 514 This only works correctly for square matrices where the subblock A->A is the 515 diagonal block 516 */ 517 static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 518 { 519 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 520 if (a->M != a->N) 521 SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 522 return MatGetDiagonal(a->A,v); 523 } 524 525 static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 526 { 527 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 528 int ierr; 529 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 530 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 531 return 0; 532 } 533 static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 534 { 535 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 536 *m = mat->M; *n = mat->N; 537 return 0; 538 } 539 540 static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 541 { 542 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 543 *m = mat->m; *n = mat->N; 544 return 0; 545 } 546 547 static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 548 { 549 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 550 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 551 return 0; 552 } 553 554 /* -------------------------------------------------------------------*/ 555 static struct _MatOps MatOps = { 556 MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ, 557 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 558 0,0,0,0, 559 0,0,0,0, 560 0,MatGetDiagonal_MPIBAIJ,0,0, 561 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 562 0,0,MatGetReordering_MPIBAIJ,0, 563 0,0,0,MatGetSize_MPIBAIJ, 564 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 565 0,0,0,0, 566 0,0,0,0, 567 0,0,0,0, 568 0,0,0,0, 569 MatScale_MPIBAIJ,0,0}; 570 571 572 /*@C 573 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 574 (block compressed row). For good matrix assembly performance 575 the user should preallocate the matrix storage by setting the parameters 576 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 577 performance can be increased by more than a factor of 50. 578 579 Input Parameters: 580 . comm - MPI communicator 581 . bs - size of blockk 582 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 583 . n - number of local columns (or PETSC_DECIDE to have calculated 584 if N is given) 585 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 586 . N - number of global columns (or PETSC_DECIDE to have calculated 587 if n is given) 588 . d_nz - number of block nonzeros per block row in diagonal portion of local 589 submatrix (same for all local rows) 590 . d_nzz - number of block nonzeros per block row in diagonal portion of local 591 submatrix or null (possibly different for each row). You must leave 592 room for the diagonal entry even if it is zero. 593 . o_nz - number of block nonzeros per block row in off-diagonal portion of local 594 submatrix (same for all local rows). 595 . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 596 submatrix or null (possibly different for each row). 597 598 Output Parameter: 599 . A - the matrix 600 601 Notes: 602 The user MUST specify either the local or global matrix dimensions 603 (possibly both). 604 605 Storage Information: 606 For a square global matrix we define each processor's diagonal portion 607 to be its local rows and the corresponding columns (a square submatrix); 608 each processor's off-diagonal portion encompasses the remainder of the 609 local matrix (a rectangular submatrix). 610 611 The user can specify preallocated storage for the diagonal part of 612 the local submatrix with either d_nz or d_nnz (not both). Set 613 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 614 memory allocation. Likewise, specify preallocated storage for the 615 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 616 617 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 618 the figure below we depict these three local rows and all columns (0-11). 619 620 $ 0 1 2 3 4 5 6 7 8 9 10 11 621 $ ------------------- 622 $ row 3 | o o o d d d o o o o o o 623 $ row 4 | o o o d d d o o o o o o 624 $ row 5 | o o o d d d o o o o o o 625 $ ------------------- 626 $ 627 628 Thus, any entries in the d locations are stored in the d (diagonal) 629 submatrix, and any entries in the o locations are stored in the 630 o (off-diagonal) submatrix. Note that the d and the o submatrices are 631 stored simply in the MATSEQBAIJ format for compressed row storage. 632 633 Now d_nz should indicate the number of nonzeros per row in the d matrix, 634 and o_nz should indicate the number of nonzeros per row in the o matrix. 635 In general, for PDE problems in which most nonzeros are near the diagonal, 636 one expects d_nz >> o_nz. For additional details, see the users manual 637 chapter on matrices and the file $(PETSC_DIR)/Performance. 638 639 .keywords: matrix, aij, compressed row, sparse, parallel 640 641 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 642 @*/ 643 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 644 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 645 { 646 Mat B; 647 Mat_MPIBAIJ *b; 648 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 649 650 if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 651 *A = 0; 652 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 653 PLogObjectCreate(B); 654 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 655 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 656 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 657 B->destroy = MatDestroy_MPIBAIJ; 658 B->view = MatView_MPIBAIJ; 659 660 B->factor = 0; 661 B->assembled = PETSC_FALSE; 662 663 b->insertmode = NOT_SET_VALUES; 664 MPI_Comm_rank(comm,&b->rank); 665 MPI_Comm_size(comm,&b->size); 666 667 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 668 SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 669 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 670 if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 671 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 672 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 673 674 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 675 work[0] = m; work[1] = n; 676 mbs = m/bs; nbs = n/bs; 677 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 678 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 679 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 680 } 681 if (m == PETSC_DECIDE) { 682 Mbs = M/bs; 683 if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 684 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 685 m = mbs*bs; 686 } 687 if (n == PETSC_DECIDE) { 688 Nbs = N/bs; 689 if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 690 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 691 n = nbs*bs; 692 } 693 if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 694 695 b->m = m; B->m = m; 696 b->n = n; B->n = n; 697 b->N = N; B->N = N; 698 b->M = M; B->M = M; 699 b->bs = bs; 700 b->bs2 = bs*bs; 701 b->mbs = mbs; 702 b->nbs = nbs; 703 b->Mbs = Mbs; 704 b->Nbs = Nbs; 705 706 /* build local table of row and column ownerships */ 707 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 708 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 709 b->cowners = b->rowners + b->size + 1; 710 MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 711 b->rowners[0] = 0; 712 for ( i=2; i<=b->size; i++ ) { 713 b->rowners[i] += b->rowners[i-1]; 714 } 715 b->rstart = b->rowners[b->rank]; 716 b->rend = b->rowners[b->rank+1]; 717 MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 718 b->cowners[0] = 0; 719 for ( i=2; i<=b->size; i++ ) { 720 b->cowners[i] += b->cowners[i-1]; 721 } 722 b->cstart = b->cowners[b->rank]; 723 b->cend = b->cowners[b->rank+1]; 724 725 if (d_nz == PETSC_DEFAULT) d_nz = 5; 726 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 727 PLogObjectParent(B,b->A); 728 if (o_nz == PETSC_DEFAULT) o_nz = 0; 729 ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 730 PLogObjectParent(B,b->B); 731 732 /* build cache for off array entries formed */ 733 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 734 b->colmap = 0; 735 b->garray = 0; 736 b->roworiented = 1; 737 738 /* stuff used for matrix vector multiply */ 739 b->lvec = 0; 740 b->Mvctx = 0; 741 742 /* stuff for MatGetRow() */ 743 b->rowindices = 0; 744 b->rowvalues = 0; 745 b->getrowactive = PETSC_FALSE; 746 747 *A = B; 748 return 0; 749 } 750 751 #include "sys.h" 752 753 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 754 { 755 Mat A; 756 int i, nz, ierr, j,rstart, rend, fd; 757 Scalar *vals,*buf; 758 MPI_Comm comm = ((PetscObject)viewer)->comm; 759 MPI_Status status; 760 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 761 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 762 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 763 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 764 int dcount,kmax,k,nzcount,tmp; 765 766 767 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 768 bs2 = bs*bs; 769 770 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 771 if (!rank) { 772 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 773 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 774 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 775 } 776 777 MPI_Bcast(header+1,3,MPI_INT,0,comm); 778 M = header[1]; N = header[2]; 779 780 781 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 782 783 /* 784 This code adds extra rows to make sure the number of rows is 785 divisible by the blocksize 786 */ 787 Mbs = M/bs; 788 extra_rows = bs - M + bs*(Mbs); 789 if (extra_rows == bs) extra_rows = 0; 790 else Mbs++; 791 if (extra_rows &&!rank) { 792 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 793 } 794 /* determine ownership of all rows */ 795 mbs = Mbs/size + ((Mbs % size) > rank); 796 m = mbs * bs; 797 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 798 browners = rowners + size + 1; 799 MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 800 rowners[0] = 0; 801 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 802 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 803 rstart = rowners[rank]; 804 rend = rowners[rank+1]; 805 806 /* distribute row lengths to all processors */ 807 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 808 if (!rank) { 809 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 810 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 811 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 812 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 813 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 814 MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 815 PetscFree(sndcounts); 816 } 817 else { 818 MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 819 } 820 821 if (!rank) { 822 /* calculate the number of nonzeros on each processor */ 823 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 824 PetscMemzero(procsnz,size*sizeof(int)); 825 for ( i=0; i<size; i++ ) { 826 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 827 procsnz[i] += rowlengths[j]; 828 } 829 } 830 PetscFree(rowlengths); 831 832 /* determine max buffer needed and allocate it */ 833 maxnz = 0; 834 for ( i=0; i<size; i++ ) { 835 maxnz = PetscMax(maxnz,procsnz[i]); 836 } 837 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 838 839 /* read in my part of the matrix column indices */ 840 nz = procsnz[0]; 841 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 842 mycols = ibuf; 843 if (size == 1) nz -= extra_rows; 844 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 845 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 846 847 /* read in every ones (except the last) and ship off */ 848 for ( i=1; i<size-1; i++ ) { 849 nz = procsnz[i]; 850 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 851 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 852 } 853 /* read in the stuff for the last proc */ 854 if ( size != 1 ) { 855 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 856 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 857 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 858 MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 859 } 860 PetscFree(cols); 861 } 862 else { 863 /* determine buffer space needed for message */ 864 nz = 0; 865 for ( i=0; i<m; i++ ) { 866 nz += locrowlens[i]; 867 } 868 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 869 mycols = ibuf; 870 /* receive message of column indices*/ 871 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 872 MPI_Get_count(&status,MPI_INT,&maxnz); 873 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 874 } 875 876 /* loop over local rows, determining number of off diagonal entries */ 877 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 878 odlens = dlens + (rend-rstart); 879 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 880 PetscMemzero(mask,3*Mbs*sizeof(int)); 881 masked1 = mask + Mbs; 882 masked2 = masked1 + Mbs; 883 rowcount = 0; nzcount = 0; 884 for ( i=0; i<mbs; i++ ) { 885 dcount = 0; 886 odcount = 0; 887 for ( j=0; j<bs; j++ ) { 888 kmax = locrowlens[rowcount]; 889 for ( k=0; k<kmax; k++ ) { 890 tmp = mycols[nzcount++]/bs; 891 if (!mask[tmp]) { 892 mask[tmp] = 1; 893 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 894 else masked1[dcount++] = tmp; 895 } 896 } 897 rowcount++; 898 } 899 900 dlens[i] = dcount; 901 odlens[i] = odcount; 902 903 /* zero out the mask elements we set */ 904 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 905 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 906 } 907 908 /* create our matrix */ 909 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 910 A = *newmat; 911 MatSetOption(A,COLUMNS_SORTED); 912 913 if (!rank) { 914 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 915 /* read in my part of the matrix numerical values */ 916 nz = procsnz[0]; 917 vals = buf; 918 mycols = ibuf; 919 if (size == 1) nz -= extra_rows; 920 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 921 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 922 /* insert into matrix */ 923 jj = rstart*bs; 924 for ( i=0; i<m; i++ ) { 925 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 926 mycols += locrowlens[i]; 927 vals += locrowlens[i]; 928 jj++; 929 } 930 /* read in other processors( except the last one) and ship out */ 931 for ( i=1; i<size-1; i++ ) { 932 nz = procsnz[i]; 933 vals = buf; 934 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 935 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 936 } 937 /* the last proc */ 938 if ( size != 1 ){ 939 nz = procsnz[i] - extra_rows; 940 vals = buf; 941 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 942 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 943 MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 944 } 945 PetscFree(procsnz); 946 } 947 else { 948 /* receive numeric values */ 949 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 950 951 /* receive message of values*/ 952 vals = buf; 953 mycols = ibuf; 954 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 955 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 956 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 957 958 /* insert into matrix */ 959 jj = rstart*bs; 960 for ( i=0; i<m; i++ ) { 961 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 962 mycols += locrowlens[i]; 963 vals += locrowlens[i]; 964 jj++; 965 } 966 } 967 PetscFree(locrowlens); 968 PetscFree(buf); 969 PetscFree(ibuf); 970 PetscFree(rowners); 971 PetscFree(dlens); 972 PetscFree(mask); 973 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 974 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 975 return 0; 976 } 977 978 979