1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.209 1997/07/09 20:54:04 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #undef __FUNC__ 33 #define __FUNC__ "MatGetRowIJ_MPIAIJ" 34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 35 PetscTruth *done) 36 { 37 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38 int ierr; 39 if (aij->size == 1) { 40 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41 } else SETERRQ(1,0,"not supported in parallel"); 42 return 0; 43 } 44 45 #undef __FUNC__ 46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 48 PetscTruth *done) 49 { 50 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 51 int ierr; 52 if (aij->size == 1) { 53 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54 } else SETERRQ(1,0,"not supported in parallel"); 55 return 0; 56 } 57 58 #define CHUNKSIZE 15 59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 60 { \ 61 \ 62 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 63 rmax = aimax[row]; nrow = ailen[row]; \ 64 col1 = col - shift; \ 65 \ 66 low = 0; high = nrow; \ 67 while (high-low > 5) { \ 68 t = (low+high)/2; \ 69 if (rp[t] > col) high = t; \ 70 else low = t; \ 71 } \ 72 for ( _i=0; _i<nrow; _i++ ) { \ 73 if (rp[_i] > col1) break; \ 74 if (rp[_i] == col1) { \ 75 if (addv == ADD_VALUES) ap[_i] += value; \ 76 else ap[_i] = value; \ 77 goto a_noinsert; \ 78 } \ 79 } \ 80 if (nonew == 1) goto a_noinsert; \ 81 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 82 if (nrow >= rmax) { \ 83 /* there is no extra room in row, therefore enlarge */ \ 84 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 85 Scalar *new_a; \ 86 \ 87 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 88 \ 89 /* malloc new storage space */ \ 90 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 91 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 92 new_j = (int *) (new_a + new_nz); \ 93 new_i = new_j + new_nz; \ 94 \ 95 /* copy over old data into new slots */ \ 96 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 97 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 98 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 99 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 100 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 101 len*sizeof(int)); \ 102 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 103 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 104 len*sizeof(Scalar)); \ 105 /* free up old matrix storage */ \ 106 \ 107 PetscFree(a->a); \ 108 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 109 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 110 a->singlemalloc = 1; \ 111 \ 112 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 113 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 114 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 115 a->maxnz += CHUNKSIZE; \ 116 a->reallocs++; \ 117 } \ 118 N = nrow++ - 1; a->nz++; \ 119 /* shift up all the later entries in this row */ \ 120 for ( ii=N; ii>=_i; ii-- ) { \ 121 rp[ii+1] = rp[ii]; \ 122 ap[ii+1] = ap[ii]; \ 123 } \ 124 rp[_i] = col1; \ 125 ap[_i] = value; \ 126 a_noinsert: ; \ 127 ailen[row] = nrow; \ 128 } 129 130 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 131 { \ 132 \ 133 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 134 rmax = bimax[row]; nrow = bilen[row]; \ 135 col1 = col - shift; \ 136 \ 137 low = 0; high = nrow; \ 138 while (high-low > 5) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > col) high = t; \ 141 else low = t; \ 142 } \ 143 for ( _i=0; _i<nrow; _i++ ) { \ 144 if (rp[_i] > col1) break; \ 145 if (rp[_i] == col1) { \ 146 if (addv == ADD_VALUES) ap[_i] += value; \ 147 else ap[_i] = value; \ 148 goto b_noinsert; \ 149 } \ 150 } \ 151 if (nonew == 1) goto b_noinsert; \ 152 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 156 Scalar *new_a; \ 157 \ 158 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 162 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163 new_j = (int *) (new_a + new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 168 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 170 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 171 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 172 len*sizeof(int)); \ 173 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 174 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 175 len*sizeof(Scalar)); \ 176 /* free up old matrix storage */ \ 177 \ 178 PetscFree(b->a); \ 179 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 180 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 181 b->singlemalloc = 1; \ 182 \ 183 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 184 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 185 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 186 b->maxnz += CHUNKSIZE; \ 187 b->reallocs++; \ 188 } \ 189 N = nrow++ - 1; b->nz++; \ 190 /* shift up all the later entries in this row */ \ 191 for ( ii=N; ii>=_i; ii-- ) { \ 192 rp[ii+1] = rp[ii]; \ 193 ap[ii+1] = ap[ii]; \ 194 } \ 195 rp[_i] = col1; \ 196 ap[_i] = value; \ 197 b_noinsert: ; \ 198 bilen[row] = nrow; \ 199 } 200 201 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 202 #undef __FUNC__ 203 #define __FUNC__ "MatSetValues_MPIAIJ" 204 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 205 { 206 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 207 Scalar value; 208 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 209 int cstart = aij->cstart, cend = aij->cend,row,col; 210 int roworiented = aij->roworiented; 211 212 /* Some Variables required in the macro */ 213 Mat A = aij->A; 214 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 215 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 216 Scalar *aa = a->a; 217 218 Mat B = aij->B; 219 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 220 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 221 Scalar *ba = b->a; 222 223 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 224 int nonew = a->nonew,shift = a->indexshift; 225 Scalar *ap; 226 227 for ( i=0; i<m; i++ ) { 228 #if defined(PETSC_BOPT_g) 229 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 230 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 231 #endif 232 if (im[i] >= rstart && im[i] < rend) { 233 row = im[i] - rstart; 234 for ( j=0; j<n; j++ ) { 235 if (in[j] >= cstart && in[j] < cend){ 236 col = in[j] - cstart; 237 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 238 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 239 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 240 } 241 #if defined(PETSC_BOPT_g) 242 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 243 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 244 #endif 245 else { 246 if (mat->was_assembled) { 247 if (!aij->colmap) { 248 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 249 } 250 col = aij->colmap[in[j]] - 1; 251 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 252 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 253 col = in[j]; 254 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 255 B = aij->B; 256 b = (Mat_SeqAIJ *) B->data; 257 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 258 ba = b->a; 259 } 260 } 261 else col = in[j]; 262 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 263 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 264 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 265 } 266 } 267 } 268 else { 269 if (roworiented && !aij->donotstash) { 270 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 271 } 272 else { 273 if (!aij->donotstash) { 274 row = im[i]; 275 for ( j=0; j<n; j++ ) { 276 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 277 } 278 } 279 } 280 } 281 } 282 return 0; 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatGetValues_MPIAIJ" 287 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 290 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 291 int cstart = aij->cstart, cend = aij->cend,row,col; 292 293 for ( i=0; i<m; i++ ) { 294 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 295 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 296 if (idxm[i] >= rstart && idxm[i] < rend) { 297 row = idxm[i] - rstart; 298 for ( j=0; j<n; j++ ) { 299 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 300 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 301 if (idxn[j] >= cstart && idxn[j] < cend){ 302 col = idxn[j] - cstart; 303 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 304 } 305 else { 306 if (!aij->colmap) { 307 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 308 } 309 col = aij->colmap[idxn[j]] - 1; 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 313 } 314 } 315 } 316 } 317 else { 318 SETERRQ(1,0,"Only local values currently supported"); 319 } 320 } 321 return 0; 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 326 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 327 { 328 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 329 MPI_Comm comm = mat->comm; 330 int size = aij->size, *owners = aij->rowners; 331 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 332 MPI_Request *send_waits,*recv_waits; 333 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 334 InsertMode addv; 335 Scalar *rvalues,*svalues; 336 337 /* make sure all processors are either in INSERTMODE or ADDMODE */ 338 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 339 if (addv == (ADD_VALUES|INSERT_VALUES)) { 340 SETERRQ(1,0,"Some processors inserted others added"); 341 } 342 mat->insertmode = addv; /* in case this processor had no cache */ 343 344 /* first count number of contributors to each processor */ 345 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 346 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 347 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 348 for ( i=0; i<aij->stash.n; i++ ) { 349 idx = aij->stash.idx[i]; 350 for ( j=0; j<size; j++ ) { 351 if (idx >= owners[j] && idx < owners[j+1]) { 352 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 353 } 354 } 355 } 356 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 357 358 /* inform other processors of number of messages and max length*/ 359 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 360 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 361 nreceives = work[rank]; 362 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 363 nmax = work[rank]; 364 PetscFree(work); 365 366 /* post receives: 367 1) each message will consist of ordered pairs 368 (global index,value) we store the global index as a double 369 to simplify the message passing. 370 2) since we don't know how long each individual message is we 371 allocate the largest needed buffer for each receive. Potentially 372 this is a lot of wasted space. 373 374 375 This could be done better. 376 */ 377 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 378 CHKPTRQ(rvalues); 379 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 380 CHKPTRQ(recv_waits); 381 for ( i=0; i<nreceives; i++ ) { 382 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 383 comm,recv_waits+i); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 391 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 392 CHKPTRQ(send_waits); 393 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 394 starts[0] = 0; 395 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 396 for ( i=0; i<aij->stash.n; i++ ) { 397 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 398 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 399 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 400 } 401 PetscFree(owner); 402 starts[0] = 0; 403 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 404 count = 0; 405 for ( i=0; i<size; i++ ) { 406 if (procs[i]) { 407 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 408 comm,send_waits+count++); 409 } 410 } 411 PetscFree(starts); PetscFree(nprocs); 412 413 /* Free cache space */ 414 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 415 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 416 417 aij->svalues = svalues; aij->rvalues = rvalues; 418 aij->nsends = nsends; aij->nrecvs = nreceives; 419 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 420 aij->rmax = nmax; 421 422 return 0; 423 } 424 extern int MatSetUpMultiply_MPIAIJ(Mat); 425 426 #undef __FUNC__ 427 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 428 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 429 { 430 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 431 MPI_Status *send_status,recv_status; 432 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 433 int row,col,other_disassembled; 434 Scalar *values,val; 435 InsertMode addv = mat->insertmode; 436 437 /* wait on receives */ 438 while (count) { 439 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 440 /* unpack receives into our local space */ 441 values = aij->rvalues + 3*imdex*aij->rmax; 442 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 443 n = n/3; 444 for ( i=0; i<n; i++ ) { 445 row = (int) PetscReal(values[3*i]) - aij->rstart; 446 col = (int) PetscReal(values[3*i+1]); 447 val = values[3*i+2]; 448 if (col >= aij->cstart && col < aij->cend) { 449 col -= aij->cstart; 450 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 451 } 452 else { 453 if (mat->was_assembled || mat->assembled) { 454 if (!aij->colmap) { 455 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 456 } 457 col = aij->colmap[col] - 1; 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 469 470 /* wait on sends */ 471 if (aij->nsends) { 472 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 473 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 474 PetscFree(send_status); 475 } 476 PetscFree(aij->send_waits); PetscFree(aij->svalues); 477 478 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 479 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 480 481 /* determine if any processor has disassembled, if so we must 482 also disassemble ourselfs, in order that we may reassemble. */ 483 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 484 if (mat->was_assembled && !other_disassembled) { 485 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 486 } 487 488 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 489 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 490 } 491 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 492 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 493 494 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 495 return 0; 496 } 497 498 #undef __FUNC__ 499 #define __FUNC__ "MatZeroEntries_MPIAIJ" 500 int MatZeroEntries_MPIAIJ(Mat A) 501 { 502 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 503 int ierr; 504 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 505 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 506 return 0; 507 } 508 509 /* the code does not do the diagonal entries correctly unless the 510 matrix is square and the column and row owerships are identical. 511 This is a BUG. The only way to fix it seems to be to access 512 aij->A and aij->B directly and not through the MatZeroRows() 513 routine. 514 */ 515 #undef __FUNC__ 516 #define __FUNC__ "MatZeroRows_MPIAIJ" 517 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 518 { 519 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 520 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 521 int *procs,*nprocs,j,found,idx,nsends,*work; 522 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 523 int *rvalues,tag = A->tag,count,base,slen,n,*source; 524 int *lens,imdex,*lrows,*values; 525 MPI_Comm comm = A->comm; 526 MPI_Request *send_waits,*recv_waits; 527 MPI_Status recv_status,*send_status; 528 IS istmp; 529 530 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 531 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 532 533 /* first count number of contributors to each processor */ 534 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 535 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 536 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 537 for ( i=0; i<N; i++ ) { 538 idx = rows[i]; 539 found = 0; 540 for ( j=0; j<size; j++ ) { 541 if (idx >= owners[j] && idx < owners[j+1]) { 542 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 543 } 544 } 545 if (!found) SETERRQ(1,0,"Index out of range"); 546 } 547 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 548 549 /* inform other processors of number of messages and max length*/ 550 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 551 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 552 nrecvs = work[rank]; 553 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 554 nmax = work[rank]; 555 PetscFree(work); 556 557 /* post receives: */ 558 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 559 CHKPTRQ(rvalues); 560 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 561 CHKPTRQ(recv_waits); 562 for ( i=0; i<nrecvs; i++ ) { 563 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 564 } 565 566 /* do sends: 567 1) starts[i] gives the starting index in svalues for stuff going to 568 the ith processor 569 */ 570 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 571 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 572 CHKPTRQ(send_waits); 573 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 574 starts[0] = 0; 575 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 576 for ( i=0; i<N; i++ ) { 577 svalues[starts[owner[i]]++] = rows[i]; 578 } 579 ISRestoreIndices(is,&rows); 580 581 starts[0] = 0; 582 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 583 count = 0; 584 for ( i=0; i<size; i++ ) { 585 if (procs[i]) { 586 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 587 } 588 } 589 PetscFree(starts); 590 591 base = owners[rank]; 592 593 /* wait on receives */ 594 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 595 source = lens + nrecvs; 596 count = nrecvs; slen = 0; 597 while (count) { 598 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 599 /* unpack receives into our local space */ 600 MPI_Get_count(&recv_status,MPI_INT,&n); 601 source[imdex] = recv_status.MPI_SOURCE; 602 lens[imdex] = n; 603 slen += n; 604 count--; 605 } 606 PetscFree(recv_waits); 607 608 /* move the data into the send scatter */ 609 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 610 count = 0; 611 for ( i=0; i<nrecvs; i++ ) { 612 values = rvalues + i*nmax; 613 for ( j=0; j<lens[i]; j++ ) { 614 lrows[count++] = values[j] - base; 615 } 616 } 617 PetscFree(rvalues); PetscFree(lens); 618 PetscFree(owner); PetscFree(nprocs); 619 620 /* actually zap the local rows */ 621 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 622 PLogObjectParent(A,istmp); 623 PetscFree(lrows); 624 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 625 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 626 ierr = ISDestroy(istmp); CHKERRQ(ierr); 627 628 /* wait on sends */ 629 if (nsends) { 630 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 631 CHKPTRQ(send_status); 632 MPI_Waitall(nsends,send_waits,send_status); 633 PetscFree(send_status); 634 } 635 PetscFree(send_waits); PetscFree(svalues); 636 637 return 0; 638 } 639 640 #undef __FUNC__ 641 #define __FUNC__ "MatMult_MPIAIJ" 642 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 645 int ierr,nt; 646 647 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 648 if (nt != a->n) { 649 SETERRQ(1,0,"Incompatible partition of A and xx"); 650 } 651 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 652 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 653 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 654 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 655 return 0; 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatMultAdd_MPIAIJ" 660 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 661 { 662 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 663 int ierr; 664 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 665 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 666 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 667 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 668 return 0; 669 } 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatMultTrans_MPIAIJ" 673 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676 int ierr; 677 678 /* do nondiagonal part */ 679 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 680 /* send it on its way */ 681 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 /* do local part */ 683 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 684 /* receive remote parts: note this assumes the values are not actually */ 685 /* inserted in yy until the next line, which is true for my implementation*/ 686 /* but is not perhaps always true. */ 687 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 688 return 0; 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 693 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696 int ierr; 697 698 /* do nondiagonal part */ 699 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 700 /* send it on its way */ 701 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 702 /* do local part */ 703 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 704 /* receive remote parts: note this assumes the values are not actually */ 705 /* inserted in yy until the next line, which is true for my implementation*/ 706 /* but is not perhaps always true. */ 707 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708 return 0; 709 } 710 711 /* 712 This only works correctly for square matrices where the subblock A->A is the 713 diagonal block 714 */ 715 #undef __FUNC__ 716 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 717 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 720 if (a->M != a->N) 721 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 722 if (a->rstart != a->cstart || a->rend != a->cend) { 723 SETERRQ(1,0,"row partition must equal col partition"); } 724 return MatGetDiagonal(a->A,v); 725 } 726 727 #undef __FUNC__ 728 #define __FUNC__ "MatScale_MPIAIJ" 729 int MatScale_MPIAIJ(Scalar *aa,Mat A) 730 { 731 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 732 int ierr; 733 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 734 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 735 return 0; 736 } 737 738 #undef __FUNC__ 739 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 740 int MatDestroy_MPIAIJ(PetscObject obj) 741 { 742 Mat mat = (Mat) obj; 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 744 int ierr; 745 #if defined(PETSC_LOG) 746 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 747 #endif 748 PetscFree(aij->rowners); 749 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 750 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 751 if (aij->colmap) PetscFree(aij->colmap); 752 if (aij->garray) PetscFree(aij->garray); 753 if (aij->lvec) VecDestroy(aij->lvec); 754 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 755 if (aij->rowvalues) PetscFree(aij->rowvalues); 756 PetscFree(aij); 757 if (mat->mapping) { 758 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 759 } 760 PLogObjectDestroy(mat); 761 PetscHeaderDestroy(mat); 762 return 0; 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 767 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 768 { 769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 770 int ierr; 771 772 if (aij->size == 1) { 773 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 774 } 775 else SETERRQ(1,0,"Only uniprocessor output supported"); 776 return 0; 777 } 778 779 #undef __FUNC__ 780 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 781 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 782 { 783 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 784 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 785 int ierr, format,shift = C->indexshift,rank; 786 FILE *fd; 787 ViewerType vtype; 788 789 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 790 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 791 ierr = ViewerGetFormat(viewer,&format); 792 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 793 MatInfo info; 794 int flg; 795 MPI_Comm_rank(mat->comm,&rank); 796 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 797 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 798 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 799 PetscSequentialPhaseBegin(mat->comm,1); 800 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 801 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 802 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 803 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 804 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 805 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 806 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 807 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 808 fflush(fd); 809 PetscSequentialPhaseEnd(mat->comm,1); 810 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 811 return 0; 812 } 813 else if (format == VIEWER_FORMAT_ASCII_INFO) { 814 return 0; 815 } 816 } 817 818 if (vtype == DRAW_VIEWER) { 819 Draw draw; 820 PetscTruth isnull; 821 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 822 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 823 } 824 825 if (vtype == ASCII_FILE_VIEWER) { 826 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 827 PetscSequentialPhaseBegin(mat->comm,1); 828 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 829 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 830 aij->cend); 831 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 832 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 833 fflush(fd); 834 PetscSequentialPhaseEnd(mat->comm,1); 835 } 836 else { 837 int size = aij->size; 838 rank = aij->rank; 839 if (size == 1) { 840 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 841 } 842 else { 843 /* assemble the entire matrix onto first processor. */ 844 Mat A; 845 Mat_SeqAIJ *Aloc; 846 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 847 Scalar *a; 848 849 if (!rank) { 850 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 851 CHKERRQ(ierr); 852 } 853 else { 854 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 855 CHKERRQ(ierr); 856 } 857 PLogObjectParent(mat,A); 858 859 /* copy over the A part */ 860 Aloc = (Mat_SeqAIJ*) aij->A->data; 861 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 862 row = aij->rstart; 863 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 864 for ( i=0; i<m; i++ ) { 865 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 866 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 867 } 868 aj = Aloc->j; 869 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 870 871 /* copy over the B part */ 872 Aloc = (Mat_SeqAIJ*) aij->B->data; 873 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 874 row = aij->rstart; 875 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 876 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 877 for ( i=0; i<m; i++ ) { 878 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 879 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 880 } 881 PetscFree(ct); 882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 884 if (!rank) { 885 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 886 } 887 ierr = MatDestroy(A); CHKERRQ(ierr); 888 } 889 } 890 return 0; 891 } 892 893 #undef __FUNC__ 894 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 895 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 896 { 897 Mat mat = (Mat) obj; 898 int ierr; 899 ViewerType vtype; 900 901 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 902 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 903 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 904 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 905 } 906 else if (vtype == BINARY_FILE_VIEWER) { 907 return MatView_MPIAIJ_Binary(mat,viewer); 908 } 909 return 0; 910 } 911 912 /* 913 This has to provide several versions. 914 915 2) a) use only local smoothing updating outer values only once. 916 b) local smoothing updating outer values each inner iteration 917 3) color updating out values betwen colors. 918 */ 919 #undef __FUNC__ 920 #define __FUNC__ "MatRelax_MPIAIJ" 921 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 922 double fshift,int its,Vec xx) 923 { 924 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925 Mat AA = mat->A, BB = mat->B; 926 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 927 Scalar *b,*x,*xs,*ls,d,*v,sum; 928 int ierr,*idx, *diag; 929 int n = mat->n, m = mat->m, i,shift = A->indexshift; 930 931 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 932 xs = x + shift; /* shift by one for index start of 1 */ 933 ls = ls + shift; 934 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 935 diag = A->diag; 936 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 937 if (flag & SOR_ZERO_INITIAL_GUESS) { 938 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 939 } 940 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 941 CHKERRQ(ierr); 942 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 943 CHKERRQ(ierr); 944 while (its--) { 945 /* go down through the rows */ 946 for ( i=0; i<m; i++ ) { 947 n = A->i[i+1] - A->i[i]; 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 958 } 959 /* come up through the rows */ 960 for ( i=m-1; i>-1; i-- ) { 961 n = A->i[i+1] - A->i[i]; 962 idx = A->j + A->i[i] + shift; 963 v = A->a + A->i[i] + shift; 964 sum = b[i]; 965 SPARSEDENSEMDOT(sum,xs,v,idx,n); 966 d = fshift + A->a[diag[i]+shift]; 967 n = B->i[i+1] - B->i[i]; 968 idx = B->j + B->i[i] + shift; 969 v = B->a + B->i[i] + shift; 970 SPARSEDENSEMDOT(sum,ls,v,idx,n); 971 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 972 } 973 } 974 } 975 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 976 if (flag & SOR_ZERO_INITIAL_GUESS) { 977 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 978 } 979 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 980 CHKERRQ(ierr); 981 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 982 CHKERRQ(ierr); 983 while (its--) { 984 for ( i=0; i<m; i++ ) { 985 n = A->i[i+1] - A->i[i]; 986 idx = A->j + A->i[i] + shift; 987 v = A->a + A->i[i] + shift; 988 sum = b[i]; 989 SPARSEDENSEMDOT(sum,xs,v,idx,n); 990 d = fshift + A->a[diag[i]+shift]; 991 n = B->i[i+1] - B->i[i]; 992 idx = B->j + B->i[i] + shift; 993 v = B->a + B->i[i] + shift; 994 SPARSEDENSEMDOT(sum,ls,v,idx,n); 995 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 996 } 997 } 998 } 999 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1000 if (flag & SOR_ZERO_INITIAL_GUESS) { 1001 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 1002 } 1003 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1004 mat->Mvctx); CHKERRQ(ierr); 1005 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1006 mat->Mvctx); CHKERRQ(ierr); 1007 while (its--) { 1008 for ( i=m-1; i>-1; i-- ) { 1009 n = A->i[i+1] - A->i[i]; 1010 idx = A->j + A->i[i] + shift; 1011 v = A->a + A->i[i] + shift; 1012 sum = b[i]; 1013 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1014 d = fshift + A->a[diag[i]+shift]; 1015 n = B->i[i+1] - B->i[i]; 1016 idx = B->j + B->i[i] + shift; 1017 v = B->a + B->i[i] + shift; 1018 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1019 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1020 } 1021 } 1022 } 1023 else { 1024 SETERRQ(1,0,"Option not supported"); 1025 } 1026 return 0; 1027 } 1028 1029 #undef __FUNC__ 1030 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 1031 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1032 { 1033 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1034 Mat A = mat->A, B = mat->B; 1035 int ierr; 1036 double isend[5], irecv[5]; 1037 1038 info->rows_global = (double)mat->M; 1039 info->columns_global = (double)mat->N; 1040 info->rows_local = (double)mat->m; 1041 info->columns_local = (double)mat->N; 1042 info->block_size = 1.0; 1043 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1044 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1045 isend[3] = info->memory; isend[4] = info->mallocs; 1046 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1047 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1048 isend[3] += info->memory; isend[4] += info->mallocs; 1049 if (flag == MAT_LOCAL) { 1050 info->nz_used = isend[0]; 1051 info->nz_allocated = isend[1]; 1052 info->nz_unneeded = isend[2]; 1053 info->memory = isend[3]; 1054 info->mallocs = isend[4]; 1055 } else if (flag == MAT_GLOBAL_MAX) { 1056 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1057 info->nz_used = irecv[0]; 1058 info->nz_allocated = irecv[1]; 1059 info->nz_unneeded = irecv[2]; 1060 info->memory = irecv[3]; 1061 info->mallocs = irecv[4]; 1062 } else if (flag == MAT_GLOBAL_SUM) { 1063 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1064 info->nz_used = irecv[0]; 1065 info->nz_allocated = irecv[1]; 1066 info->nz_unneeded = irecv[2]; 1067 info->memory = irecv[3]; 1068 info->mallocs = irecv[4]; 1069 } 1070 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1071 info->fill_ratio_needed = 0; 1072 info->factor_mallocs = 0; 1073 1074 return 0; 1075 } 1076 1077 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1078 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1079 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1080 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1081 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1082 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1083 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1084 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1085 1086 #undef __FUNC__ 1087 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 1088 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1091 1092 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1093 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1094 op == MAT_COLUMNS_UNSORTED || 1095 op == MAT_COLUMNS_SORTED || 1096 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1097 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1098 MatSetOption(a->A,op); 1099 MatSetOption(a->B,op); 1100 } else if (op == MAT_ROW_ORIENTED) { 1101 a->roworiented = 1; 1102 MatSetOption(a->A,op); 1103 MatSetOption(a->B,op); 1104 } else if (op == MAT_ROWS_SORTED || 1105 op == MAT_ROWS_UNSORTED || 1106 op == MAT_SYMMETRIC || 1107 op == MAT_STRUCTURALLY_SYMMETRIC || 1108 op == MAT_YES_NEW_DIAGONALS) 1109 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1110 else if (op == MAT_COLUMN_ORIENTED) { 1111 a->roworiented = 0; 1112 MatSetOption(a->A,op); 1113 MatSetOption(a->B,op); 1114 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1115 a->donotstash = 1; 1116 } else if (op == MAT_NO_NEW_DIAGONALS) 1117 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1118 else 1119 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1120 return 0; 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 1125 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1126 { 1127 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1128 *m = mat->M; *n = mat->N; 1129 return 0; 1130 } 1131 1132 #undef __FUNC__ 1133 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 1134 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1135 { 1136 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1137 *m = mat->m; *n = mat->N; 1138 return 0; 1139 } 1140 1141 #undef __FUNC__ 1142 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 1143 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1144 { 1145 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1146 *m = mat->rstart; *n = mat->rend; 1147 return 0; 1148 } 1149 1150 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1151 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1152 1153 #undef __FUNC__ 1154 #define __FUNC__ "MatGetRow_MPIAIJ" 1155 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1156 { 1157 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1158 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1159 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1160 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1161 int *cmap, *idx_p; 1162 1163 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1164 mat->getrowactive = PETSC_TRUE; 1165 1166 if (!mat->rowvalues && (idx || v)) { 1167 /* 1168 allocate enough space to hold information from the longest row. 1169 */ 1170 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1171 int max = 1,m = mat->m,tmp; 1172 for ( i=0; i<m; i++ ) { 1173 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1174 if (max < tmp) { max = tmp; } 1175 } 1176 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1177 CHKPTRQ(mat->rowvalues); 1178 mat->rowindices = (int *) (mat->rowvalues + max); 1179 } 1180 1181 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1182 lrow = row - rstart; 1183 1184 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1185 if (!v) {pvA = 0; pvB = 0;} 1186 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1187 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1188 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1189 nztot = nzA + nzB; 1190 1191 cmap = mat->garray; 1192 if (v || idx) { 1193 if (nztot) { 1194 /* Sort by increasing column numbers, assuming A and B already sorted */ 1195 int imark = -1; 1196 if (v) { 1197 *v = v_p = mat->rowvalues; 1198 for ( i=0; i<nzB; i++ ) { 1199 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1200 else break; 1201 } 1202 imark = i; 1203 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1204 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1205 } 1206 if (idx) { 1207 *idx = idx_p = mat->rowindices; 1208 if (imark > -1) { 1209 for ( i=0; i<imark; i++ ) { 1210 idx_p[i] = cmap[cworkB[i]]; 1211 } 1212 } else { 1213 for ( i=0; i<nzB; i++ ) { 1214 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1215 else break; 1216 } 1217 imark = i; 1218 } 1219 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1220 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1221 } 1222 } 1223 else { 1224 if (idx) *idx = 0; 1225 if (v) *v = 0; 1226 } 1227 } 1228 *nz = nztot; 1229 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1230 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1231 return 0; 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 1236 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1239 if (aij->getrowactive == PETSC_FALSE) { 1240 SETERRQ(1,0,"MatGetRow not called"); 1241 } 1242 aij->getrowactive = PETSC_FALSE; 1243 return 0; 1244 } 1245 1246 #undef __FUNC__ 1247 #define __FUNC__ "MatNorm_MPIAIJ" 1248 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1249 { 1250 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1251 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1252 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1253 double sum = 0.0; 1254 Scalar *v; 1255 1256 if (aij->size == 1) { 1257 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1258 } else { 1259 if (type == NORM_FROBENIUS) { 1260 v = amat->a; 1261 for (i=0; i<amat->nz; i++ ) { 1262 #if defined(PETSC_COMPLEX) 1263 sum += real(conj(*v)*(*v)); v++; 1264 #else 1265 sum += (*v)*(*v); v++; 1266 #endif 1267 } 1268 v = bmat->a; 1269 for (i=0; i<bmat->nz; i++ ) { 1270 #if defined(PETSC_COMPLEX) 1271 sum += real(conj(*v)*(*v)); v++; 1272 #else 1273 sum += (*v)*(*v); v++; 1274 #endif 1275 } 1276 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1277 *norm = sqrt(*norm); 1278 } 1279 else if (type == NORM_1) { /* max column norm */ 1280 double *tmp, *tmp2; 1281 int *jj, *garray = aij->garray; 1282 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1283 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1284 PetscMemzero(tmp,aij->N*sizeof(double)); 1285 *norm = 0.0; 1286 v = amat->a; jj = amat->j; 1287 for ( j=0; j<amat->nz; j++ ) { 1288 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1289 } 1290 v = bmat->a; jj = bmat->j; 1291 for ( j=0; j<bmat->nz; j++ ) { 1292 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1293 } 1294 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1295 for ( j=0; j<aij->N; j++ ) { 1296 if (tmp2[j] > *norm) *norm = tmp2[j]; 1297 } 1298 PetscFree(tmp); PetscFree(tmp2); 1299 } 1300 else if (type == NORM_INFINITY) { /* max row norm */ 1301 double ntemp = 0.0; 1302 for ( j=0; j<amat->m; j++ ) { 1303 v = amat->a + amat->i[j] + shift; 1304 sum = 0.0; 1305 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1306 sum += PetscAbsScalar(*v); v++; 1307 } 1308 v = bmat->a + bmat->i[j] + shift; 1309 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1310 sum += PetscAbsScalar(*v); v++; 1311 } 1312 if (sum > ntemp) ntemp = sum; 1313 } 1314 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1315 } 1316 else { 1317 SETERRQ(1,0,"No support for two norm"); 1318 } 1319 } 1320 return 0; 1321 } 1322 1323 #undef __FUNC__ 1324 #define __FUNC__ "MatTranspose_MPIAIJ" 1325 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1326 { 1327 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1328 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1329 int ierr,shift = Aloc->indexshift; 1330 Mat B; 1331 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1332 Scalar *array; 1333 1334 if (matout == PETSC_NULL && M != N) 1335 SETERRQ(1,0,"Square matrix only for in-place"); 1336 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1337 PETSC_NULL,&B); CHKERRQ(ierr); 1338 1339 /* copy over the A part */ 1340 Aloc = (Mat_SeqAIJ*) a->A->data; 1341 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1342 row = a->rstart; 1343 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1344 for ( i=0; i<m; i++ ) { 1345 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1346 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1347 } 1348 aj = Aloc->j; 1349 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1350 1351 /* copy over the B part */ 1352 Aloc = (Mat_SeqAIJ*) a->B->data; 1353 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1354 row = a->rstart; 1355 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1356 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1357 for ( i=0; i<m; i++ ) { 1358 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1359 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1360 } 1361 PetscFree(ct); 1362 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1363 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1364 if (matout != PETSC_NULL) { 1365 *matout = B; 1366 } else { 1367 /* This isn't really an in-place transpose .... but free data structures from a */ 1368 PetscFree(a->rowners); 1369 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1370 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1371 if (a->colmap) PetscFree(a->colmap); 1372 if (a->garray) PetscFree(a->garray); 1373 if (a->lvec) VecDestroy(a->lvec); 1374 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1375 PetscFree(a); 1376 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1377 PetscHeaderDestroy(B); 1378 } 1379 return 0; 1380 } 1381 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1384 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1385 { 1386 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1387 Mat a = aij->A, b = aij->B; 1388 int ierr,s1,s2,s3; 1389 1390 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1391 if (rr) { 1392 s3 = aij->n; 1393 VecGetLocalSize_Fast(rr,s1); 1394 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1395 /* Overlap communication with computation. */ 1396 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1397 } 1398 if (ll) { 1399 VecGetLocalSize_Fast(ll,s1); 1400 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1401 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1402 } 1403 /* scale the diagonal block */ 1404 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1405 1406 if (rr) { 1407 /* Do a scatter end and then right scale the off-diagonal block */ 1408 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1409 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1410 } 1411 1412 return 0; 1413 } 1414 1415 1416 extern int MatPrintHelp_SeqAIJ(Mat); 1417 #undef __FUNC__ 1418 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 1419 int MatPrintHelp_MPIAIJ(Mat A) 1420 { 1421 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1422 1423 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1424 else return 0; 1425 } 1426 1427 #undef __FUNC__ 1428 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 1429 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1430 { 1431 *bs = 1; 1432 return 0; 1433 } 1434 #undef __FUNC__ 1435 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 1436 int MatSetUnfactored_MPIAIJ(Mat A) 1437 { 1438 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1439 int ierr; 1440 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1441 return 0; 1442 } 1443 1444 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1445 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1446 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1447 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1448 /* -------------------------------------------------------------------*/ 1449 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1450 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1451 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1452 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1453 0,0, 1454 0,0, 1455 0,0, 1456 MatRelax_MPIAIJ, 1457 MatTranspose_MPIAIJ, 1458 MatGetInfo_MPIAIJ,0, 1459 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1460 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1461 0, 1462 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1463 0,0,0,0, 1464 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1465 0,0, 1466 0,0,MatConvertSameType_MPIAIJ,0,0, 1467 0,0,0, 1468 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1469 MatPrintHelp_MPIAIJ, 1470 MatScale_MPIAIJ,0,0,0, 1471 MatGetBlockSize_MPIAIJ,0,0,0,0, 1472 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 1473 1474 1475 #undef __FUNC__ 1476 #define __FUNC__ "MatCreateMPIAIJ" 1477 /*@C 1478 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1479 (the default parallel PETSc format). For good matrix assembly performance 1480 the user should preallocate the matrix storage by setting the parameters 1481 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1482 performance can be increased by more than a factor of 50. 1483 1484 Input Parameters: 1485 . comm - MPI communicator 1486 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1487 This value should be the same as the local size used in creating the 1488 y vector for the matrix-vector product y = Ax. 1489 . n - This value should be the same as the local size used in creating the 1490 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1491 calculated if N is given) For square matrices n is almost always m. 1492 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1493 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1494 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1495 (same for all local rows) 1496 . d_nzz - array containing the number of nonzeros in the various rows of the 1497 diagonal portion of the local submatrix (possibly different for each row) 1498 or PETSC_NULL. You must leave room for the diagonal entry even if 1499 it is zero. 1500 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1501 submatrix (same for all local rows). 1502 . o_nzz - array containing the number of nonzeros in the various rows of the 1503 off-diagonal portion of the local submatrix (possibly different for 1504 each row) or PETSC_NULL. 1505 1506 Output Parameter: 1507 . A - the matrix 1508 1509 Notes: 1510 The AIJ format (also called the Yale sparse matrix format or 1511 compressed row storage), is fully compatible with standard Fortran 77 1512 storage. That is, the stored row and column indices can begin at 1513 either one (as in Fortran) or zero. See the users manual for details. 1514 1515 The user MUST specify either the local or global matrix dimensions 1516 (possibly both). 1517 1518 By default, this format uses inodes (identical nodes) when possible. 1519 We search for consecutive rows with the same nonzero structure, thereby 1520 reusing matrix information to achieve increased efficiency. 1521 1522 Options Database Keys: 1523 $ -mat_aij_no_inode - Do not use inodes 1524 $ -mat_aij_inode_limit <limit> - Set inode limit. 1525 $ (max limit=5) 1526 $ -mat_aij_oneindex - Internally use indexing starting at 1 1527 $ rather than 0. Note: When calling MatSetValues(), 1528 $ the user still MUST index entries starting at 0! 1529 1530 Storage Information: 1531 For a square global matrix we define each processor's diagonal portion 1532 to be its local rows and the corresponding columns (a square submatrix); 1533 each processor's off-diagonal portion encompasses the remainder of the 1534 local matrix (a rectangular submatrix). 1535 1536 The user can specify preallocated storage for the diagonal part of 1537 the local submatrix with either d_nz or d_nnz (not both). Set 1538 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1539 memory allocation. Likewise, specify preallocated storage for the 1540 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1541 1542 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1543 the figure below we depict these three local rows and all columns (0-11). 1544 1545 $ 0 1 2 3 4 5 6 7 8 9 10 11 1546 $ ------------------- 1547 $ row 3 | o o o d d d o o o o o o 1548 $ row 4 | o o o d d d o o o o o o 1549 $ row 5 | o o o d d d o o o o o o 1550 $ ------------------- 1551 $ 1552 1553 Thus, any entries in the d locations are stored in the d (diagonal) 1554 submatrix, and any entries in the o locations are stored in the 1555 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1556 stored simply in the MATSEQAIJ format for compressed row storage. 1557 1558 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1559 and o_nz should indicate the number of nonzeros per row in the o matrix. 1560 In general, for PDE problems in which most nonzeros are near the diagonal, 1561 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1562 or you will get TERRIBLE performance; see the users' manual chapter on 1563 matrices. 1564 1565 .keywords: matrix, aij, compressed row, sparse, parallel 1566 1567 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1568 @*/ 1569 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1570 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1571 { 1572 Mat B; 1573 Mat_MPIAIJ *b; 1574 int ierr, i,sum[2],work[2],size; 1575 1576 *A = 0; 1577 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1578 PLogObjectCreate(B); 1579 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1580 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1581 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1582 MPI_Comm_size(comm,&size); 1583 if (size == 1) { 1584 B->ops.getrowij = MatGetRowIJ_MPIAIJ; 1585 B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 1586 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 1587 B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 1588 B->ops.lufactor = MatLUFactor_MPIAIJ; 1589 B->ops.solve = MatSolve_MPIAIJ; 1590 B->ops.solveadd = MatSolveAdd_MPIAIJ; 1591 B->ops.solvetrans = MatSolveTrans_MPIAIJ; 1592 B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 1593 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 1594 } 1595 B->destroy = MatDestroy_MPIAIJ; 1596 B->view = MatView_MPIAIJ; 1597 B->factor = 0; 1598 B->assembled = PETSC_FALSE; 1599 B->mapping = 0; 1600 1601 B->insertmode = NOT_SET_VALUES; 1602 MPI_Comm_rank(comm,&b->rank); 1603 MPI_Comm_size(comm,&b->size); 1604 1605 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1606 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1607 1608 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1609 work[0] = m; work[1] = n; 1610 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1611 if (M == PETSC_DECIDE) M = sum[0]; 1612 if (N == PETSC_DECIDE) N = sum[1]; 1613 } 1614 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1615 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1616 b->m = m; B->m = m; 1617 b->n = n; B->n = n; 1618 b->N = N; B->N = N; 1619 b->M = M; B->M = M; 1620 1621 /* build local table of row and column ownerships */ 1622 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1623 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1624 b->cowners = b->rowners + b->size + 2; 1625 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1626 b->rowners[0] = 0; 1627 for ( i=2; i<=b->size; i++ ) { 1628 b->rowners[i] += b->rowners[i-1]; 1629 } 1630 b->rstart = b->rowners[b->rank]; 1631 b->rend = b->rowners[b->rank+1]; 1632 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1633 b->cowners[0] = 0; 1634 for ( i=2; i<=b->size; i++ ) { 1635 b->cowners[i] += b->cowners[i-1]; 1636 } 1637 b->cstart = b->cowners[b->rank]; 1638 b->cend = b->cowners[b->rank+1]; 1639 1640 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1641 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1642 PLogObjectParent(B,b->A); 1643 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1644 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1645 PLogObjectParent(B,b->B); 1646 1647 /* build cache for off array entries formed */ 1648 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1649 b->donotstash = 0; 1650 b->colmap = 0; 1651 b->garray = 0; 1652 b->roworiented = 1; 1653 1654 /* stuff used for matrix vector multiply */ 1655 b->lvec = 0; 1656 b->Mvctx = 0; 1657 1658 /* stuff for MatGetRow() */ 1659 b->rowindices = 0; 1660 b->rowvalues = 0; 1661 b->getrowactive = PETSC_FALSE; 1662 1663 *A = B; 1664 return 0; 1665 } 1666 1667 #undef __FUNC__ 1668 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1669 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1670 { 1671 Mat mat; 1672 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1673 int ierr, len=0, flg; 1674 1675 *newmat = 0; 1676 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1677 PLogObjectCreate(mat); 1678 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1679 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1680 mat->destroy = MatDestroy_MPIAIJ; 1681 mat->view = MatView_MPIAIJ; 1682 mat->factor = matin->factor; 1683 mat->assembled = PETSC_TRUE; 1684 1685 a->m = mat->m = oldmat->m; 1686 a->n = mat->n = oldmat->n; 1687 a->M = mat->M = oldmat->M; 1688 a->N = mat->N = oldmat->N; 1689 1690 a->rstart = oldmat->rstart; 1691 a->rend = oldmat->rend; 1692 a->cstart = oldmat->cstart; 1693 a->cend = oldmat->cend; 1694 a->size = oldmat->size; 1695 a->rank = oldmat->rank; 1696 mat->insertmode = NOT_SET_VALUES; 1697 a->rowvalues = 0; 1698 a->getrowactive = PETSC_FALSE; 1699 1700 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1701 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1702 a->cowners = a->rowners + a->size + 2; 1703 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1704 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1705 if (oldmat->colmap) { 1706 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1707 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1708 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1709 } else a->colmap = 0; 1710 if (oldmat->garray) { 1711 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1712 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1713 PLogObjectMemory(mat,len*sizeof(int)); 1714 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1715 } else a->garray = 0; 1716 1717 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1718 PLogObjectParent(mat,a->lvec); 1719 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1720 PLogObjectParent(mat,a->Mvctx); 1721 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1722 PLogObjectParent(mat,a->A); 1723 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1724 PLogObjectParent(mat,a->B); 1725 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1726 if (flg) { 1727 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1728 } 1729 *newmat = mat; 1730 return 0; 1731 } 1732 1733 #include "sys.h" 1734 1735 #undef __FUNC__ 1736 #define __FUNC__ "MatLoad_MPIAIJ" 1737 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1738 { 1739 Mat A; 1740 int i, nz, ierr, j,rstart, rend, fd; 1741 Scalar *vals,*svals; 1742 MPI_Comm comm = ((PetscObject)viewer)->comm; 1743 MPI_Status status; 1744 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1745 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1746 int tag = ((PetscObject)viewer)->tag; 1747 1748 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1749 if (!rank) { 1750 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1751 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1752 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1753 } 1754 1755 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1756 M = header[1]; N = header[2]; 1757 /* determine ownership of all rows */ 1758 m = M/size + ((M % size) > rank); 1759 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1760 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1761 rowners[0] = 0; 1762 for ( i=2; i<=size; i++ ) { 1763 rowners[i] += rowners[i-1]; 1764 } 1765 rstart = rowners[rank]; 1766 rend = rowners[rank+1]; 1767 1768 /* distribute row lengths to all processors */ 1769 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1770 offlens = ourlens + (rend-rstart); 1771 if (!rank) { 1772 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1773 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1774 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1775 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1776 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1777 PetscFree(sndcounts); 1778 } 1779 else { 1780 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1781 } 1782 1783 if (!rank) { 1784 /* calculate the number of nonzeros on each processor */ 1785 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1786 PetscMemzero(procsnz,size*sizeof(int)); 1787 for ( i=0; i<size; i++ ) { 1788 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1789 procsnz[i] += rowlengths[j]; 1790 } 1791 } 1792 PetscFree(rowlengths); 1793 1794 /* determine max buffer needed and allocate it */ 1795 maxnz = 0; 1796 for ( i=0; i<size; i++ ) { 1797 maxnz = PetscMax(maxnz,procsnz[i]); 1798 } 1799 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1800 1801 /* read in my part of the matrix column indices */ 1802 nz = procsnz[0]; 1803 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1804 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1805 1806 /* read in every one elses and ship off */ 1807 for ( i=1; i<size; i++ ) { 1808 nz = procsnz[i]; 1809 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1810 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1811 } 1812 PetscFree(cols); 1813 } 1814 else { 1815 /* determine buffer space needed for message */ 1816 nz = 0; 1817 for ( i=0; i<m; i++ ) { 1818 nz += ourlens[i]; 1819 } 1820 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1821 1822 /* receive message of column indices*/ 1823 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1824 MPI_Get_count(&status,MPI_INT,&maxnz); 1825 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1826 } 1827 1828 /* loop over local rows, determining number of off diagonal entries */ 1829 PetscMemzero(offlens,m*sizeof(int)); 1830 jj = 0; 1831 for ( i=0; i<m; i++ ) { 1832 for ( j=0; j<ourlens[i]; j++ ) { 1833 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1834 jj++; 1835 } 1836 } 1837 1838 /* create our matrix */ 1839 for ( i=0; i<m; i++ ) { 1840 ourlens[i] -= offlens[i]; 1841 } 1842 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1843 A = *newmat; 1844 MatSetOption(A,MAT_COLUMNS_SORTED); 1845 for ( i=0; i<m; i++ ) { 1846 ourlens[i] += offlens[i]; 1847 } 1848 1849 if (!rank) { 1850 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1851 1852 /* read in my part of the matrix numerical values */ 1853 nz = procsnz[0]; 1854 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1855 1856 /* insert into matrix */ 1857 jj = rstart; 1858 smycols = mycols; 1859 svals = vals; 1860 for ( i=0; i<m; i++ ) { 1861 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1862 smycols += ourlens[i]; 1863 svals += ourlens[i]; 1864 jj++; 1865 } 1866 1867 /* read in other processors and ship out */ 1868 for ( i=1; i<size; i++ ) { 1869 nz = procsnz[i]; 1870 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1871 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1872 } 1873 PetscFree(procsnz); 1874 } 1875 else { 1876 /* receive numeric values */ 1877 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1878 1879 /* receive message of values*/ 1880 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1881 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1882 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1883 1884 /* insert into matrix */ 1885 jj = rstart; 1886 smycols = mycols; 1887 svals = vals; 1888 for ( i=0; i<m; i++ ) { 1889 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1890 smycols += ourlens[i]; 1891 svals += ourlens[i]; 1892 jj++; 1893 } 1894 } 1895 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1896 1897 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1898 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1899 return 0; 1900 } 1901