#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.216 1997/09/26 02:19:06 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
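/*
   A minimal sketch (hypothetical numbers, zero-based indexing) of the
   reallocation performed above: with CHUNKSIZE = 15, inserting into a
   full row of a matrix whose last row pointer is ai[a->m] = 100 allocates
   one new block of new_nz = 115 index/value slots, copies the entries
   before and after the insertion row, and shifts every later row pointer
   up by 15.
*/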
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
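/*
   A sketch of typical caller usage (hypothetical indices and values;
   assumes an MPIAIJ matrix mat and a declared int ierr): rows and columns
   are always given in global numbering, and entries destined for another
   processor are stashed below and communicated during assembly.

      int    row = 3, col = 7;
      Scalar v   = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/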
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
          (global index,value); we store the global index as a double
          to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}

extern int MatSetUpMultiply_MPIAIJ(Mat);
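/*
   A minimal sketch of the wire format used above (hypothetical entry):
   an off-processor contribution to global row 5, column 9 with value 2.0
   is packed as three consecutive Scalars {5.0, 9.0, 2.0} in svalues, and
   is decoded again in MatAssemblyEnd_MPIAIJ() below.
*/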
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
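/*
   A sketch of caller usage for MatZeroRows() (hypothetical global row
   numbers), zeroing two rows and placing 1.0 on their diagonals:

      IS     is;
      int    rows[2] = {0,5};
      Scalar one = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);
*/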
494 */ 495 #undef __FUNC__ 496 #define __FUNC__ "MatZeroRows_MPIAIJ" 497 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 498 { 499 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 500 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 501 int *procs,*nprocs,j,found,idx,nsends,*work; 502 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 503 int *rvalues,tag = A->tag,count,base,slen,n,*source; 504 int *lens,imdex,*lrows,*values; 505 MPI_Comm comm = A->comm; 506 MPI_Request *send_waits,*recv_waits; 507 MPI_Status recv_status,*send_status; 508 IS istmp; 509 510 PetscFunctionBegin; 511 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 512 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 513 514 /* first count number of contributors to each processor */ 515 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 516 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 517 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 518 for ( i=0; i<N; i++ ) { 519 idx = rows[i]; 520 found = 0; 521 for ( j=0; j<size; j++ ) { 522 if (idx >= owners[j] && idx < owners[j+1]) { 523 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 524 } 525 } 526 if (!found) SETERRQ(1,0,"Index out of range"); 527 } 528 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 529 530 /* inform other processors of number of messages and max length*/ 531 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 532 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 533 nrecvs = work[rank]; 534 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 535 nmax = work[rank]; 536 PetscFree(work); 537 538 /* post receives: */ 539 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 540 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 541 CHKPTRQ(recv_waits); 542 for ( i=0; i<nrecvs; i++ ) { 543 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 544 } 545 546 /* do sends: 547 1) starts[i] gives the starting index in svalues for stuff going to 548 the ith processor 549 */ 550 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 551 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 552 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 553 starts[0] = 0; 554 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 555 for ( i=0; i<N; i++ ) { 556 svalues[starts[owner[i]]++] = rows[i]; 557 } 558 ISRestoreIndices(is,&rows); 559 560 starts[0] = 0; 561 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 562 count = 0; 563 for ( i=0; i<size; i++ ) { 564 if (procs[i]) { 565 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 566 } 567 } 568 PetscFree(starts); 569 570 base = owners[rank]; 571 572 /* wait on receives */ 573 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 574 source = lens + nrecvs; 575 count = nrecvs; slen = 0; 576 while (count) { 577 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 578 /* unpack receives into our local space */ 579 MPI_Get_count(&recv_status,MPI_INT,&n); 580 source[imdex] = recv_status.MPI_SOURCE; 581 lens[imdex] = n; 582 slen += n; 583 count--; 584 } 585 PetscFree(recv_waits); 586 587 /* move the data into the send scatter */ 588 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 589 count = 0; 590 for ( i=0; i<nrecvs; i++ ) { 591 values = rvalues + i*nmax; 592 for ( j=0; j<lens[i]; j++ ) { 593 
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
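/*
   Note on the multiply routines above: communication is overlapped with
   computation.  y = A*x is formed as y = A_diag*x_local while the scatter
   filling the ghost vector lvec is in flight, then y += B_offdiag*lvec
   once the scatter completes.
*/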
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but perhaps not always. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but perhaps not always. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(1,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration.
     3) color, updating outer values between colors.
*/
907 */ 908 #undef __FUNC__ 909 #define __FUNC__ "MatRelax_MPIAIJ" 910 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 911 double fshift,int its,Vec xx) 912 { 913 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 914 Mat AA = mat->A, BB = mat->B; 915 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 916 Scalar *b,*x,*xs,*ls,d,*v,sum; 917 int ierr,*idx, *diag; 918 int n = mat->n, m = mat->m, i,shift = A->indexshift; 919 920 PetscFunctionBegin; 921 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 922 xs = x + shift; /* shift by one for index start of 1 */ 923 ls = ls + shift; 924 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 925 diag = A->diag; 926 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 927 if (flag & SOR_ZERO_INITIAL_GUESS) { 928 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 932 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 933 while (its--) { 934 /* go down through the rows */ 935 for ( i=0; i<m; i++ ) { 936 n = A->i[i+1] - A->i[i]; 937 idx = A->j + A->i[i] + shift; 938 v = A->a + A->i[i] + shift; 939 sum = b[i]; 940 SPARSEDENSEMDOT(sum,xs,v,idx,n); 941 d = fshift + A->a[diag[i]+shift]; 942 n = B->i[i+1] - B->i[i]; 943 idx = B->j + B->i[i] + shift; 944 v = B->a + B->i[i] + shift; 945 SPARSEDENSEMDOT(sum,ls,v,idx,n); 946 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 947 } 948 /* come up through the rows */ 949 for ( i=m-1; i>-1; i-- ) { 950 n = A->i[i+1] - A->i[i]; 951 idx = A->j + A->i[i] + shift; 952 v = A->a + A->i[i] + shift; 953 sum = b[i]; 954 SPARSEDENSEMDOT(sum,xs,v,idx,n); 955 d = fshift + A->a[diag[i]+shift]; 956 n = B->i[i+1] - B->i[i]; 957 idx = B->j + B->i[i] + shift; 958 v = B->a + B->i[i] + shift; 959 SPARSEDENSEMDOT(sum,ls,v,idx,n); 960 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 961 } 962 } 963 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 964 if (flag & SOR_ZERO_INITIAL_GUESS) { 965 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 969 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 970 while (its--) { 971 for ( i=0; i<m; i++ ) { 972 n = A->i[i+1] - A->i[i]; 973 idx = A->j + A->i[i] + shift; 974 v = A->a + A->i[i] + shift; 975 sum = b[i]; 976 SPARSEDENSEMDOT(sum,xs,v,idx,n); 977 d = fshift + A->a[diag[i]+shift]; 978 n = B->i[i+1] - B->i[i]; 979 idx = B->j + B->i[i] + shift; 980 v = B->a + B->i[i] + shift; 981 SPARSEDENSEMDOT(sum,ls,v,idx,n); 982 x[i] = (1. 
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(1,0,"Option not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
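/*
   A sketch of caller usage (assumes an assembled matrix mat and a
   declared int ierr): summing the nonzero counts over all processors.

      MatInfo info;
      ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
      PetscPrintf(PETSC_COMM_WORLD,"global nonzeros %d\n",(int)info.nz_used);
*/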
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
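/*
   A sketch of caller usage for MatGetRow()/MatRestoreRow() above
   (assumes an assembled matrix mat and a declared int ierr); only
   locally owned rows may be requested, and each MatGetRow() must be
   paired with a MatRestoreRow():

      int    nz,*cols,rstart,rend;
      Scalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      ierr = MatGetRow(mat,rstart,&nz,&cols,&vals); CHKERRQ(ierr);
      ... use the nz entries of cols[] and vals[] ...
      ierr = MatRestoreRow(mat,rstart,&nz,&cols,&vals); CHKERRQ(ierr);
*/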
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    } else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we free the data
       structures from a and replace them with those of B */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat, int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int, IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is
       given). This value should be the same as the local size used in creating
       the x vector for the matrix-vector product y = Ax. For square matrices
       n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each
           row) or PETSC_NULL. You must leave room for the diagonal entry even
           if it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
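   Example (a sketch, assuming a square matrix distributed by rows with at
   most 5 nonzeros in the diagonal part and 2 in the off-diagonal part of
   each local row):

$    ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$           M,N,5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);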
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"
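
/*
   MatConvertSameType_MPIAIJ() is not normally called directly; it is reached
   through the generic MatConvert() interface, as in this sketch (the matrix
   names A and B are placeholders):

       Mat B;
       ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);

   which yields a new MATMPIAIJ matrix with the same parallel layout as A.
*/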

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  } else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }
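
  /*
     ourlens[] currently holds the full row lengths; subtracting offlens[]
     below leaves exactly the diagonal-block lengths needed for preallocation.
     The counts are restored right after the matrix is created so that
     ourlens[] again describes whole rows when the numerical values are
     inserted.
  */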

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
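
/*
   A minimal sketch of loading a MATMPIAIJ matrix through the generic
   MatLoad() interface (the file name is a placeholder, and this assumes
   the viewer was opened for binary reading, e.g. with ViewerFileOpenBinary()):

       Viewer viewer;
       Mat    A;

       ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                   &viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/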