1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = ((PetscObject)A)->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead"); 943 944 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 945 if (!rank) { 946 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 947 } else { 948 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 949 } 950 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 951 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 952 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 953 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 954 955 /* copy over the A part */ 956 Aloc = (Mat_SeqAIJ*)aij->A->data; 957 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 958 row = mat->rmap.rstart; 959 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 960 for (i=0; i<m; i++) { 961 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 962 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 963 } 964 aj = Aloc->j; 965 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 966 967 /* copy over the B part */ 968 Aloc = (Mat_SeqAIJ*)aij->B->data; 969 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 970 row = mat->rmap.rstart; 971 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 972 ct = cols; 973 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 974 for (i=0; i<m; i++) { 975 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 976 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 977 } 978 ierr = PetscFree(ct);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 /* 982 Everyone has to call to draw the matrix since the graphics waits are 983 synchronized across all processors that share the PetscDraw object 984 */ 985 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 986 if (!rank) { 987 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 988 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 989 } 990 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 991 ierr = MatDestroy(A);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatView_MPIAIJ" 998 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 999 { 1000 PetscErrorCode ierr; 1001 PetscTruth iascii,isdraw,issocket,isbinary; 1002 1003 PetscFunctionBegin; 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1008 if (iascii || isdraw || isbinary || issocket) { 1009 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1010 } else { 1011 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatRelax_MPIAIJ" 1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1019 { 1020 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1021 PetscErrorCode ierr; 1022 Vec bb1; 1023 1024 PetscFunctionBegin; 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1043 } 1044 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1047 its--; 1048 } 1049 while (its--) { 1050 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1059 } 1060 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1061 if (flag & SOR_ZERO_INITIAL_GUESS) { 1062 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1063 its--; 1064 } 1065 while (its--) { 1066 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1068 1069 /* update rhs: bb1 = bb - B*x */ 1070 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1071 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1072 1073 /* local sweep */ 1074 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1075 } 1076 } else { 1077 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1078 } 1079 1080 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatPermute_MPIAIJ" 1086 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1087 { 1088 MPI_Comm comm,pcomm; 1089 PetscInt first,local_size,nrows,*rows; 1090 int ntids; 1091 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1096 /* make a collective version of 'rowp' */ 1097 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1098 if (pcomm==comm) { 1099 crowp = rowp; 1100 } else { 1101 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1102 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1103 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1104 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1105 } 1106 /* collect the global row permutation and invert it */ 1107 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1108 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1109 if (pcomm!=comm) { 1110 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1111 } 1112 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1113 /* get the local target indices */ 1114 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1115 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1116 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1117 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1118 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1119 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1120 /* the column permutation is so much easier; 1121 make a local version of 'colp' and invert it */ 1122 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1123 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1124 if (ntids==1) { 1125 lcolp = colp; 1126 } else { 1127 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1128 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1129 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1130 } 1131 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1132 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1133 if (ntids>1) { 1134 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1135 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1136 } 1137 /* now we just get the submatrix */ 1138 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1139 /* clean up */ 1140 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1141 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1147 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1148 { 1149 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1150 Mat A = mat->A,B = mat->B; 1151 PetscErrorCode ierr; 1152 PetscReal isend[5],irecv[5]; 1153 1154 PetscFunctionBegin; 1155 info->block_size = 1.0; 1156 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1157 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1158 isend[3] = info->memory; isend[4] = info->mallocs; 1159 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1161 isend[3] += info->memory; isend[4] += info->mallocs; 1162 if (flag == MAT_LOCAL) { 1163 info->nz_used = isend[0]; 1164 info->nz_allocated = isend[1]; 1165 info->nz_unneeded = isend[2]; 1166 info->memory = isend[3]; 1167 info->mallocs = isend[4]; 1168 } else if (flag == MAT_GLOBAL_MAX) { 1169 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1170 info->nz_used = irecv[0]; 1171 info->nz_allocated = irecv[1]; 1172 info->nz_unneeded = irecv[2]; 1173 info->memory = irecv[3]; 1174 info->mallocs = irecv[4]; 1175 } else if (flag == MAT_GLOBAL_SUM) { 1176 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1177 info->nz_used = irecv[0]; 1178 info->nz_allocated = irecv[1]; 1179 info->nz_unneeded = irecv[2]; 1180 info->memory = irecv[3]; 1181 info->mallocs = irecv[4]; 1182 } 1183 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1184 info->fill_ratio_needed = 0; 1185 info->factor_mallocs = 0; 1186 info->rows_global = (double)matin->rmap.N; 1187 info->columns_global = (double)matin->cmap.N; 1188 info->rows_local = (double)matin->rmap.n; 1189 info->columns_local = (double)matin->cmap.N; 1190 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatSetOption_MPIAIJ" 1196 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 switch (op) { 1203 case MAT_NEW_NONZERO_LOCATIONS: 1204 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1205 case MAT_KEEP_ZEROED_ROWS: 1206 case MAT_NEW_NONZERO_LOCATION_ERR: 1207 case MAT_USE_INODES: 1208 case MAT_IGNORE_ZERO_ENTRIES: 1209 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1210 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1211 break; 1212 case MAT_ROW_ORIENTED: 1213 a->roworiented = flg; 1214 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1216 break; 1217 case MAT_NEW_DIAGONALS: 1218 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1219 break; 1220 case MAT_IGNORE_OFF_PROC_ENTRIES: 1221 a->donotstash = PETSC_TRUE; 1222 break; 1223 case MAT_SYMMETRIC: 1224 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1225 break; 1226 case MAT_STRUCTURALLY_SYMMETRIC: 1227 case MAT_HERMITIAN: 1228 case MAT_SYMMETRY_ETERNAL: 1229 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1230 break; 1231 default: 1232 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatGetRow_MPIAIJ" 1239 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1240 { 1241 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1242 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1243 PetscErrorCode ierr; 1244 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1245 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1246 PetscInt *cmap,*idx_p; 1247 1248 PetscFunctionBegin; 1249 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1250 mat->getrowactive = PETSC_TRUE; 1251 1252 if (!mat->rowvalues && (idx || v)) { 1253 /* 1254 allocate enough space to hold information from the longest row. 1255 */ 1256 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1257 PetscInt max = 1,tmp; 1258 for (i=0; i<matin->rmap.n; i++) { 1259 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1260 if (max < tmp) { max = tmp; } 1261 } 1262 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1263 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1264 } 1265 1266 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1267 lrow = row - rstart; 1268 1269 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1270 if (!v) {pvA = 0; pvB = 0;} 1271 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1272 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1273 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1274 nztot = nzA + nzB; 1275 1276 cmap = mat->garray; 1277 if (v || idx) { 1278 if (nztot) { 1279 /* Sort by increasing column numbers, assuming A and B already sorted */ 1280 PetscInt imark = -1; 1281 if (v) { 1282 *v = v_p = mat->rowvalues; 1283 for (i=0; i<nzB; i++) { 1284 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1285 else break; 1286 } 1287 imark = i; 1288 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1289 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1290 } 1291 if (idx) { 1292 *idx = idx_p = mat->rowindices; 1293 if (imark > -1) { 1294 for (i=0; i<imark; i++) { 1295 idx_p[i] = cmap[cworkB[i]]; 1296 } 1297 } else { 1298 for (i=0; i<nzB; i++) { 1299 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1300 else break; 1301 } 1302 imark = i; 1303 } 1304 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1305 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1306 } 1307 } else { 1308 if (idx) *idx = 0; 1309 if (v) *v = 0; 1310 } 1311 } 1312 *nz = nztot; 1313 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1314 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1320 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1321 { 1322 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1323 1324 PetscFunctionBegin; 1325 if (!aij->getrowactive) { 1326 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1327 } 1328 aij->getrowactive = PETSC_FALSE; 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatNorm_MPIAIJ" 1334 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1335 { 1336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1337 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1338 PetscErrorCode ierr; 1339 PetscInt i,j,cstart = mat->cmap.rstart; 1340 PetscReal sum = 0.0; 1341 PetscScalar *v; 1342 1343 PetscFunctionBegin; 1344 if (aij->size == 1) { 1345 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1346 } else { 1347 if (type == NORM_FROBENIUS) { 1348 v = amat->a; 1349 for (i=0; i<amat->nz; i++) { 1350 #if defined(PETSC_USE_COMPLEX) 1351 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1352 #else 1353 sum += (*v)*(*v); v++; 1354 #endif 1355 } 1356 v = bmat->a; 1357 for (i=0; i<bmat->nz; i++) { 1358 #if defined(PETSC_USE_COMPLEX) 1359 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1360 #else 1361 sum += (*v)*(*v); v++; 1362 #endif 1363 } 1364 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1365 *norm = sqrt(*norm); 1366 } else if (type == NORM_1) { /* max column norm */ 1367 PetscReal *tmp,*tmp2; 1368 PetscInt *jj,*garray = aij->garray; 1369 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1370 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1371 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1372 *norm = 0.0; 1373 v = amat->a; jj = amat->j; 1374 for (j=0; j<amat->nz; j++) { 1375 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1376 } 1377 v = bmat->a; jj = bmat->j; 1378 for (j=0; j<bmat->nz; j++) { 1379 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1380 } 1381 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1382 for (j=0; j<mat->cmap.N; j++) { 1383 if (tmp2[j] > *norm) *norm = tmp2[j]; 1384 } 1385 ierr = PetscFree(tmp);CHKERRQ(ierr); 1386 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1387 } else if (type == NORM_INFINITY) { /* max row norm */ 1388 PetscReal ntemp = 0.0; 1389 for (j=0; j<aij->A->rmap.n; j++) { 1390 v = amat->a + amat->i[j]; 1391 sum = 0.0; 1392 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1393 sum += PetscAbsScalar(*v); v++; 1394 } 1395 v = bmat->a + bmat->i[j]; 1396 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1397 sum += PetscAbsScalar(*v); v++; 1398 } 1399 if (sum > ntemp) ntemp = sum; 1400 } 1401 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1402 } else { 1403 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1404 } 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatTranspose_MPIAIJ" 1411 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1412 { 1413 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1414 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1415 PetscErrorCode ierr; 1416 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1417 PetscInt cstart=A->cmap.rstart,ncol; 1418 Mat B; 1419 PetscScalar *array; 1420 1421 PetscFunctionBegin; 1422 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1423 1424 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1425 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1426 ai = Aloc->i; aj = Aloc->j; 1427 bi = Bloc->i; bj = Bloc->j; 1428 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1429 cols = d_nnz + na + 1; /* work space to be used by B part */ 1430 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1431 for (i=0; i<ai[ma]; i++){ 1432 d_nnz[aj[i]] ++; 1433 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1434 } 1435 1436 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1437 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1438 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1439 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1440 1441 /* copy over the A part */ 1442 array = Aloc->a; 1443 row = A->rmap.rstart; 1444 for (i=0; i<ma; i++) { 1445 ncol = ai[i+1]-ai[i]; 1446 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1447 row++; array += ncol; aj += ncol; 1448 } 1449 aj = Aloc->j; 1450 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1451 1452 /* copy over the B part */ 1453 array = Bloc->a; 1454 row = A->rmap.rstart; 1455 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1456 for (i=0; i<mb; i++) { 1457 ncol = bi[i+1]-bi[i]; 1458 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1459 row++; array += ncol; cols += ncol; 1460 } 1461 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1462 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 if (matout) { 1465 *matout = B; 1466 } else { 1467 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1474 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1475 { 1476 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1477 Mat a = aij->A,b = aij->B; 1478 PetscErrorCode ierr; 1479 PetscInt s1,s2,s3; 1480 1481 PetscFunctionBegin; 1482 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1483 if (rr) { 1484 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1485 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1486 /* Overlap communication with computation. */ 1487 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 } 1489 if (ll) { 1490 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1491 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1492 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1493 } 1494 /* scale the diagonal block */ 1495 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1496 1497 if (rr) { 1498 /* Do a scatter end and then right scale the off-diagonal block */ 1499 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1501 } 1502 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1508 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1509 { 1510 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1511 PetscErrorCode ierr; 1512 1513 PetscFunctionBegin; 1514 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1515 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1516 PetscFunctionReturn(0); 1517 } 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1520 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1521 { 1522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatEqual_MPIAIJ" 1532 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1533 { 1534 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1535 Mat a,b,c,d; 1536 PetscTruth flg; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 a = matA->A; b = matA->B; 1541 c = matB->A; d = matB->B; 1542 1543 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1544 if (flg) { 1545 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1546 } 1547 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatCopy_MPIAIJ" 1553 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1554 { 1555 PetscErrorCode ierr; 1556 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1557 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1558 1559 PetscFunctionBegin; 1560 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1561 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1562 /* because of the column compression in the off-processor part of the matrix a->B, 1563 the number of columns in a->B and b->B may be different, hence we cannot call 1564 the MatCopy() directly on the two parts. If need be, we can provide a more 1565 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1566 then copying the submatrices */ 1567 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1568 } else { 1569 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1570 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1571 } 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1577 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1578 { 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #include "petscblaslapack.h" 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatAXPY_MPIAIJ" 1589 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1590 { 1591 PetscErrorCode ierr; 1592 PetscInt i; 1593 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1594 PetscBLASInt bnz,one=1; 1595 Mat_SeqAIJ *x,*y; 1596 1597 PetscFunctionBegin; 1598 if (str == SAME_NONZERO_PATTERN) { 1599 PetscScalar alpha = a; 1600 x = (Mat_SeqAIJ *)xx->A->data; 1601 y = (Mat_SeqAIJ *)yy->A->data; 1602 bnz = PetscBLASIntCast(x->nz); 1603 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1604 x = (Mat_SeqAIJ *)xx->B->data; 1605 y = (Mat_SeqAIJ *)yy->B->data; 1606 bnz = PetscBLASIntCast(x->nz); 1607 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1608 } else if (str == SUBSET_NONZERO_PATTERN) { 1609 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1610 1611 x = (Mat_SeqAIJ *)xx->B->data; 1612 y = (Mat_SeqAIJ *)yy->B->data; 1613 if (y->xtoy && y->XtoY != xx->B) { 1614 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1615 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1616 } 1617 if (!y->xtoy) { /* get xtoy */ 1618 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1619 y->XtoY = xx->B; 1620 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1621 } 1622 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1623 } else { 1624 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatConjugate_MPIAIJ" 1633 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1634 { 1635 #if defined(PETSC_USE_COMPLEX) 1636 PetscErrorCode ierr; 1637 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1638 1639 PetscFunctionBegin; 1640 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1641 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1642 #else 1643 PetscFunctionBegin; 1644 #endif 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatRealPart_MPIAIJ" 1650 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1651 { 1652 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1653 PetscErrorCode ierr; 1654 1655 PetscFunctionBegin; 1656 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1657 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1663 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1670 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #ifdef PETSC_HAVE_PBGL 1675 1676 #include <boost/parallel/mpi/bsp_process_group.hpp> 1677 #include <boost/graph/distributed/ilu_default_graph.hpp> 1678 #include <boost/graph/distributed/ilu_0_block.hpp> 1679 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1680 #include <boost/graph/distributed/petsc/interface.hpp> 1681 #include <boost/multi_array.hpp> 1682 #include <boost/parallel/distributed_property_map.hpp> 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1686 /* 1687 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1688 */ 1689 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1690 { 1691 namespace petsc = boost::distributed::petsc; 1692 1693 namespace graph_dist = boost::graph::distributed; 1694 using boost::graph::distributed::ilu_default::process_group_type; 1695 using boost::graph::ilu_permuted; 1696 1697 PetscTruth row_identity, col_identity; 1698 PetscContainer c; 1699 PetscInt m, n, M, N; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1704 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1705 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1706 if (!row_identity || !col_identity) { 1707 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1708 } 1709 1710 process_group_type pg; 1711 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1712 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1713 lgraph_type& level_graph = *lgraph_p; 1714 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1715 1716 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1717 ilu_permuted(level_graph); 1718 1719 /* put together the new matrix */ 1720 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1721 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1722 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1723 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1724 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1725 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1726 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 (*fact)->factor = FACTOR_LU; 1728 1729 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1730 ierr = PetscContainerSetPointer(c, lgraph_p); 1731 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1737 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1738 { 1739 PetscFunctionBegin; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSolve_MPIAIJ" 1745 /* 1746 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1747 */ 1748 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1749 { 1750 namespace graph_dist = boost::graph::distributed; 1751 1752 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1753 lgraph_type* lgraph_p; 1754 PetscContainer c; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1759 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1760 ierr = VecCopy(b, x); CHKERRQ(ierr); 1761 1762 PetscScalar* array_x; 1763 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1764 PetscInt sx; 1765 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1766 1767 PetscScalar* array_b; 1768 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1769 PetscInt sb; 1770 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1771 1772 lgraph_type& level_graph = *lgraph_p; 1773 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1774 1775 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1776 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1777 ref_x(array_x, boost::extents[num_vertices(graph)]); 1778 1779 typedef boost::iterator_property_map<array_ref_type::iterator, 1780 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1781 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1782 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1783 1784 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1785 1786 PetscFunctionReturn(0); 1787 } 1788 #endif 1789 1790 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1791 PetscInt nzlocal,nsends,nrecvs; 1792 PetscMPIInt *send_rank; 1793 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1794 PetscScalar *sbuf_a,**rbuf_a; 1795 PetscErrorCode (*MatDestroy)(Mat); 1796 } Mat_Redundant; 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1800 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1801 { 1802 PetscErrorCode ierr; 1803 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1804 PetscInt i; 1805 1806 PetscFunctionBegin; 1807 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1808 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1809 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1810 for (i=0; i<redund->nrecvs; i++){ 1811 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1812 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1813 } 1814 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1815 ierr = PetscFree(redund);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatDestroy_MatRedundant" 1821 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1822 { 1823 PetscErrorCode ierr; 1824 PetscContainer container; 1825 Mat_Redundant *redund=PETSC_NULL; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1829 if (container) { 1830 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1831 } else { 1832 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1833 } 1834 A->ops->destroy = redund->MatDestroy; 1835 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1836 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1837 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1843 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1844 { 1845 PetscMPIInt rank,size; 1846 MPI_Comm comm=((PetscObject)mat)->comm; 1847 PetscErrorCode ierr; 1848 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1849 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1850 PetscInt *rowrange=mat->rmap.range; 1851 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1852 Mat A=aij->A,B=aij->B,C=*matredundant; 1853 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1854 PetscScalar *sbuf_a; 1855 PetscInt nzlocal=a->nz+b->nz; 1856 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1857 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1858 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1859 PetscScalar *vals,*aworkA,*aworkB; 1860 PetscMPIInt tag1,tag2,tag3,imdex; 1861 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1862 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1863 MPI_Status recv_status,*send_status; 1864 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1865 PetscInt **rbuf_j=PETSC_NULL; 1866 PetscScalar **rbuf_a=PETSC_NULL; 1867 Mat_Redundant *redund=PETSC_NULL; 1868 PetscContainer container; 1869 1870 PetscFunctionBegin; 1871 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1872 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1873 1874 if (reuse == MAT_REUSE_MATRIX) { 1875 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1876 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1877 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1878 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1879 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1880 if (container) { 1881 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1882 } else { 1883 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1884 } 1885 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1886 1887 nsends = redund->nsends; 1888 nrecvs = redund->nrecvs; 1889 send_rank = redund->send_rank; recv_rank = send_rank + size; 1890 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1891 sbuf_j = redund->sbuf_j; 1892 sbuf_a = redund->sbuf_a; 1893 rbuf_j = redund->rbuf_j; 1894 rbuf_a = redund->rbuf_a; 1895 } 1896 1897 if (reuse == MAT_INITIAL_MATRIX){ 1898 PetscMPIInt subrank,subsize; 1899 PetscInt nleftover,np_subcomm; 1900 /* get the destination processors' id send_rank, nsends and nrecvs */ 1901 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1902 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1903 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1904 recv_rank = send_rank + size; 1905 np_subcomm = size/nsubcomm; 1906 nleftover = size - nsubcomm*np_subcomm; 1907 nsends = 0; nrecvs = 0; 1908 for (i=0; i<size; i++){ /* i=rank*/ 1909 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1910 send_rank[nsends] = i; nsends++; 1911 recv_rank[nrecvs++] = i; 1912 } 1913 } 1914 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1915 i = size-nleftover-1; 1916 j = 0; 1917 while (j < nsubcomm - nleftover){ 1918 send_rank[nsends++] = i; 1919 i--; j++; 1920 } 1921 } 1922 1923 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1924 for (i=0; i<nleftover; i++){ 1925 recv_rank[nrecvs++] = size-nleftover+i; 1926 } 1927 } 1928 1929 /* allocate sbuf_j, sbuf_a */ 1930 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1931 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1932 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1933 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1934 1935 /* copy mat's local entries into the buffers */ 1936 if (reuse == MAT_INITIAL_MATRIX){ 1937 rownz_max = 0; 1938 rptr = sbuf_j; 1939 cols = sbuf_j + rend-rstart + 1; 1940 vals = sbuf_a; 1941 rptr[0] = 0; 1942 for (i=0; i<rend-rstart; i++){ 1943 row = i + rstart; 1944 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1945 ncols = nzA + nzB; 1946 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1947 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1948 /* load the column indices for this row into cols */ 1949 lwrite = 0; 1950 for (l=0; l<nzB; l++) { 1951 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1952 vals[lwrite] = aworkB[l]; 1953 cols[lwrite++] = ctmp; 1954 } 1955 } 1956 for (l=0; l<nzA; l++){ 1957 vals[lwrite] = aworkA[l]; 1958 cols[lwrite++] = cstart + cworkA[l]; 1959 } 1960 for (l=0; l<nzB; l++) { 1961 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1962 vals[lwrite] = aworkB[l]; 1963 cols[lwrite++] = ctmp; 1964 } 1965 } 1966 vals += ncols; 1967 cols += ncols; 1968 rptr[i+1] = rptr[i] + ncols; 1969 if (rownz_max < ncols) rownz_max = ncols; 1970 } 1971 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1972 } else { /* only copy matrix values into sbuf_a */ 1973 rptr = sbuf_j; 1974 vals = sbuf_a; 1975 rptr[0] = 0; 1976 for (i=0; i<rend-rstart; i++){ 1977 row = i + rstart; 1978 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1979 ncols = nzA + nzB; 1980 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1981 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1982 lwrite = 0; 1983 for (l=0; l<nzB; l++) { 1984 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1985 } 1986 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1987 for (l=0; l<nzB; l++) { 1988 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1989 } 1990 vals += ncols; 1991 rptr[i+1] = rptr[i] + ncols; 1992 } 1993 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1994 1995 /* send nzlocal to others, and recv other's nzlocal */ 1996 /*--------------------------------------------------*/ 1997 if (reuse == MAT_INITIAL_MATRIX){ 1998 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 1999 s_waits2 = s_waits3 + nsends; 2000 s_waits1 = s_waits2 + nsends; 2001 r_waits1 = s_waits1 + nsends; 2002 r_waits2 = r_waits1 + nrecvs; 2003 r_waits3 = r_waits2 + nrecvs; 2004 } else { 2005 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2006 r_waits3 = s_waits3 + nsends; 2007 } 2008 2009 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2010 if (reuse == MAT_INITIAL_MATRIX){ 2011 /* get new tags to keep the communication clean */ 2012 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2013 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2014 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2015 rbuf_nz = sbuf_nz + nsends; 2016 2017 /* post receives of other's nzlocal */ 2018 for (i=0; i<nrecvs; i++){ 2019 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2020 } 2021 /* send nzlocal to others */ 2022 for (i=0; i<nsends; i++){ 2023 sbuf_nz[i] = nzlocal; 2024 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2025 } 2026 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2027 count = nrecvs; 2028 while (count) { 2029 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2030 recv_rank[imdex] = recv_status.MPI_SOURCE; 2031 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2032 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2033 2034 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2035 rbuf_nz[imdex] += i + 2; 2036 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2037 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2038 count--; 2039 } 2040 /* wait on sends of nzlocal */ 2041 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2042 /* send mat->i,j to others, and recv from other's */ 2043 /*------------------------------------------------*/ 2044 for (i=0; i<nsends; i++){ 2045 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2046 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2047 } 2048 /* wait on receives of mat->i,j */ 2049 /*------------------------------*/ 2050 count = nrecvs; 2051 while (count) { 2052 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2053 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2054 count--; 2055 } 2056 /* wait on sends of mat->i,j */ 2057 /*---------------------------*/ 2058 if (nsends) { 2059 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2060 } 2061 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2062 2063 /* post receives, send and receive mat->a */ 2064 /*----------------------------------------*/ 2065 for (imdex=0; imdex<nrecvs; imdex++) { 2066 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2067 } 2068 for (i=0; i<nsends; i++){ 2069 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2070 } 2071 count = nrecvs; 2072 while (count) { 2073 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2074 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2075 count--; 2076 } 2077 if (nsends) { 2078 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2079 } 2080 2081 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2082 2083 /* create redundant matrix */ 2084 /*-------------------------*/ 2085 if (reuse == MAT_INITIAL_MATRIX){ 2086 /* compute rownz_max for preallocation */ 2087 for (imdex=0; imdex<nrecvs; imdex++){ 2088 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2089 rptr = rbuf_j[imdex]; 2090 for (i=0; i<j; i++){ 2091 ncols = rptr[i+1] - rptr[i]; 2092 if (rownz_max < ncols) rownz_max = ncols; 2093 } 2094 } 2095 2096 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2097 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2098 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2099 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2100 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2101 } else { 2102 C = *matredundant; 2103 } 2104 2105 /* insert local matrix entries */ 2106 rptr = sbuf_j; 2107 cols = sbuf_j + rend-rstart + 1; 2108 vals = sbuf_a; 2109 for (i=0; i<rend-rstart; i++){ 2110 row = i + rstart; 2111 ncols = rptr[i+1] - rptr[i]; 2112 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2113 vals += ncols; 2114 cols += ncols; 2115 } 2116 /* insert received matrix entries */ 2117 for (imdex=0; imdex<nrecvs; imdex++){ 2118 rstart = rowrange[recv_rank[imdex]]; 2119 rend = rowrange[recv_rank[imdex]+1]; 2120 rptr = rbuf_j[imdex]; 2121 cols = rbuf_j[imdex] + rend-rstart + 1; 2122 vals = rbuf_a[imdex]; 2123 for (i=0; i<rend-rstart; i++){ 2124 row = i + rstart; 2125 ncols = rptr[i+1] - rptr[i]; 2126 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2127 vals += ncols; 2128 cols += ncols; 2129 } 2130 } 2131 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2132 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2133 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2134 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2135 if (reuse == MAT_INITIAL_MATRIX){ 2136 PetscContainer container; 2137 *matredundant = C; 2138 /* create a supporting struct and attach it to C for reuse */ 2139 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2140 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2141 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2142 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2143 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2144 2145 redund->nzlocal = nzlocal; 2146 redund->nsends = nsends; 2147 redund->nrecvs = nrecvs; 2148 redund->send_rank = send_rank; 2149 redund->sbuf_nz = sbuf_nz; 2150 redund->sbuf_j = sbuf_j; 2151 redund->sbuf_a = sbuf_a; 2152 redund->rbuf_j = rbuf_j; 2153 redund->rbuf_a = rbuf_a; 2154 2155 redund->MatDestroy = C->ops->destroy; 2156 C->ops->destroy = MatDestroy_MatRedundant; 2157 } 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2163 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2164 { 2165 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2166 PetscInt n = A->rmap.n; 2167 PetscInt cstart = A->cmap.rstart; 2168 PetscInt *cmap = mat->garray; 2169 PetscInt *diagIdx, *offdiagIdx; 2170 Vec diagV, offdiagV; 2171 PetscScalar *a, *diagA, *offdiagA; 2172 PetscInt r; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2177 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2178 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2179 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2180 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2181 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2182 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2183 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2184 for(r = 0; r < n; ++r) { 2185 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2186 a[r] = diagA[r]; 2187 idx[r] = cstart + diagIdx[r]; 2188 } else { 2189 a[r] = offdiagA[r]; 2190 idx[r] = cmap[offdiagIdx[r]]; 2191 } 2192 } 2193 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2194 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2195 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2196 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2197 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2198 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2204 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2205 { 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 /* -------------------------------------------------------------------*/ 2214 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2215 MatGetRow_MPIAIJ, 2216 MatRestoreRow_MPIAIJ, 2217 MatMult_MPIAIJ, 2218 /* 4*/ MatMultAdd_MPIAIJ, 2219 MatMultTranspose_MPIAIJ, 2220 MatMultTransposeAdd_MPIAIJ, 2221 #ifdef PETSC_HAVE_PBGL 2222 MatSolve_MPIAIJ, 2223 #else 2224 0, 2225 #endif 2226 0, 2227 0, 2228 /*10*/ 0, 2229 0, 2230 0, 2231 MatRelax_MPIAIJ, 2232 MatTranspose_MPIAIJ, 2233 /*15*/ MatGetInfo_MPIAIJ, 2234 MatEqual_MPIAIJ, 2235 MatGetDiagonal_MPIAIJ, 2236 MatDiagonalScale_MPIAIJ, 2237 MatNorm_MPIAIJ, 2238 /*20*/ MatAssemblyBegin_MPIAIJ, 2239 MatAssemblyEnd_MPIAIJ, 2240 0, 2241 MatSetOption_MPIAIJ, 2242 MatZeroEntries_MPIAIJ, 2243 /*25*/ MatZeroRows_MPIAIJ, 2244 0, 2245 #ifdef PETSC_HAVE_PBGL 2246 MatLUFactorNumeric_MPIAIJ, 2247 #else 2248 0, 2249 #endif 2250 0, 2251 0, 2252 /*30*/ MatSetUpPreallocation_MPIAIJ, 2253 #ifdef PETSC_HAVE_PBGL 2254 MatILUFactorSymbolic_MPIAIJ, 2255 #else 2256 0, 2257 #endif 2258 0, 2259 0, 2260 0, 2261 /*35*/ MatDuplicate_MPIAIJ, 2262 0, 2263 0, 2264 0, 2265 0, 2266 /*40*/ MatAXPY_MPIAIJ, 2267 MatGetSubMatrices_MPIAIJ, 2268 MatIncreaseOverlap_MPIAIJ, 2269 MatGetValues_MPIAIJ, 2270 MatCopy_MPIAIJ, 2271 /*45*/ 0, 2272 MatScale_MPIAIJ, 2273 0, 2274 0, 2275 0, 2276 /*50*/ MatSetBlockSize_MPIAIJ, 2277 0, 2278 0, 2279 0, 2280 0, 2281 /*55*/ MatFDColoringCreate_MPIAIJ, 2282 0, 2283 MatSetUnfactored_MPIAIJ, 2284 MatPermute_MPIAIJ, 2285 0, 2286 /*60*/ MatGetSubMatrix_MPIAIJ, 2287 MatDestroy_MPIAIJ, 2288 MatView_MPIAIJ, 2289 0, 2290 0, 2291 /*65*/ 0, 2292 0, 2293 0, 2294 0, 2295 0, 2296 /*70*/ 0, 2297 0, 2298 MatSetColoring_MPIAIJ, 2299 #if defined(PETSC_HAVE_ADIC) 2300 MatSetValuesAdic_MPIAIJ, 2301 #else 2302 0, 2303 #endif 2304 MatSetValuesAdifor_MPIAIJ, 2305 /*75*/ 0, 2306 0, 2307 0, 2308 0, 2309 0, 2310 /*80*/ 0, 2311 0, 2312 0, 2313 0, 2314 /*84*/ MatLoad_MPIAIJ, 2315 0, 2316 0, 2317 0, 2318 0, 2319 0, 2320 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2321 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2322 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2323 MatPtAP_Basic, 2324 MatPtAPSymbolic_MPIAIJ, 2325 /*95*/ MatPtAPNumeric_MPIAIJ, 2326 0, 2327 0, 2328 0, 2329 0, 2330 /*100*/0, 2331 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2332 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2333 MatConjugate_MPIAIJ, 2334 0, 2335 /*105*/MatSetValuesRow_MPIAIJ, 2336 MatRealPart_MPIAIJ, 2337 MatImaginaryPart_MPIAIJ, 2338 0, 2339 0, 2340 /*110*/0, 2341 MatGetRedundantMatrix_MPIAIJ, 2342 MatGetRowMin_MPIAIJ, 2343 0, 2344 0, 2345 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2346 2347 /* ----------------------------------------------------------------------------------------*/ 2348 2349 EXTERN_C_BEGIN 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2352 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2353 { 2354 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2359 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 EXTERN_C_END 2363 2364 EXTERN_C_BEGIN 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2367 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2368 { 2369 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2374 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 EXTERN_C_END 2378 2379 #include "petscpc.h" 2380 EXTERN_C_BEGIN 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2383 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2384 { 2385 Mat_MPIAIJ *b; 2386 PetscErrorCode ierr; 2387 PetscInt i; 2388 2389 PetscFunctionBegin; 2390 B->preallocated = PETSC_TRUE; 2391 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2392 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2393 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2394 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2395 2396 B->rmap.bs = B->cmap.bs = 1; 2397 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2398 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2399 if (d_nnz) { 2400 for (i=0; i<B->rmap.n; i++) { 2401 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2402 } 2403 } 2404 if (o_nnz) { 2405 for (i=0; i<B->rmap.n; i++) { 2406 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2407 } 2408 } 2409 b = (Mat_MPIAIJ*)B->data; 2410 2411 /* Explicitly create 2 MATSEQAIJ matrices. */ 2412 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2413 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2414 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2415 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2416 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2417 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2418 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2419 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2420 2421 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2422 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2423 2424 PetscFunctionReturn(0); 2425 } 2426 EXTERN_C_END 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2430 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2431 { 2432 Mat mat; 2433 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 *newmat = 0; 2438 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2439 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2440 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2441 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2442 a = (Mat_MPIAIJ*)mat->data; 2443 2444 mat->factor = matin->factor; 2445 mat->rmap.bs = matin->rmap.bs; 2446 mat->assembled = PETSC_TRUE; 2447 mat->insertmode = NOT_SET_VALUES; 2448 mat->preallocated = PETSC_TRUE; 2449 2450 a->size = oldmat->size; 2451 a->rank = oldmat->rank; 2452 a->donotstash = oldmat->donotstash; 2453 a->roworiented = oldmat->roworiented; 2454 a->rowindices = 0; 2455 a->rowvalues = 0; 2456 a->getrowactive = PETSC_FALSE; 2457 2458 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2459 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2460 2461 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2462 if (oldmat->colmap) { 2463 #if defined (PETSC_USE_CTABLE) 2464 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2465 #else 2466 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2467 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2468 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2469 #endif 2470 } else a->colmap = 0; 2471 if (oldmat->garray) { 2472 PetscInt len; 2473 len = oldmat->B->cmap.n; 2474 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2476 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2477 } else a->garray = 0; 2478 2479 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2480 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2481 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2482 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2483 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2484 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2485 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2486 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2487 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2488 *newmat = mat; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #include "petscsys.h" 2493 2494 #undef __FUNCT__ 2495 #define __FUNCT__ "MatLoad_MPIAIJ" 2496 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2497 { 2498 Mat A; 2499 PetscScalar *vals,*svals; 2500 MPI_Comm comm = ((PetscObject)viewer)->comm; 2501 MPI_Status status; 2502 PetscErrorCode ierr; 2503 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2504 PetscInt i,nz,j,rstart,rend,mmax; 2505 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2506 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2507 PetscInt cend,cstart,n,*rowners; 2508 int fd; 2509 2510 PetscFunctionBegin; 2511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2512 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2513 if (!rank) { 2514 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2515 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2516 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2517 } 2518 2519 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2520 M = header[1]; N = header[2]; 2521 /* determine ownership of all rows */ 2522 m = M/size + ((M % size) > rank); 2523 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2524 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2525 2526 /* First process needs enough room for process with most rows */ 2527 if (!rank) { 2528 mmax = rowners[1]; 2529 for (i=2; i<size; i++) { 2530 mmax = PetscMax(mmax,rowners[i]); 2531 } 2532 } else mmax = m; 2533 2534 rowners[0] = 0; 2535 for (i=2; i<=size; i++) { 2536 rowners[i] += rowners[i-1]; 2537 } 2538 rstart = rowners[rank]; 2539 rend = rowners[rank+1]; 2540 2541 /* distribute row lengths to all processors */ 2542 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2543 if (!rank) { 2544 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2545 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2546 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2547 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2548 for (j=0; j<m; j++) { 2549 procsnz[0] += ourlens[j]; 2550 } 2551 for (i=1; i<size; i++) { 2552 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2553 /* calculate the number of nonzeros on each processor */ 2554 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2555 procsnz[i] += rowlengths[j]; 2556 } 2557 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2558 } 2559 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2560 } else { 2561 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2562 } 2563 2564 if (!rank) { 2565 /* determine max buffer needed and allocate it */ 2566 maxnz = 0; 2567 for (i=0; i<size; i++) { 2568 maxnz = PetscMax(maxnz,procsnz[i]); 2569 } 2570 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2571 2572 /* read in my part of the matrix column indices */ 2573 nz = procsnz[0]; 2574 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2575 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2576 2577 /* read in every one elses and ship off */ 2578 for (i=1; i<size; i++) { 2579 nz = procsnz[i]; 2580 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2581 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2582 } 2583 ierr = PetscFree(cols);CHKERRQ(ierr); 2584 } else { 2585 /* determine buffer space needed for message */ 2586 nz = 0; 2587 for (i=0; i<m; i++) { 2588 nz += ourlens[i]; 2589 } 2590 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2591 2592 /* receive message of column indices*/ 2593 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2594 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2595 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2596 } 2597 2598 /* determine column ownership if matrix is not square */ 2599 if (N != M) { 2600 n = N/size + ((N % size) > rank); 2601 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2602 cstart = cend - n; 2603 } else { 2604 cstart = rstart; 2605 cend = rend; 2606 n = cend - cstart; 2607 } 2608 2609 /* loop over local rows, determining number of off diagonal entries */ 2610 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2611 jj = 0; 2612 for (i=0; i<m; i++) { 2613 for (j=0; j<ourlens[i]; j++) { 2614 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2615 jj++; 2616 } 2617 } 2618 2619 /* create our matrix */ 2620 for (i=0; i<m; i++) { 2621 ourlens[i] -= offlens[i]; 2622 } 2623 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2624 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2625 ierr = MatSetType(A,type);CHKERRQ(ierr); 2626 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2627 2628 for (i=0; i<m; i++) { 2629 ourlens[i] += offlens[i]; 2630 } 2631 2632 if (!rank) { 2633 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2634 2635 /* read in my part of the matrix numerical values */ 2636 nz = procsnz[0]; 2637 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2638 2639 /* insert into matrix */ 2640 jj = rstart; 2641 smycols = mycols; 2642 svals = vals; 2643 for (i=0; i<m; i++) { 2644 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2645 smycols += ourlens[i]; 2646 svals += ourlens[i]; 2647 jj++; 2648 } 2649 2650 /* read in other processors and ship out */ 2651 for (i=1; i<size; i++) { 2652 nz = procsnz[i]; 2653 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2654 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2655 } 2656 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2657 } else { 2658 /* receive numeric values */ 2659 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2660 2661 /* receive message of values*/ 2662 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2663 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2664 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2665 2666 /* insert into matrix */ 2667 jj = rstart; 2668 smycols = mycols; 2669 svals = vals; 2670 for (i=0; i<m; i++) { 2671 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2672 smycols += ourlens[i]; 2673 svals += ourlens[i]; 2674 jj++; 2675 } 2676 } 2677 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2678 ierr = PetscFree(vals);CHKERRQ(ierr); 2679 ierr = PetscFree(mycols);CHKERRQ(ierr); 2680 ierr = PetscFree(rowners);CHKERRQ(ierr); 2681 2682 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 *newmat = A; 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2690 /* 2691 Not great since it makes two copies of the submatrix, first an SeqAIJ 2692 in local and then by concatenating the local matrices the end result. 2693 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2694 */ 2695 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2696 { 2697 PetscErrorCode ierr; 2698 PetscMPIInt rank,size; 2699 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2700 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2701 Mat *local,M,Mreuse; 2702 PetscScalar *vwork,*aa; 2703 MPI_Comm comm = ((PetscObject)mat)->comm; 2704 Mat_SeqAIJ *aij; 2705 2706 2707 PetscFunctionBegin; 2708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2709 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2710 2711 if (call == MAT_REUSE_MATRIX) { 2712 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2713 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2714 local = &Mreuse; 2715 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2716 } else { 2717 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2718 Mreuse = *local; 2719 ierr = PetscFree(local);CHKERRQ(ierr); 2720 } 2721 2722 /* 2723 m - number of local rows 2724 n - number of columns (same on all processors) 2725 rstart - first row in new global matrix generated 2726 */ 2727 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2728 if (call == MAT_INITIAL_MATRIX) { 2729 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2730 ii = aij->i; 2731 jj = aij->j; 2732 2733 /* 2734 Determine the number of non-zeros in the diagonal and off-diagonal 2735 portions of the matrix in order to do correct preallocation 2736 */ 2737 2738 /* first get start and end of "diagonal" columns */ 2739 if (csize == PETSC_DECIDE) { 2740 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2741 if (mglobal == n) { /* square matrix */ 2742 nlocal = m; 2743 } else { 2744 nlocal = n/size + ((n % size) > rank); 2745 } 2746 } else { 2747 nlocal = csize; 2748 } 2749 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2750 rstart = rend - nlocal; 2751 if (rank == size - 1 && rend != n) { 2752 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2753 } 2754 2755 /* next, compute all the lengths */ 2756 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2757 olens = dlens + m; 2758 for (i=0; i<m; i++) { 2759 jend = ii[i+1] - ii[i]; 2760 olen = 0; 2761 dlen = 0; 2762 for (j=0; j<jend; j++) { 2763 if (*jj < rstart || *jj >= rend) olen++; 2764 else dlen++; 2765 jj++; 2766 } 2767 olens[i] = olen; 2768 dlens[i] = dlen; 2769 } 2770 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2771 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2772 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2773 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2774 ierr = PetscFree(dlens);CHKERRQ(ierr); 2775 } else { 2776 PetscInt ml,nl; 2777 2778 M = *newmat; 2779 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2780 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2781 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2782 /* 2783 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2784 rather than the slower MatSetValues(). 2785 */ 2786 M->was_assembled = PETSC_TRUE; 2787 M->assembled = PETSC_FALSE; 2788 } 2789 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2790 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2791 ii = aij->i; 2792 jj = aij->j; 2793 aa = aij->a; 2794 for (i=0; i<m; i++) { 2795 row = rstart + i; 2796 nz = ii[i+1] - ii[i]; 2797 cwork = jj; jj += nz; 2798 vwork = aa; aa += nz; 2799 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2800 } 2801 2802 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2803 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2804 *newmat = M; 2805 2806 /* save submatrix used in processor for next request */ 2807 if (call == MAT_INITIAL_MATRIX) { 2808 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2809 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2810 } 2811 2812 PetscFunctionReturn(0); 2813 } 2814 2815 EXTERN_C_BEGIN 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2818 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2819 { 2820 PetscInt m,cstart, cend,j,nnz,i,d; 2821 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2822 const PetscInt *JJ; 2823 PetscScalar *values; 2824 PetscErrorCode ierr; 2825 2826 PetscFunctionBegin; 2827 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2828 2829 B->rmap.bs = B->cmap.bs = 1; 2830 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2831 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2832 m = B->rmap.n; 2833 cstart = B->cmap.rstart; 2834 cend = B->cmap.rend; 2835 rstart = B->rmap.rstart; 2836 2837 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2838 o_nnz = d_nnz + m; 2839 2840 #if defined(PETSC_USE_DEBUGGING) 2841 for (i=0; i<m; i++) { 2842 nnz = Ii[i+1]- Ii[i]; 2843 JJ = J + Ii[i]; 2844 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2845 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 2846 if (nnz && (JJ[nnz-1] >= B->cmap.N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N); 2847 for (j=1; j<nnz; j++) { 2848 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 2849 } 2850 } 2851 #endif 2852 2853 for (i=0; i<m; i++) { 2854 nnz = Ii[i+1]- Ii[i]; 2855 JJ = J + Ii[i]; 2856 nnz_max = PetscMax(nnz_max,nnz); 2857 for (j=0; j<nnz; j++) { 2858 if (*JJ >= cstart) break; 2859 JJ++; 2860 } 2861 d = 0; 2862 for (; j<nnz; j++) { 2863 if (*JJ++ >= cend) break; 2864 d++; 2865 } 2866 d_nnz[i] = d; 2867 o_nnz[i] = nnz - d; 2868 } 2869 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2870 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2871 2872 if (v) values = (PetscScalar*)v; 2873 else { 2874 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2875 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2876 } 2877 2878 for (i=0; i<m; i++) { 2879 ii = i + rstart; 2880 nnz = Ii[i+1]- Ii[i]; 2881 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2882 } 2883 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 2886 if (!v) { 2887 ierr = PetscFree(values);CHKERRQ(ierr); 2888 } 2889 PetscFunctionReturn(0); 2890 } 2891 EXTERN_C_END 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2895 /*@ 2896 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2897 (the default parallel PETSc format). 2898 2899 Collective on MPI_Comm 2900 2901 Input Parameters: 2902 + B - the matrix 2903 . i - the indices into j for the start of each local row (starts with zero) 2904 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2905 - v - optional values in the matrix 2906 2907 Level: developer 2908 2909 Notes: 2910 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2911 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2912 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2913 2914 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2915 2916 The format which is used for the sparse matrix input, is equivalent to a 2917 row-major ordering.. i.e for the following matrix, the input data expected is 2918 as shown: 2919 2920 1 0 0 2921 2 0 3 P0 2922 ------- 2923 4 5 6 P1 2924 2925 Process0 [P0]: rows_owned=[0,1] 2926 i = {0,1,3} [size = nrow+1 = 2+1] 2927 j = {0,0,2} [size = nz = 6] 2928 v = {1,2,3} [size = nz = 6] 2929 2930 Process1 [P1]: rows_owned=[2] 2931 i = {0,3} [size = nrow+1 = 1+1] 2932 j = {0,1,2} [size = nz = 6] 2933 v = {4,5,6} [size = nz = 6] 2934 2935 The column indices for each row MUST be sorted. 2936 2937 .keywords: matrix, aij, compressed row, sparse, parallel 2938 2939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2940 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2941 @*/ 2942 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2943 { 2944 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2945 2946 PetscFunctionBegin; 2947 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2948 if (f) { 2949 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2950 } 2951 PetscFunctionReturn(0); 2952 } 2953 2954 #undef __FUNCT__ 2955 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2956 /*@C 2957 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2958 (the default parallel PETSc format). For good matrix assembly performance 2959 the user should preallocate the matrix storage by setting the parameters 2960 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2961 performance can be increased by more than a factor of 50. 2962 2963 Collective on MPI_Comm 2964 2965 Input Parameters: 2966 + A - the matrix 2967 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2968 (same value is used for all local rows) 2969 . d_nnz - array containing the number of nonzeros in the various rows of the 2970 DIAGONAL portion of the local submatrix (possibly different for each row) 2971 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2972 The size of this array is equal to the number of local rows, i.e 'm'. 2973 You must leave room for the diagonal entry even if it is zero. 2974 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2975 submatrix (same value is used for all local rows). 2976 - o_nnz - array containing the number of nonzeros in the various rows of the 2977 OFF-DIAGONAL portion of the local submatrix (possibly different for 2978 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2979 structure. The size of this array is equal to the number 2980 of local rows, i.e 'm'. 2981 2982 If the *_nnz parameter is given then the *_nz parameter is ignored 2983 2984 The AIJ format (also called the Yale sparse matrix format or 2985 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2986 storage. The stored row and column indices begin with zero. See the users manual for details. 2987 2988 The parallel matrix is partitioned such that the first m0 rows belong to 2989 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2990 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2991 2992 The DIAGONAL portion of the local submatrix of a processor can be defined 2993 as the submatrix which is obtained by extraction the part corresponding 2994 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2995 first row that belongs to the processor, and r2 is the last row belonging 2996 to the this processor. This is a square mxm matrix. The remaining portion 2997 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2998 2999 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3000 3001 You can call MatGetInfo() to get information on how effective the preallocation was; 3002 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3003 You can also run with the option -info and look for messages with the string 3004 malloc in them to see if additional memory allocation was needed. 3005 3006 Example usage: 3007 3008 Consider the following 8x8 matrix with 34 non-zero values, that is 3009 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3010 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3011 as follows: 3012 3013 .vb 3014 1 2 0 | 0 3 0 | 0 4 3015 Proc0 0 5 6 | 7 0 0 | 8 0 3016 9 0 10 | 11 0 0 | 12 0 3017 ------------------------------------- 3018 13 0 14 | 15 16 17 | 0 0 3019 Proc1 0 18 0 | 19 20 21 | 0 0 3020 0 0 0 | 22 23 0 | 24 0 3021 ------------------------------------- 3022 Proc2 25 26 27 | 0 0 28 | 29 0 3023 30 0 0 | 31 32 33 | 0 34 3024 .ve 3025 3026 This can be represented as a collection of submatrices as: 3027 3028 .vb 3029 A B C 3030 D E F 3031 G H I 3032 .ve 3033 3034 Where the submatrices A,B,C are owned by proc0, D,E,F are 3035 owned by proc1, G,H,I are owned by proc2. 3036 3037 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3038 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3039 The 'M','N' parameters are 8,8, and have the same values on all procs. 3040 3041 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3042 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3043 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3044 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3045 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3046 matrix, ans [DF] as another SeqAIJ matrix. 3047 3048 When d_nz, o_nz parameters are specified, d_nz storage elements are 3049 allocated for every row of the local diagonal submatrix, and o_nz 3050 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3051 One way to choose d_nz and o_nz is to use the max nonzerors per local 3052 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3053 In this case, the values of d_nz,o_nz are: 3054 .vb 3055 proc0 : dnz = 2, o_nz = 2 3056 proc1 : dnz = 3, o_nz = 2 3057 proc2 : dnz = 1, o_nz = 4 3058 .ve 3059 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3060 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3061 for proc3. i.e we are using 12+15+10=37 storage locations to store 3062 34 values. 3063 3064 When d_nnz, o_nnz parameters are specified, the storage is specified 3065 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3066 In the above case the values for d_nnz,o_nnz are: 3067 .vb 3068 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3069 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3070 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3071 .ve 3072 Here the space allocated is sum of all the above values i.e 34, and 3073 hence pre-allocation is perfect. 3074 3075 Level: intermediate 3076 3077 .keywords: matrix, aij, compressed row, sparse, parallel 3078 3079 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3080 MPIAIJ, MatGetInfo() 3081 @*/ 3082 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3083 { 3084 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3085 3086 PetscFunctionBegin; 3087 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3088 if (f) { 3089 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3090 } 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3096 /*@ 3097 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3098 CSR format the local rows. 3099 3100 Collective on MPI_Comm 3101 3102 Input Parameters: 3103 + comm - MPI communicator 3104 . m - number of local rows (Cannot be PETSC_DECIDE) 3105 . n - This value should be the same as the local size used in creating the 3106 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3107 calculated if N is given) For square matrices n is almost always m. 3108 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3109 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3110 . i - row indices 3111 . j - column indices 3112 - a - matrix values 3113 3114 Output Parameter: 3115 . mat - the matrix 3116 3117 Level: intermediate 3118 3119 Notes: 3120 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3121 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3122 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3123 3124 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3125 3126 The format which is used for the sparse matrix input, is equivalent to a 3127 row-major ordering.. i.e for the following matrix, the input data expected is 3128 as shown: 3129 3130 1 0 0 3131 2 0 3 P0 3132 ------- 3133 4 5 6 P1 3134 3135 Process0 [P0]: rows_owned=[0,1] 3136 i = {0,1,3} [size = nrow+1 = 2+1] 3137 j = {0,0,2} [size = nz = 6] 3138 v = {1,2,3} [size = nz = 6] 3139 3140 Process1 [P1]: rows_owned=[2] 3141 i = {0,3} [size = nrow+1 = 1+1] 3142 j = {0,1,2} [size = nz = 6] 3143 v = {4,5,6} [size = nz = 6] 3144 3145 .keywords: matrix, aij, compressed row, sparse, parallel 3146 3147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3148 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3149 @*/ 3150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3151 { 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 if (i[0]) { 3156 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3157 } 3158 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3159 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3160 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3161 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3162 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "MatCreateMPIAIJ" 3168 /*@C 3169 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3170 (the default parallel PETSc format). For good matrix assembly performance 3171 the user should preallocate the matrix storage by setting the parameters 3172 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3173 performance can be increased by more than a factor of 50. 3174 3175 Collective on MPI_Comm 3176 3177 Input Parameters: 3178 + comm - MPI communicator 3179 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3180 This value should be the same as the local size used in creating the 3181 y vector for the matrix-vector product y = Ax. 3182 . n - This value should be the same as the local size used in creating the 3183 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3184 calculated if N is given) For square matrices n is almost always m. 3185 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3186 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3187 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3188 (same value is used for all local rows) 3189 . d_nnz - array containing the number of nonzeros in the various rows of the 3190 DIAGONAL portion of the local submatrix (possibly different for each row) 3191 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3192 The size of this array is equal to the number of local rows, i.e 'm'. 3193 You must leave room for the diagonal entry even if it is zero. 3194 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3195 submatrix (same value is used for all local rows). 3196 - o_nnz - array containing the number of nonzeros in the various rows of the 3197 OFF-DIAGONAL portion of the local submatrix (possibly different for 3198 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3199 structure. The size of this array is equal to the number 3200 of local rows, i.e 'm'. 3201 3202 Output Parameter: 3203 . A - the matrix 3204 3205 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3206 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3207 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3208 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3209 3210 Notes: 3211 If the *_nnz parameter is given then the *_nz parameter is ignored 3212 3213 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3214 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3215 storage requirements for this matrix. 3216 3217 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3218 processor than it must be used on all processors that share the object for 3219 that argument. 3220 3221 The user MUST specify either the local or global matrix dimensions 3222 (possibly both). 3223 3224 The parallel matrix is partitioned across processors such that the 3225 first m0 rows belong to process 0, the next m1 rows belong to 3226 process 1, the next m2 rows belong to process 2 etc.. where 3227 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3228 values corresponding to [m x N] submatrix. 3229 3230 The columns are logically partitioned with the n0 columns belonging 3231 to 0th partition, the next n1 columns belonging to the next 3232 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3233 3234 The DIAGONAL portion of the local submatrix on any given processor 3235 is the submatrix corresponding to the rows and columns m,n 3236 corresponding to the given processor. i.e diagonal matrix on 3237 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3238 etc. The remaining portion of the local submatrix [m x (N-n)] 3239 constitute the OFF-DIAGONAL portion. The example below better 3240 illustrates this concept. 3241 3242 For a square global matrix we define each processor's diagonal portion 3243 to be its local rows and the corresponding columns (a square submatrix); 3244 each processor's off-diagonal portion encompasses the remainder of the 3245 local matrix (a rectangular submatrix). 3246 3247 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3248 3249 When calling this routine with a single process communicator, a matrix of 3250 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3251 type of communicator, use the construction mechanism: 3252 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3253 3254 By default, this format uses inodes (identical nodes) when possible. 3255 We search for consecutive rows with the same nonzero structure, thereby 3256 reusing matrix information to achieve increased efficiency. 3257 3258 Options Database Keys: 3259 + -mat_no_inode - Do not use inodes 3260 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3261 - -mat_aij_oneindex - Internally use indexing starting at 1 3262 rather than 0. Note that when calling MatSetValues(), 3263 the user still MUST index entries starting at 0! 3264 3265 3266 Example usage: 3267 3268 Consider the following 8x8 matrix with 34 non-zero values, that is 3269 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3270 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3271 as follows: 3272 3273 .vb 3274 1 2 0 | 0 3 0 | 0 4 3275 Proc0 0 5 6 | 7 0 0 | 8 0 3276 9 0 10 | 11 0 0 | 12 0 3277 ------------------------------------- 3278 13 0 14 | 15 16 17 | 0 0 3279 Proc1 0 18 0 | 19 20 21 | 0 0 3280 0 0 0 | 22 23 0 | 24 0 3281 ------------------------------------- 3282 Proc2 25 26 27 | 0 0 28 | 29 0 3283 30 0 0 | 31 32 33 | 0 34 3284 .ve 3285 3286 This can be represented as a collection of submatrices as: 3287 3288 .vb 3289 A B C 3290 D E F 3291 G H I 3292 .ve 3293 3294 Where the submatrices A,B,C are owned by proc0, D,E,F are 3295 owned by proc1, G,H,I are owned by proc2. 3296 3297 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3298 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3299 The 'M','N' parameters are 8,8, and have the same values on all procs. 3300 3301 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3302 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3303 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3304 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3305 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3306 matrix, ans [DF] as another SeqAIJ matrix. 3307 3308 When d_nz, o_nz parameters are specified, d_nz storage elements are 3309 allocated for every row of the local diagonal submatrix, and o_nz 3310 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3311 One way to choose d_nz and o_nz is to use the max nonzerors per local 3312 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3313 In this case, the values of d_nz,o_nz are: 3314 .vb 3315 proc0 : dnz = 2, o_nz = 2 3316 proc1 : dnz = 3, o_nz = 2 3317 proc2 : dnz = 1, o_nz = 4 3318 .ve 3319 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3320 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3321 for proc3. i.e we are using 12+15+10=37 storage locations to store 3322 34 values. 3323 3324 When d_nnz, o_nnz parameters are specified, the storage is specified 3325 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3326 In the above case the values for d_nnz,o_nnz are: 3327 .vb 3328 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3329 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3330 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3331 .ve 3332 Here the space allocated is sum of all the above values i.e 34, and 3333 hence pre-allocation is perfect. 3334 3335 Level: intermediate 3336 3337 .keywords: matrix, aij, compressed row, sparse, parallel 3338 3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3340 MPIAIJ, MatCreateMPIAIJWithArrays() 3341 @*/ 3342 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3343 { 3344 PetscErrorCode ierr; 3345 PetscMPIInt size; 3346 3347 PetscFunctionBegin; 3348 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3349 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3350 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3351 if (size > 1) { 3352 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3353 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3354 } else { 3355 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3356 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3357 } 3358 PetscFunctionReturn(0); 3359 } 3360 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3363 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3364 { 3365 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3366 3367 PetscFunctionBegin; 3368 *Ad = a->A; 3369 *Ao = a->B; 3370 *colmap = a->garray; 3371 PetscFunctionReturn(0); 3372 } 3373 3374 #undef __FUNCT__ 3375 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3376 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3377 { 3378 PetscErrorCode ierr; 3379 PetscInt i; 3380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3381 3382 PetscFunctionBegin; 3383 if (coloring->ctype == IS_COLORING_GLOBAL) { 3384 ISColoringValue *allcolors,*colors; 3385 ISColoring ocoloring; 3386 3387 /* set coloring for diagonal portion */ 3388 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3389 3390 /* set coloring for off-diagonal portion */ 3391 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3392 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3393 for (i=0; i<a->B->cmap.n; i++) { 3394 colors[i] = allcolors[a->garray[i]]; 3395 } 3396 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3397 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3398 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3399 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3400 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3401 ISColoringValue *colors; 3402 PetscInt *larray; 3403 ISColoring ocoloring; 3404 3405 /* set coloring for diagonal portion */ 3406 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3407 for (i=0; i<a->A->cmap.n; i++) { 3408 larray[i] = i + A->cmap.rstart; 3409 } 3410 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3411 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3412 for (i=0; i<a->A->cmap.n; i++) { 3413 colors[i] = coloring->colors[larray[i]]; 3414 } 3415 ierr = PetscFree(larray);CHKERRQ(ierr); 3416 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3417 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3418 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3419 3420 /* set coloring for off-diagonal portion */ 3421 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3422 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3423 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3424 for (i=0; i<a->B->cmap.n; i++) { 3425 colors[i] = coloring->colors[larray[i]]; 3426 } 3427 ierr = PetscFree(larray);CHKERRQ(ierr); 3428 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3429 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3430 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3431 } else { 3432 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3433 } 3434 3435 PetscFunctionReturn(0); 3436 } 3437 3438 #if defined(PETSC_HAVE_ADIC) 3439 #undef __FUNCT__ 3440 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3441 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3442 { 3443 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3448 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } 3451 #endif 3452 3453 #undef __FUNCT__ 3454 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3455 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3456 { 3457 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3458 PetscErrorCode ierr; 3459 3460 PetscFunctionBegin; 3461 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3462 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "MatMerge" 3468 /*@ 3469 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3470 matrices from each processor 3471 3472 Collective on MPI_Comm 3473 3474 Input Parameters: 3475 + comm - the communicators the parallel matrix will live on 3476 . inmat - the input sequential matrices 3477 . n - number of local columns (or PETSC_DECIDE) 3478 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3479 3480 Output Parameter: 3481 . outmat - the parallel matrix generated 3482 3483 Level: advanced 3484 3485 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3486 3487 @*/ 3488 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3489 { 3490 PetscErrorCode ierr; 3491 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3492 PetscInt *indx; 3493 PetscScalar *values; 3494 3495 PetscFunctionBegin; 3496 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3497 if (scall == MAT_INITIAL_MATRIX){ 3498 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3499 if (n == PETSC_DECIDE){ 3500 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3501 } 3502 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3503 rstart -= m; 3504 3505 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3506 for (i=0;i<m;i++) { 3507 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3508 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3509 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3510 } 3511 /* This routine will ONLY return MPIAIJ type matrix */ 3512 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3513 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3514 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3515 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3516 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3517 3518 } else if (scall == MAT_REUSE_MATRIX){ 3519 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3520 } else { 3521 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3522 } 3523 3524 for (i=0;i<m;i++) { 3525 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3526 Ii = i + rstart; 3527 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3528 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3529 } 3530 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3531 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatFileSplit" 3539 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3540 { 3541 PetscErrorCode ierr; 3542 PetscMPIInt rank; 3543 PetscInt m,N,i,rstart,nnz; 3544 size_t len; 3545 const PetscInt *indx; 3546 PetscViewer out; 3547 char *name; 3548 Mat B; 3549 const PetscScalar *values; 3550 3551 PetscFunctionBegin; 3552 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3553 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3554 /* Should this be the type of the diagonal block of A? */ 3555 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3556 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3557 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3558 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3559 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3560 for (i=0;i<m;i++) { 3561 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3562 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3563 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3564 } 3565 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3566 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3567 3568 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3569 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3570 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3571 sprintf(name,"%s.%d",outfile,rank); 3572 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3573 ierr = PetscFree(name); 3574 ierr = MatView(B,out);CHKERRQ(ierr); 3575 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3576 ierr = MatDestroy(B);CHKERRQ(ierr); 3577 PetscFunctionReturn(0); 3578 } 3579 3580 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3583 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3584 { 3585 PetscErrorCode ierr; 3586 Mat_Merge_SeqsToMPI *merge; 3587 PetscContainer container; 3588 3589 PetscFunctionBegin; 3590 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3591 if (container) { 3592 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3593 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3594 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3595 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3596 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3597 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3598 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3599 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3600 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3601 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3602 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3603 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3604 3605 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3606 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3607 } 3608 ierr = PetscFree(merge);CHKERRQ(ierr); 3609 3610 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3611 PetscFunctionReturn(0); 3612 } 3613 3614 #include "src/mat/utils/freespace.h" 3615 #include "petscbt.h" 3616 3617 #undef __FUNCT__ 3618 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3619 /*@C 3620 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3621 matrices from each processor 3622 3623 Collective on MPI_Comm 3624 3625 Input Parameters: 3626 + comm - the communicators the parallel matrix will live on 3627 . seqmat - the input sequential matrices 3628 . m - number of local rows (or PETSC_DECIDE) 3629 . n - number of local columns (or PETSC_DECIDE) 3630 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3631 3632 Output Parameter: 3633 . mpimat - the parallel matrix generated 3634 3635 Level: advanced 3636 3637 Notes: 3638 The dimensions of the sequential matrix in each processor MUST be the same. 3639 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3640 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3641 @*/ 3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3643 { 3644 PetscErrorCode ierr; 3645 MPI_Comm comm=((PetscObject)mpimat)->comm; 3646 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3647 PetscMPIInt size,rank,taga,*len_s; 3648 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3649 PetscInt proc,m; 3650 PetscInt **buf_ri,**buf_rj; 3651 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3652 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3653 MPI_Request *s_waits,*r_waits; 3654 MPI_Status *status; 3655 MatScalar *aa=a->a,**abuf_r,*ba_i; 3656 Mat_Merge_SeqsToMPI *merge; 3657 PetscContainer container; 3658 3659 PetscFunctionBegin; 3660 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3661 3662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 3665 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3666 if (container) { 3667 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3668 } 3669 bi = merge->bi; 3670 bj = merge->bj; 3671 buf_ri = merge->buf_ri; 3672 buf_rj = merge->buf_rj; 3673 3674 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3675 owners = merge->rowmap.range; 3676 len_s = merge->len_s; 3677 3678 /* send and recv matrix values */ 3679 /*-----------------------------*/ 3680 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3681 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3682 3683 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3684 for (proc=0,k=0; proc<size; proc++){ 3685 if (!len_s[proc]) continue; 3686 i = owners[proc]; 3687 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3688 k++; 3689 } 3690 3691 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3692 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3693 ierr = PetscFree(status);CHKERRQ(ierr); 3694 3695 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3696 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3697 3698 /* insert mat values of mpimat */ 3699 /*----------------------------*/ 3700 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3701 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3702 nextrow = buf_ri_k + merge->nrecv; 3703 nextai = nextrow + merge->nrecv; 3704 3705 for (k=0; k<merge->nrecv; k++){ 3706 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3707 nrows = *(buf_ri_k[k]); 3708 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3709 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3710 } 3711 3712 /* set values of ba */ 3713 m = merge->rowmap.n; 3714 for (i=0; i<m; i++) { 3715 arow = owners[rank] + i; 3716 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3717 bnzi = bi[i+1] - bi[i]; 3718 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3719 3720 /* add local non-zero vals of this proc's seqmat into ba */ 3721 anzi = ai[arow+1] - ai[arow]; 3722 aj = a->j + ai[arow]; 3723 aa = a->a + ai[arow]; 3724 nextaj = 0; 3725 for (j=0; nextaj<anzi; j++){ 3726 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3727 ba_i[j] += aa[nextaj++]; 3728 } 3729 } 3730 3731 /* add received vals into ba */ 3732 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3733 /* i-th row */ 3734 if (i == *nextrow[k]) { 3735 anzi = *(nextai[k]+1) - *nextai[k]; 3736 aj = buf_rj[k] + *(nextai[k]); 3737 aa = abuf_r[k] + *(nextai[k]); 3738 nextaj = 0; 3739 for (j=0; nextaj<anzi; j++){ 3740 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3741 ba_i[j] += aa[nextaj++]; 3742 } 3743 } 3744 nextrow[k]++; nextai[k]++; 3745 } 3746 } 3747 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3748 } 3749 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3750 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 3752 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3753 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3754 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3755 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3762 { 3763 PetscErrorCode ierr; 3764 Mat B_mpi; 3765 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3766 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3767 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3768 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3769 PetscInt len,proc,*dnz,*onz; 3770 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3771 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3772 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3773 MPI_Status *status; 3774 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3775 PetscBT lnkbt; 3776 Mat_Merge_SeqsToMPI *merge; 3777 PetscContainer container; 3778 3779 PetscFunctionBegin; 3780 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3781 3782 /* make sure it is a PETSc comm */ 3783 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3784 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3785 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3786 3787 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3788 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3789 3790 /* determine row ownership */ 3791 /*---------------------------------------------------------*/ 3792 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3793 merge->rowmap.n = m; 3794 merge->rowmap.N = M; 3795 merge->rowmap.bs = 1; 3796 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3797 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3798 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3799 3800 m = merge->rowmap.n; 3801 M = merge->rowmap.N; 3802 owners = merge->rowmap.range; 3803 3804 /* determine the number of messages to send, their lengths */ 3805 /*---------------------------------------------------------*/ 3806 len_s = merge->len_s; 3807 3808 len = 0; /* length of buf_si[] */ 3809 merge->nsend = 0; 3810 for (proc=0; proc<size; proc++){ 3811 len_si[proc] = 0; 3812 if (proc == rank){ 3813 len_s[proc] = 0; 3814 } else { 3815 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3816 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3817 } 3818 if (len_s[proc]) { 3819 merge->nsend++; 3820 nrows = 0; 3821 for (i=owners[proc]; i<owners[proc+1]; i++){ 3822 if (ai[i+1] > ai[i]) nrows++; 3823 } 3824 len_si[proc] = 2*(nrows+1); 3825 len += len_si[proc]; 3826 } 3827 } 3828 3829 /* determine the number and length of messages to receive for ij-structure */ 3830 /*-------------------------------------------------------------------------*/ 3831 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3832 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3833 3834 /* post the Irecv of j-structure */ 3835 /*-------------------------------*/ 3836 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3837 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3838 3839 /* post the Isend of j-structure */ 3840 /*--------------------------------*/ 3841 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3842 sj_waits = si_waits + merge->nsend; 3843 3844 for (proc=0, k=0; proc<size; proc++){ 3845 if (!len_s[proc]) continue; 3846 i = owners[proc]; 3847 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3848 k++; 3849 } 3850 3851 /* receives and sends of j-structure are complete */ 3852 /*------------------------------------------------*/ 3853 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3854 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3855 3856 /* send and recv i-structure */ 3857 /*---------------------------*/ 3858 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3859 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3860 3861 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3862 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3863 for (proc=0,k=0; proc<size; proc++){ 3864 if (!len_s[proc]) continue; 3865 /* form outgoing message for i-structure: 3866 buf_si[0]: nrows to be sent 3867 [1:nrows]: row index (global) 3868 [nrows+1:2*nrows+1]: i-structure index 3869 */ 3870 /*-------------------------------------------*/ 3871 nrows = len_si[proc]/2 - 1; 3872 buf_si_i = buf_si + nrows+1; 3873 buf_si[0] = nrows; 3874 buf_si_i[0] = 0; 3875 nrows = 0; 3876 for (i=owners[proc]; i<owners[proc+1]; i++){ 3877 anzi = ai[i+1] - ai[i]; 3878 if (anzi) { 3879 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3880 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3881 nrows++; 3882 } 3883 } 3884 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3885 k++; 3886 buf_si += len_si[proc]; 3887 } 3888 3889 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3890 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3891 3892 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3893 for (i=0; i<merge->nrecv; i++){ 3894 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3895 } 3896 3897 ierr = PetscFree(len_si);CHKERRQ(ierr); 3898 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3899 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3900 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3901 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3902 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3903 ierr = PetscFree(status);CHKERRQ(ierr); 3904 3905 /* compute a local seq matrix in each processor */ 3906 /*----------------------------------------------*/ 3907 /* allocate bi array and free space for accumulating nonzero column info */ 3908 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3909 bi[0] = 0; 3910 3911 /* create and initialize a linked list */ 3912 nlnk = N+1; 3913 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3914 3915 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3916 len = 0; 3917 len = ai[owners[rank+1]] - ai[owners[rank]]; 3918 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3919 current_space = free_space; 3920 3921 /* determine symbolic info for each local row */ 3922 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3923 nextrow = buf_ri_k + merge->nrecv; 3924 nextai = nextrow + merge->nrecv; 3925 for (k=0; k<merge->nrecv; k++){ 3926 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3927 nrows = *buf_ri_k[k]; 3928 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3929 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3930 } 3931 3932 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3933 len = 0; 3934 for (i=0;i<m;i++) { 3935 bnzi = 0; 3936 /* add local non-zero cols of this proc's seqmat into lnk */ 3937 arow = owners[rank] + i; 3938 anzi = ai[arow+1] - ai[arow]; 3939 aj = a->j + ai[arow]; 3940 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3941 bnzi += nlnk; 3942 /* add received col data into lnk */ 3943 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3944 if (i == *nextrow[k]) { /* i-th row */ 3945 anzi = *(nextai[k]+1) - *nextai[k]; 3946 aj = buf_rj[k] + *nextai[k]; 3947 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3948 bnzi += nlnk; 3949 nextrow[k]++; nextai[k]++; 3950 } 3951 } 3952 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3953 3954 /* if free space is not available, make more free space */ 3955 if (current_space->local_remaining<bnzi) { 3956 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3957 nspacedouble++; 3958 } 3959 /* copy data into free space, then initialize lnk */ 3960 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3961 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3962 3963 current_space->array += bnzi; 3964 current_space->local_used += bnzi; 3965 current_space->local_remaining -= bnzi; 3966 3967 bi[i+1] = bi[i] + bnzi; 3968 } 3969 3970 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3971 3972 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3973 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3974 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3975 3976 /* create symbolic parallel matrix B_mpi */ 3977 /*---------------------------------------*/ 3978 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3979 if (n==PETSC_DECIDE) { 3980 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3981 } else { 3982 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3983 } 3984 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3985 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3986 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3987 3988 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3989 B_mpi->assembled = PETSC_FALSE; 3990 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3991 merge->bi = bi; 3992 merge->bj = bj; 3993 merge->buf_ri = buf_ri; 3994 merge->buf_rj = buf_rj; 3995 merge->coi = PETSC_NULL; 3996 merge->coj = PETSC_NULL; 3997 merge->owners_co = PETSC_NULL; 3998 3999 /* attach the supporting struct to B_mpi for reuse */ 4000 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4001 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4002 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4003 *mpimat = B_mpi; 4004 4005 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4006 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 #undef __FUNCT__ 4011 #define __FUNCT__ "MatMerge_SeqsToMPI" 4012 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4013 { 4014 PetscErrorCode ierr; 4015 4016 PetscFunctionBegin; 4017 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4018 if (scall == MAT_INITIAL_MATRIX){ 4019 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4020 } 4021 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4022 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4023 PetscFunctionReturn(0); 4024 } 4025 4026 #undef __FUNCT__ 4027 #define __FUNCT__ "MatGetLocalMat" 4028 /*@ 4029 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4030 4031 Not Collective 4032 4033 Input Parameters: 4034 + A - the matrix 4035 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4036 4037 Output Parameter: 4038 . A_loc - the local sequential matrix generated 4039 4040 Level: developer 4041 4042 @*/ 4043 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4044 { 4045 PetscErrorCode ierr; 4046 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4047 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4048 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4049 PetscScalar *aa=a->a,*ba=b->a,*ca; 4050 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4051 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4052 4053 PetscFunctionBegin; 4054 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4055 if (scall == MAT_INITIAL_MATRIX){ 4056 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4057 ci[0] = 0; 4058 for (i=0; i<am; i++){ 4059 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4060 } 4061 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4062 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4063 k = 0; 4064 for (i=0; i<am; i++) { 4065 ncols_o = bi[i+1] - bi[i]; 4066 ncols_d = ai[i+1] - ai[i]; 4067 /* off-diagonal portion of A */ 4068 for (jo=0; jo<ncols_o; jo++) { 4069 col = cmap[*bj]; 4070 if (col >= cstart) break; 4071 cj[k] = col; bj++; 4072 ca[k++] = *ba++; 4073 } 4074 /* diagonal portion of A */ 4075 for (j=0; j<ncols_d; j++) { 4076 cj[k] = cstart + *aj++; 4077 ca[k++] = *aa++; 4078 } 4079 /* off-diagonal portion of A */ 4080 for (j=jo; j<ncols_o; j++) { 4081 cj[k] = cmap[*bj++]; 4082 ca[k++] = *ba++; 4083 } 4084 } 4085 /* put together the new matrix */ 4086 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4087 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4088 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4089 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4090 mat->free_a = PETSC_TRUE; 4091 mat->free_ij = PETSC_TRUE; 4092 mat->nonew = 0; 4093 } else if (scall == MAT_REUSE_MATRIX){ 4094 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4095 ci = mat->i; cj = mat->j; ca = mat->a; 4096 for (i=0; i<am; i++) { 4097 /* off-diagonal portion of A */ 4098 ncols_o = bi[i+1] - bi[i]; 4099 for (jo=0; jo<ncols_o; jo++) { 4100 col = cmap[*bj]; 4101 if (col >= cstart) break; 4102 *ca++ = *ba++; bj++; 4103 } 4104 /* diagonal portion of A */ 4105 ncols_d = ai[i+1] - ai[i]; 4106 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4107 /* off-diagonal portion of A */ 4108 for (j=jo; j<ncols_o; j++) { 4109 *ca++ = *ba++; bj++; 4110 } 4111 } 4112 } else { 4113 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4114 } 4115 4116 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4117 PetscFunctionReturn(0); 4118 } 4119 4120 #undef __FUNCT__ 4121 #define __FUNCT__ "MatGetLocalMatCondensed" 4122 /*@C 4123 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4124 4125 Not Collective 4126 4127 Input Parameters: 4128 + A - the matrix 4129 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4130 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4131 4132 Output Parameter: 4133 . A_loc - the local sequential matrix generated 4134 4135 Level: developer 4136 4137 @*/ 4138 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4139 { 4140 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4141 PetscErrorCode ierr; 4142 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4143 IS isrowa,iscola; 4144 Mat *aloc; 4145 4146 PetscFunctionBegin; 4147 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4148 if (!row){ 4149 start = A->rmap.rstart; end = A->rmap.rend; 4150 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4151 } else { 4152 isrowa = *row; 4153 } 4154 if (!col){ 4155 start = A->cmap.rstart; 4156 cmap = a->garray; 4157 nzA = a->A->cmap.n; 4158 nzB = a->B->cmap.n; 4159 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4160 ncols = 0; 4161 for (i=0; i<nzB; i++) { 4162 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4163 else break; 4164 } 4165 imark = i; 4166 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4167 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4168 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4169 ierr = PetscFree(idx);CHKERRQ(ierr); 4170 } else { 4171 iscola = *col; 4172 } 4173 if (scall != MAT_INITIAL_MATRIX){ 4174 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4175 aloc[0] = *A_loc; 4176 } 4177 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4178 *A_loc = aloc[0]; 4179 ierr = PetscFree(aloc);CHKERRQ(ierr); 4180 if (!row){ 4181 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4182 } 4183 if (!col){ 4184 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4185 } 4186 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4187 PetscFunctionReturn(0); 4188 } 4189 4190 #undef __FUNCT__ 4191 #define __FUNCT__ "MatGetBrowsOfAcols" 4192 /*@C 4193 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4194 4195 Collective on Mat 4196 4197 Input Parameters: 4198 + A,B - the matrices in mpiaij format 4199 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4200 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4201 4202 Output Parameter: 4203 + rowb, colb - index sets of rows and columns of B to extract 4204 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4205 - B_seq - the sequential matrix generated 4206 4207 Level: developer 4208 4209 @*/ 4210 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4211 { 4212 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4213 PetscErrorCode ierr; 4214 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4215 IS isrowb,iscolb; 4216 Mat *bseq; 4217 4218 PetscFunctionBegin; 4219 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4220 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4221 } 4222 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4223 4224 if (scall == MAT_INITIAL_MATRIX){ 4225 start = A->cmap.rstart; 4226 cmap = a->garray; 4227 nzA = a->A->cmap.n; 4228 nzB = a->B->cmap.n; 4229 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4230 ncols = 0; 4231 for (i=0; i<nzB; i++) { /* row < local row index */ 4232 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4233 else break; 4234 } 4235 imark = i; 4236 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4237 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4238 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4239 ierr = PetscFree(idx);CHKERRQ(ierr); 4240 *brstart = imark; 4241 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4242 } else { 4243 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4244 isrowb = *rowb; iscolb = *colb; 4245 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4246 bseq[0] = *B_seq; 4247 } 4248 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4249 *B_seq = bseq[0]; 4250 ierr = PetscFree(bseq);CHKERRQ(ierr); 4251 if (!rowb){ 4252 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4253 } else { 4254 *rowb = isrowb; 4255 } 4256 if (!colb){ 4257 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4258 } else { 4259 *colb = iscolb; 4260 } 4261 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4262 PetscFunctionReturn(0); 4263 } 4264 4265 #undef __FUNCT__ 4266 #define __FUNCT__ "MatGetBrowsOfAoCols" 4267 /*@C 4268 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4269 of the OFF-DIAGONAL portion of local A 4270 4271 Collective on Mat 4272 4273 Input Parameters: 4274 + A,B - the matrices in mpiaij format 4275 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4276 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4277 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4278 4279 Output Parameter: 4280 + B_oth - the sequential matrix generated 4281 4282 Level: developer 4283 4284 @*/ 4285 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4286 { 4287 VecScatter_MPI_General *gen_to,*gen_from; 4288 PetscErrorCode ierr; 4289 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4290 Mat_SeqAIJ *b_oth; 4291 VecScatter ctx=a->Mvctx; 4292 MPI_Comm comm=((PetscObject)ctx)->comm; 4293 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4294 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4295 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4296 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4297 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4298 MPI_Status *sstatus,rstatus; 4299 PetscMPIInt jj; 4300 PetscInt *cols,sbs,rbs; 4301 PetscScalar *vals; 4302 4303 PetscFunctionBegin; 4304 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4305 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4306 } 4307 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4308 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4309 4310 gen_to = (VecScatter_MPI_General*)ctx->todata; 4311 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4312 rvalues = gen_from->values; /* holds the length of receiving row */ 4313 svalues = gen_to->values; /* holds the length of sending row */ 4314 nrecvs = gen_from->n; 4315 nsends = gen_to->n; 4316 4317 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4318 srow = gen_to->indices; /* local row index to be sent */ 4319 sstarts = gen_to->starts; 4320 sprocs = gen_to->procs; 4321 sstatus = gen_to->sstatus; 4322 sbs = gen_to->bs; 4323 rstarts = gen_from->starts; 4324 rprocs = gen_from->procs; 4325 rbs = gen_from->bs; 4326 4327 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4328 if (scall == MAT_INITIAL_MATRIX){ 4329 /* i-array */ 4330 /*---------*/ 4331 /* post receives */ 4332 for (i=0; i<nrecvs; i++){ 4333 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4334 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4335 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4336 } 4337 4338 /* pack the outgoing message */ 4339 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4340 rstartsj = sstartsj + nsends +1; 4341 sstartsj[0] = 0; rstartsj[0] = 0; 4342 len = 0; /* total length of j or a array to be sent */ 4343 k = 0; 4344 for (i=0; i<nsends; i++){ 4345 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4346 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4347 for (j=0; j<nrows; j++) { 4348 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4349 for (l=0; l<sbs; l++){ 4350 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4351 rowlen[j*sbs+l] = ncols; 4352 len += ncols; 4353 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4354 } 4355 k++; 4356 } 4357 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4358 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4359 } 4360 /* recvs and sends of i-array are completed */ 4361 i = nrecvs; 4362 while (i--) { 4363 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4364 } 4365 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4366 4367 /* allocate buffers for sending j and a arrays */ 4368 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4369 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4370 4371 /* create i-array of B_oth */ 4372 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4373 b_othi[0] = 0; 4374 len = 0; /* total length of j or a array to be received */ 4375 k = 0; 4376 for (i=0; i<nrecvs; i++){ 4377 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4378 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4379 for (j=0; j<nrows; j++) { 4380 b_othi[k+1] = b_othi[k] + rowlen[j]; 4381 len += rowlen[j]; k++; 4382 } 4383 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4384 } 4385 4386 /* allocate space for j and a arrrays of B_oth */ 4387 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4388 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4389 4390 /* j-array */ 4391 /*---------*/ 4392 /* post receives of j-array */ 4393 for (i=0; i<nrecvs; i++){ 4394 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4395 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4396 } 4397 4398 /* pack the outgoing message j-array */ 4399 k = 0; 4400 for (i=0; i<nsends; i++){ 4401 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4402 bufJ = bufj+sstartsj[i]; 4403 for (j=0; j<nrows; j++) { 4404 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4405 for (ll=0; ll<sbs; ll++){ 4406 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4407 for (l=0; l<ncols; l++){ 4408 *bufJ++ = cols[l]; 4409 } 4410 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4411 } 4412 } 4413 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4414 } 4415 4416 /* recvs and sends of j-array are completed */ 4417 i = nrecvs; 4418 while (i--) { 4419 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4420 } 4421 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4422 } else if (scall == MAT_REUSE_MATRIX){ 4423 sstartsj = *startsj; 4424 rstartsj = sstartsj + nsends +1; 4425 bufa = *bufa_ptr; 4426 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4427 b_otha = b_oth->a; 4428 } else { 4429 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4430 } 4431 4432 /* a-array */ 4433 /*---------*/ 4434 /* post receives of a-array */ 4435 for (i=0; i<nrecvs; i++){ 4436 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4437 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4438 } 4439 4440 /* pack the outgoing message a-array */ 4441 k = 0; 4442 for (i=0; i<nsends; i++){ 4443 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4444 bufA = bufa+sstartsj[i]; 4445 for (j=0; j<nrows; j++) { 4446 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4447 for (ll=0; ll<sbs; ll++){ 4448 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4449 for (l=0; l<ncols; l++){ 4450 *bufA++ = vals[l]; 4451 } 4452 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4453 } 4454 } 4455 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4456 } 4457 /* recvs and sends of a-array are completed */ 4458 i = nrecvs; 4459 while (i--) { 4460 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4461 } 4462 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4463 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4464 4465 if (scall == MAT_INITIAL_MATRIX){ 4466 /* put together the new matrix */ 4467 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4468 4469 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4470 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4471 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4472 b_oth->free_a = PETSC_TRUE; 4473 b_oth->free_ij = PETSC_TRUE; 4474 b_oth->nonew = 0; 4475 4476 ierr = PetscFree(bufj);CHKERRQ(ierr); 4477 if (!startsj || !bufa_ptr){ 4478 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4479 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4480 } else { 4481 *startsj = sstartsj; 4482 *bufa_ptr = bufa; 4483 } 4484 } 4485 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4486 PetscFunctionReturn(0); 4487 } 4488 4489 #undef __FUNCT__ 4490 #define __FUNCT__ "MatGetCommunicationStructs" 4491 /*@C 4492 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4493 4494 Not Collective 4495 4496 Input Parameters: 4497 . A - The matrix in mpiaij format 4498 4499 Output Parameter: 4500 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4501 . colmap - A map from global column index to local index into lvec 4502 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4503 4504 Level: developer 4505 4506 @*/ 4507 #if defined (PETSC_USE_CTABLE) 4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4509 #else 4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4511 #endif 4512 { 4513 Mat_MPIAIJ *a; 4514 4515 PetscFunctionBegin; 4516 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4517 PetscValidPointer(lvec, 2) 4518 PetscValidPointer(colmap, 3) 4519 PetscValidPointer(multScatter, 4) 4520 a = (Mat_MPIAIJ *) A->data; 4521 if (lvec) *lvec = a->lvec; 4522 if (colmap) *colmap = a->colmap; 4523 if (multScatter) *multScatter = a->Mvctx; 4524 PetscFunctionReturn(0); 4525 } 4526 4527 EXTERN_C_BEGIN 4528 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4529 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4530 EXTERN_C_END 4531 4532 /*MC 4533 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4534 4535 Options Database Keys: 4536 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4537 4538 Level: beginner 4539 4540 .seealso: MatCreateMPIAIJ() 4541 M*/ 4542 4543 EXTERN_C_BEGIN 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "MatCreate_MPIAIJ" 4546 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4547 { 4548 Mat_MPIAIJ *b; 4549 PetscErrorCode ierr; 4550 PetscMPIInt size; 4551 4552 PetscFunctionBegin; 4553 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4554 4555 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4556 B->data = (void*)b; 4557 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4558 B->factor = 0; 4559 B->rmap.bs = 1; 4560 B->assembled = PETSC_FALSE; 4561 B->mapping = 0; 4562 4563 B->insertmode = NOT_SET_VALUES; 4564 b->size = size; 4565 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4566 4567 /* build cache for off array entries formed */ 4568 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4569 b->donotstash = PETSC_FALSE; 4570 b->colmap = 0; 4571 b->garray = 0; 4572 b->roworiented = PETSC_TRUE; 4573 4574 /* stuff used for matrix vector multiply */ 4575 b->lvec = PETSC_NULL; 4576 b->Mvctx = PETSC_NULL; 4577 4578 /* stuff for MatGetRow() */ 4579 b->rowindices = 0; 4580 b->rowvalues = 0; 4581 b->getrowactive = PETSC_FALSE; 4582 4583 4584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4585 "MatStoreValues_MPIAIJ", 4586 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4588 "MatRetrieveValues_MPIAIJ", 4589 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4591 "MatGetDiagonalBlock_MPIAIJ", 4592 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4594 "MatIsTranspose_MPIAIJ", 4595 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4597 "MatMPIAIJSetPreallocation_MPIAIJ", 4598 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4600 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4601 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4603 "MatDiagonalScaleLocal_MPIAIJ", 4604 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4606 "MatConvert_MPIAIJ_MPICSRPERM", 4607 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4609 "MatConvert_MPIAIJ_MPICRL", 4610 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4611 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 EXTERN_C_END 4615 4616 #undef __FUNCT__ 4617 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4618 /*@ 4619 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4620 and "off-diagonal" part of the matrix in CSR format. 4621 4622 Collective on MPI_Comm 4623 4624 Input Parameters: 4625 + comm - MPI communicator 4626 . m - number of local rows (Cannot be PETSC_DECIDE) 4627 . n - This value should be the same as the local size used in creating the 4628 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4629 calculated if N is given) For square matrices n is almost always m. 4630 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4631 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4632 . i - row indices for "diagonal" portion of matrix 4633 . j - column indices 4634 . a - matrix values 4635 . oi - row indices for "off-diagonal" portion of matrix 4636 . oj - column indices 4637 - oa - matrix values 4638 4639 Output Parameter: 4640 . mat - the matrix 4641 4642 Level: advanced 4643 4644 Notes: 4645 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4646 4647 The i and j indices are 0 based 4648 4649 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4650 4651 4652 .keywords: matrix, aij, compressed row, sparse, parallel 4653 4654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4655 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4656 @*/ 4657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4658 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4659 { 4660 PetscErrorCode ierr; 4661 Mat_MPIAIJ *maij; 4662 4663 PetscFunctionBegin; 4664 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4665 if (i[0]) { 4666 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4667 } 4668 if (oi[0]) { 4669 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4670 } 4671 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4672 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4673 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4674 maij = (Mat_MPIAIJ*) (*mat)->data; 4675 maij->donotstash = PETSC_TRUE; 4676 (*mat)->preallocated = PETSC_TRUE; 4677 4678 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4679 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4680 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4681 4682 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4683 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4684 4685 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4688 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4689 4690 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4691 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4692 PetscFunctionReturn(0); 4693 } 4694 4695 /* 4696 Special version for direct calls from Fortran 4697 */ 4698 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4699 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4700 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4701 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4702 #endif 4703 4704 /* Change these macros so can be used in void function */ 4705 #undef CHKERRQ 4706 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4707 #undef SETERRQ2 4708 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4709 #undef SETERRQ 4710 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 4711 4712 EXTERN_C_BEGIN 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "matsetvaluesmpiaij_" 4715 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4716 { 4717 Mat mat = *mmat; 4718 PetscInt m = *mm, n = *mn; 4719 InsertMode addv = *maddv; 4720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4721 PetscScalar value; 4722 PetscErrorCode ierr; 4723 4724 MatPreallocated(mat); 4725 if (mat->insertmode == NOT_SET_VALUES) { 4726 mat->insertmode = addv; 4727 } 4728 #if defined(PETSC_USE_DEBUG) 4729 else if (mat->insertmode != addv) { 4730 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4731 } 4732 #endif 4733 { 4734 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4735 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4736 PetscTruth roworiented = aij->roworiented; 4737 4738 /* Some Variables required in the macro */ 4739 Mat A = aij->A; 4740 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4741 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4742 PetscScalar *aa = a->a; 4743 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4744 Mat B = aij->B; 4745 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4746 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4747 PetscScalar *ba = b->a; 4748 4749 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4750 PetscInt nonew = a->nonew; 4751 PetscScalar *ap1,*ap2; 4752 4753 PetscFunctionBegin; 4754 for (i=0; i<m; i++) { 4755 if (im[i] < 0) continue; 4756 #if defined(PETSC_USE_DEBUG) 4757 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4758 #endif 4759 if (im[i] >= rstart && im[i] < rend) { 4760 row = im[i] - rstart; 4761 lastcol1 = -1; 4762 rp1 = aj + ai[row]; 4763 ap1 = aa + ai[row]; 4764 rmax1 = aimax[row]; 4765 nrow1 = ailen[row]; 4766 low1 = 0; 4767 high1 = nrow1; 4768 lastcol2 = -1; 4769 rp2 = bj + bi[row]; 4770 ap2 = ba + bi[row]; 4771 rmax2 = bimax[row]; 4772 nrow2 = bilen[row]; 4773 low2 = 0; 4774 high2 = nrow2; 4775 4776 for (j=0; j<n; j++) { 4777 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4778 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4779 if (in[j] >= cstart && in[j] < cend){ 4780 col = in[j] - cstart; 4781 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4782 } else if (in[j] < 0) continue; 4783 #if defined(PETSC_USE_DEBUG) 4784 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4785 #endif 4786 else { 4787 if (mat->was_assembled) { 4788 if (!aij->colmap) { 4789 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4790 } 4791 #if defined (PETSC_USE_CTABLE) 4792 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4793 col--; 4794 #else 4795 col = aij->colmap[in[j]] - 1; 4796 #endif 4797 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4798 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4799 col = in[j]; 4800 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4801 B = aij->B; 4802 b = (Mat_SeqAIJ*)B->data; 4803 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4804 rp2 = bj + bi[row]; 4805 ap2 = ba + bi[row]; 4806 rmax2 = bimax[row]; 4807 nrow2 = bilen[row]; 4808 low2 = 0; 4809 high2 = nrow2; 4810 bm = aij->B->rmap.n; 4811 ba = b->a; 4812 } 4813 } else col = in[j]; 4814 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4815 } 4816 } 4817 } else { 4818 if (!aij->donotstash) { 4819 if (roworiented) { 4820 if (ignorezeroentries && v[i*n] == 0.0) continue; 4821 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4822 } else { 4823 if (ignorezeroentries && v[i] == 0.0) continue; 4824 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4825 } 4826 } 4827 } 4828 }} 4829 PetscFunctionReturnVoid(); 4830 } 4831 EXTERN_C_END 4832 4833