#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
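/*
   Illustrative sketch only (kept out of the build with #if 0; the helper name is
   hypothetical): how the column map built by CreateColmap_MPIAIJ_Private() is used
   later in this file (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()) to turn a
   global column number into a local column index of the off-diagonal block B.  Both
   variants store "local index + 1" so that 0 can mean "this global column does not
   occur in B"; callers subtract 1 and treat a negative result as "not present".
   Error checking is omitted for brevity.
*/
#if 0
static PetscInt ColmapGlobalToLocal_Example(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined(PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);  /* yields 0 when gcol+1 is not in the table */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;              /* array of length cmap.N, zero-initialized */
#endif
  return lcol;                               /* negative: gcol is not a column of B */
}
#endif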
#undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 
#else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = 
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col; 319 PetscTruth other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 324 PetscFunctionBegin; 325 if (!aij->donotstash) { 326 while (1) { 327 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 328 if (!flg) break; 329 330 for (i=0; i<n;) { 331 /* Now identify the consecutive vals belonging to the same row */ 332 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 333 if (j < n) ncols = j-i; 334 else ncols = n-i; 335 /* Now assemble all these values with a single function call */ 336 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 337 i = j; 338 } 339 } 340 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 341 } 342 a->compressedrow.use = PETSC_FALSE; 343 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 345 346 /* determine if any processor has disassembled, if so we must 347 also disassemble ourselfs, in order that we may reassemble. */ 348 /* 349 if nonzero structure of submatrix B cannot change then we know that 350 no processor disassembled thus we can skip this stuff 351 */ 352 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 353 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 354 if (mat->was_assembled && !other_disassembled) { 355 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 356 } 357 } 358 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 359 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 360 } 361 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 362 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 363 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 364 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 365 366 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 367 aij->rowvalues = 0; 368 369 /* used by MatAXPY() */ 370 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 371 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 372 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 379 { 380 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 381 PetscErrorCode ierr; 382 383 PetscFunctionBegin; 384 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 385 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatZeroRows_MPIAIJ" 391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 392 { 393 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 394 PetscErrorCode ierr; 395 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = 
((PetscObject)A)->tag,lastidx = -1; 396 PetscInt i,*owners = A->rmap.range; 397 PetscInt *nprocs,j,idx,nsends,row; 398 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = ((PetscObject)A)->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 
ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = 
VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space for the largest number of nonzeros any processor has */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
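  /*
     The remaining sections (column indices, then numerical values) are funneled to
     process 0: each other process sends its nonzero count followed by its data, while
     process 0 receives every contribution into a buffer of length nzmax and appends it
     to the file, so the matrix is written in rank order.
  */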
800 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->rmap.n; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (!rank) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->rmap.n; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (!rank) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 PetscErrorCode ierr; 873 PetscMPIInt rank = aij->rank,size = aij->size; 874 PetscTruth isdraw,iascii,isbinary; 875 PetscViewer sviewer; 876 PetscViewerFormat format; 877 878 PetscFunctionBegin; 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 882 if (iascii) { 
883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 PetscTruth inodes; 887 888 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 889 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 890 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 891 if (!inodes) { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 893 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 894 } else { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 896 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } 898 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 902 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 903 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 904 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } else if (format == PETSC_VIEWER_ASCII_INFO) { 907 PetscInt inodecount,inodelimit,*inodes; 908 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 909 if (inodes) { 910 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 911 } else { 912 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 916 PetscFunctionReturn(0); 917 } 918 } else if (isbinary) { 919 if (size == 1) { 920 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 921 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 922 } else { 923 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 924 } 925 PetscFunctionReturn(0); 926 } else if (isdraw) { 927 PetscDraw draw; 928 PetscTruth isnull; 929 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 930 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 931 } 932 933 if (size == 1) { 934 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 935 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 936 } else { 937 /* assemble the entire matrix onto first processor. 
*/
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal
fshift,PetscInt its,PetscInt lits,Vec xx) 1020 { 1021 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1022 PetscErrorCode ierr; 1023 Vec bb1; 1024 1025 PetscFunctionBegin; 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1076 } 1077 } else { 1078 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1079 } 1080 1081 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatPermute_MPIAIJ" 1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1088 { 1089 MPI_Comm comm,pcomm; 1090 PetscInt first,local_size,nrows,*rows; 1091 int ntids; 1092 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1097 /* make a collective version of 'rowp' */ 1098 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1099 if (pcomm==comm) { 1100 crowp = rowp; 1101 } else { 1102 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1103 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1104 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1105 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1106 } 1107 /* collect the global row permutation and invert it */ 1108 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 
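  /* growp now holds the complete row permutation, gathered onto every process; it is
     validated and inverted below, and the local slice of the inverse (rows+first) is
     used as the row index set passed to MatGetSubMatrix() */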
1109 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1110 if (pcomm!=comm) { 1111 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1112 } 1113 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1114 /* get the local target indices */ 1115 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1116 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1117 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1118 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1119 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1121 /* the column permutation is so much easier; 1122 make a local version of 'colp' and invert it */ 1123 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1124 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1125 if (ntids==1) { 1126 lcolp = colp; 1127 } else { 1128 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1129 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1131 } 1132 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1133 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1134 if (ntids>1) { 1135 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1136 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1137 } 1138 /* now we just get the submatrix */ 1139 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1140 /* clean up */ 1141 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1142 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1149 { 1150 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1151 Mat A = mat->A,B = mat->B; 1152 PetscErrorCode ierr; 1153 PetscReal isend[5],irecv[5]; 1154 1155 PetscFunctionBegin; 1156 info->block_size = 1.0; 1157 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1159 isend[3] = info->memory; isend[4] = info->mallocs; 1160 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1162 isend[3] += info->memory; isend[4] += info->mallocs; 1163 if (flag == MAT_LOCAL) { 1164 info->nz_used = isend[0]; 1165 info->nz_allocated = isend[1]; 1166 info->nz_unneeded = isend[2]; 1167 info->memory = isend[3]; 1168 info->mallocs = isend[4]; 1169 } else if (flag == MAT_GLOBAL_MAX) { 1170 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } 1184 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1185 info->fill_ratio_needed = 0; 1186 info->factor_mallocs = 0; 1187 info->rows_global = (double)matin->rmap.N; 1188 info->columns_global = (double)matin->cmap.N; 1189 info->rows_local = (double)matin->rmap.n; 1190 info->columns_local = (double)matin->cmap.N; 1191 1192 
PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatSetOption_MPIAIJ" 1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 switch (op) { 1204 case MAT_NEW_NONZERO_LOCATIONS: 1205 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1206 case MAT_KEEP_ZEROED_ROWS: 1207 case MAT_NEW_NONZERO_LOCATION_ERR: 1208 case MAT_USE_INODES: 1209 case MAT_IGNORE_ZERO_ENTRIES: 1210 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1211 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1212 break; 1213 case MAT_ROW_ORIENTED: 1214 a->roworiented = flg; 1215 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1216 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1217 break; 1218 case MAT_NEW_DIAGONALS: 1219 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1220 break; 1221 case MAT_IGNORE_OFF_PROC_ENTRIES: 1222 a->donotstash = PETSC_TRUE; 1223 break; 1224 case MAT_SYMMETRIC: 1225 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1226 break; 1227 case MAT_STRUCTURALLY_SYMMETRIC: 1228 case MAT_HERMITIAN: 1229 case MAT_SYMMETRY_ETERNAL: 1230 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1231 break; 1232 default: 1233 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatGetRow_MPIAIJ" 1240 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1241 { 1242 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1243 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1244 PetscErrorCode ierr; 1245 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1246 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1247 PetscInt *cmap,*idx_p; 1248 1249 PetscFunctionBegin; 1250 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1251 mat->getrowactive = PETSC_TRUE; 1252 1253 if (!mat->rowvalues && (idx || v)) { 1254 /* 1255 allocate enough space to hold information from the longest row. 
1256 */ 1257 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1258 PetscInt max = 1,tmp; 1259 for (i=0; i<matin->rmap.n; i++) { 1260 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1261 if (max < tmp) { max = tmp; } 1262 } 1263 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1264 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1265 } 1266 1267 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1268 lrow = row - rstart; 1269 1270 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1271 if (!v) {pvA = 0; pvB = 0;} 1272 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1273 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1274 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1275 nztot = nzA + nzB; 1276 1277 cmap = mat->garray; 1278 if (v || idx) { 1279 if (nztot) { 1280 /* Sort by increasing column numbers, assuming A and B already sorted */ 1281 PetscInt imark = -1; 1282 if (v) { 1283 *v = v_p = mat->rowvalues; 1284 for (i=0; i<nzB; i++) { 1285 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1286 else break; 1287 } 1288 imark = i; 1289 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1290 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1291 } 1292 if (idx) { 1293 *idx = idx_p = mat->rowindices; 1294 if (imark > -1) { 1295 for (i=0; i<imark; i++) { 1296 idx_p[i] = cmap[cworkB[i]]; 1297 } 1298 } else { 1299 for (i=0; i<nzB; i++) { 1300 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1301 else break; 1302 } 1303 imark = i; 1304 } 1305 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1306 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1307 } 1308 } else { 1309 if (idx) *idx = 0; 1310 if (v) *v = 0; 1311 } 1312 } 1313 *nz = nztot; 1314 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1315 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1321 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1324 1325 PetscFunctionBegin; 1326 if (!aij->getrowactive) { 1327 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1328 } 1329 aij->getrowactive = PETSC_FALSE; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatNorm_MPIAIJ" 1335 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1336 { 1337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1338 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1339 PetscErrorCode ierr; 1340 PetscInt i,j,cstart = mat->cmap.rstart; 1341 PetscReal sum = 0.0; 1342 PetscScalar *v; 1343 1344 PetscFunctionBegin; 1345 if (aij->size == 1) { 1346 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1347 } else { 1348 if (type == NORM_FROBENIUS) { 1349 v = amat->a; 1350 for (i=0; i<amat->nz; i++) { 1351 #if defined(PETSC_USE_COMPLEX) 1352 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1353 #else 1354 sum += (*v)*(*v); v++; 1355 #endif 1356 } 1357 v = bmat->a; 1358 for (i=0; i<bmat->nz; i++) { 1359 #if defined(PETSC_USE_COMPLEX) 1360 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1361 #else 1362 sum += (*v)*(*v); v++; 1363 #endif 1364 } 1365 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1366 *norm = 
sqrt(*norm); 1367 } else if (type == NORM_1) { /* max column norm */ 1368 PetscReal *tmp,*tmp2; 1369 PetscInt *jj,*garray = aij->garray; 1370 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1371 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1372 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1373 *norm = 0.0; 1374 v = amat->a; jj = amat->j; 1375 for (j=0; j<amat->nz; j++) { 1376 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1377 } 1378 v = bmat->a; jj = bmat->j; 1379 for (j=0; j<bmat->nz; j++) { 1380 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1381 } 1382 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1383 for (j=0; j<mat->cmap.N; j++) { 1384 if (tmp2[j] > *norm) *norm = tmp2[j]; 1385 } 1386 ierr = PetscFree(tmp);CHKERRQ(ierr); 1387 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1388 } else if (type == NORM_INFINITY) { /* max row norm */ 1389 PetscReal ntemp = 0.0; 1390 for (j=0; j<aij->A->rmap.n; j++) { 1391 v = amat->a + amat->i[j]; 1392 sum = 0.0; 1393 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1394 sum += PetscAbsScalar(*v); v++; 1395 } 1396 v = bmat->a + bmat->i[j]; 1397 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1398 sum += PetscAbsScalar(*v); v++; 1399 } 1400 if (sum > ntemp) ntemp = sum; 1401 } 1402 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1403 } else { 1404 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1405 } 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatTranspose_MPIAIJ" 1412 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1413 { 1414 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1415 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1416 PetscErrorCode ierr; 1417 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1418 PetscInt cstart=A->cmap.rstart,ncol; 1419 Mat B; 1420 PetscScalar *array; 1421 1422 PetscFunctionBegin; 1423 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1424 1425 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1426 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1427 ai = Aloc->i; aj = Aloc->j; 1428 bi = Bloc->i; bj = Bloc->j; 1429 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1430 cols = d_nnz + na + 1; /* work space to be used by B part */ 1431 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1432 for (i=0; i<ai[ma]; i++){ 1433 d_nnz[aj[i]] ++; 1434 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1435 } 1436 1437 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1438 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1439 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1440 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1441 1442 /* copy over the A part */ 1443 array = Aloc->a; 1444 row = A->rmap.rstart; 1445 for (i=0; i<ma; i++) { 1446 ncol = ai[i+1]-ai[i]; 1447 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1448 row++; array += ncol; aj += ncol; 1449 } 1450 aj = Aloc->j; 1451 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1452 1453 /* copy over the B part */ 1454 array = Bloc->a; 1455 row = A->rmap.rstart; 1456 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1457 for (i=0; i<mb; i++) { 1458 ncol = bi[i+1]-bi[i]; 
1459 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1460 row++; array += ncol; cols += ncol; 1461 } 1462 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1463 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1465 if (matout) { 1466 *matout = B; 1467 } else { 1468 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1475 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1476 { 1477 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1478 Mat a = aij->A,b = aij->B; 1479 PetscErrorCode ierr; 1480 PetscInt s1,s2,s3; 1481 1482 PetscFunctionBegin; 1483 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1484 if (rr) { 1485 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1486 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1487 /* Overlap communication with computation. */ 1488 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 } 1490 if (ll) { 1491 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1492 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1493 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1494 } 1495 /* scale the diagonal block */ 1496 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1497 1498 if (rr) { 1499 /* Do a scatter end and then right scale the off-diagonal block */ 1500 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1502 } 1503 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1509 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1510 { 1511 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1516 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1521 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1522 { 1523 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatEqual_MPIAIJ" 1533 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1534 { 1535 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1536 Mat a,b,c,d; 1537 PetscTruth flg; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 a = matA->A; b = matA->B; 1542 c = matB->A; d = matB->B; 1543 1544 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1545 if (flg) { 1546 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1547 } 1548 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatCopy_MPIAIJ" 1554 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1555 { 1556 PetscErrorCode ierr; 1557 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1558 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1559 1560 PetscFunctionBegin; 1561 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1562 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1563 /* because of the column compression in the off-processor part of the matrix a->B, 1564 the number of columns in a->B and b->B may be different, hence we cannot call 1565 the MatCopy() directly on the two parts. If need be, we can provide a more 1566 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1567 then copying the submatrices */ 1568 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1569 } else { 1570 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1571 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1578 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1579 { 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #include "petscblaslapack.h" 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatAXPY_MPIAIJ" 1590 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1591 { 1592 PetscErrorCode ierr; 1593 PetscInt i; 1594 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1595 PetscBLASInt bnz,one=1; 1596 Mat_SeqAIJ *x,*y; 1597 1598 PetscFunctionBegin; 1599 if (str == SAME_NONZERO_PATTERN) { 1600 PetscScalar alpha = a; 1601 x = (Mat_SeqAIJ *)xx->A->data; 1602 y = (Mat_SeqAIJ *)yy->A->data; 1603 bnz = PetscBLASIntCast(x->nz); 1604 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1605 x = (Mat_SeqAIJ *)xx->B->data; 1606 y = (Mat_SeqAIJ *)yy->B->data; 1607 bnz = PetscBLASIntCast(x->nz); 1608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1609 } else if (str == SUBSET_NONZERO_PATTERN) { 1610 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1611 1612 x = (Mat_SeqAIJ *)xx->B->data; 1613 y = (Mat_SeqAIJ *)yy->B->data; 1614 if (y->xtoy && y->XtoY != xx->B) { 1615 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1616 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1617 } 1618 if (!y->xtoy) { /* get xtoy */ 1619 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1620 y->XtoY = xx->B; 1621 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1622 } 1623 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1624 } else { 1625 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatConjugate_MPIAIJ" 1634 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1635 { 1636 #if defined(PETSC_USE_COMPLEX) 1637 PetscErrorCode ierr; 1638 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1639 1640 PetscFunctionBegin; 1641 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1642 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1643 #else 1644 PetscFunctionBegin; 1645 #endif 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatRealPart_MPIAIJ" 1651 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1652 { 1653 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1654 PetscErrorCode ierr; 1655 1656 PetscFunctionBegin; 1657 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1658 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1664 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1665 { 1666 Mat_MPIAIJ 
*a = (Mat_MPIAIJ*)A->data; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1671 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #ifdef PETSC_HAVE_PBGL 1676 1677 #include <boost/parallel/mpi/bsp_process_group.hpp> 1678 #include <boost/graph/distributed/ilu_default_graph.hpp> 1679 #include <boost/graph/distributed/ilu_0_block.hpp> 1680 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1681 #include <boost/graph/distributed/petsc/interface.hpp> 1682 #include <boost/multi_array.hpp> 1683 #include <boost/parallel/distributed_property_map.hpp> 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1687 /* 1688 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1689 */ 1690 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1691 { 1692 namespace petsc = boost::distributed::petsc; 1693 1694 namespace graph_dist = boost::graph::distributed; 1695 using boost::graph::distributed::ilu_default::process_group_type; 1696 using boost::graph::ilu_permuted; 1697 1698 PetscTruth row_identity, col_identity; 1699 PetscContainer c; 1700 PetscInt m, n, M, N; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1705 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1706 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1707 if (!row_identity || !col_identity) { 1708 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1709 } 1710 1711 process_group_type pg; 1712 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1713 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1714 lgraph_type& level_graph = *lgraph_p; 1715 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1716 1717 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1718 ilu_permuted(level_graph); 1719 1720 /* put together the new matrix */ 1721 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1722 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1723 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1724 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1725 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1726 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 (*fact)->factor = FACTOR_LU; 1729 1730 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1731 ierr = PetscContainerSetPointer(c, lgraph_p); 1732 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1738 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1739 { 1740 PetscFunctionBegin; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSolve_MPIAIJ" 1746 /* 1747 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1748 */ 1749 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1750 { 1751 namespace graph_dist = boost::graph::distributed; 1752 1753 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1754 lgraph_type* lgraph_p; 1755 PetscContainer c; 1756 PetscErrorCode 
ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1760 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1761 ierr = VecCopy(b, x); CHKERRQ(ierr); 1762 1763 PetscScalar* array_x; 1764 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1765 PetscInt sx; 1766 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1767 1768 PetscScalar* array_b; 1769 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1770 PetscInt sb; 1771 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1772 1773 lgraph_type& level_graph = *lgraph_p; 1774 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1775 1776 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1777 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1778 ref_x(array_x, boost::extents[num_vertices(graph)]); 1779 1780 typedef boost::iterator_property_map<array_ref_type::iterator, 1781 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1782 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1783 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1784 1785 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1786 1787 PetscFunctionReturn(0); 1788 } 1789 #endif 1790 1791 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1792 PetscInt nzlocal,nsends,nrecvs; 1793 PetscMPIInt *send_rank; 1794 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1795 PetscScalar *sbuf_a,**rbuf_a; 1796 PetscErrorCode (*MatDestroy)(Mat); 1797 } Mat_Redundant; 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1801 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1802 { 1803 PetscErrorCode ierr; 1804 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1805 PetscInt i; 1806 1807 PetscFunctionBegin; 1808 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1809 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1810 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1811 for (i=0; i<redund->nrecvs; i++){ 1812 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1813 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1814 } 1815 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1816 ierr = PetscFree(redund);CHKERRQ(ierr); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatDestroy_MatRedundant" 1822 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1823 { 1824 PetscErrorCode ierr; 1825 PetscContainer container; 1826 Mat_Redundant *redund=PETSC_NULL; 1827 1828 PetscFunctionBegin; 1829 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1830 if (container) { 1831 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1832 } else { 1833 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1834 } 1835 A->ops->destroy = redund->MatDestroy; 1836 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1837 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1838 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1844 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1845 { 1846 PetscMPIInt rank,size; 1847 MPI_Comm comm=((PetscObject)mat)->comm; 1848 PetscErrorCode ierr; 1849 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1850 PetscMPIInt 
*send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1851 PetscInt *rowrange=mat->rmap.range; 1852 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1853 Mat A=aij->A,B=aij->B,C=*matredundant; 1854 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1855 PetscScalar *sbuf_a; 1856 PetscInt nzlocal=a->nz+b->nz; 1857 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1858 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1859 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1860 PetscScalar *vals,*aworkA,*aworkB; 1861 PetscMPIInt tag1,tag2,tag3,imdex; 1862 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1863 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1864 MPI_Status recv_status,*send_status; 1865 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1866 PetscInt **rbuf_j=PETSC_NULL; 1867 PetscScalar **rbuf_a=PETSC_NULL; 1868 Mat_Redundant *redund=PETSC_NULL; 1869 PetscContainer container; 1870 1871 PetscFunctionBegin; 1872 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1873 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1874 1875 if (reuse == MAT_REUSE_MATRIX) { 1876 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1877 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1878 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1879 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1880 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1881 if (container) { 1882 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1883 } else { 1884 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1885 } 1886 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 1887 1888 nsends = redund->nsends; 1889 nrecvs = redund->nrecvs; 1890 send_rank = redund->send_rank; recv_rank = send_rank + size; 1891 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1892 sbuf_j = redund->sbuf_j; 1893 sbuf_a = redund->sbuf_a; 1894 rbuf_j = redund->rbuf_j; 1895 rbuf_a = redund->rbuf_a; 1896 } 1897 1898 if (reuse == MAT_INITIAL_MATRIX){ 1899 PetscMPIInt subrank,subsize; 1900 PetscInt nleftover,np_subcomm; 1901 /* get the destination processors' id send_rank, nsends and nrecvs */ 1902 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1903 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1904 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1905 recv_rank = send_rank + size; 1906 np_subcomm = size/nsubcomm; 1907 nleftover = size - nsubcomm*np_subcomm; 1908 nsends = 0; nrecvs = 0; 1909 for (i=0; i<size; i++){ /* i=rank*/ 1910 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1911 send_rank[nsends] = i; nsends++; 1912 recv_rank[nrecvs++] = i; 1913 } 1914 } 1915 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1916 i = size-nleftover-1; 1917 j = 0; 1918 while (j < nsubcomm - nleftover){ 1919 send_rank[nsends++] = i; 1920 i--; j++; 1921 } 1922 } 1923 1924 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1925 for (i=0; i<nleftover; i++){ 1926 recv_rank[nrecvs++] = size-nleftover+i; 1927 } 1928 } 1929 1930 /* allocate sbuf_j, sbuf_a */ 1931 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1932 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1933 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1934 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1935 1936 /* copy mat's local entries into the buffers */ 1937 if (reuse == MAT_INITIAL_MATRIX){ 1938 rownz_max = 0; 1939 rptr = sbuf_j; 1940 cols = sbuf_j + rend-rstart + 1; 1941 vals = sbuf_a; 1942 rptr[0] = 0; 1943 for (i=0; i<rend-rstart; i++){ 1944 row = i + rstart; 1945 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1946 ncols = nzA + nzB; 1947 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1948 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1949 /* load the column indices for this row into cols */ 1950 lwrite = 0; 1951 for (l=0; l<nzB; l++) { 1952 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1953 vals[lwrite] = aworkB[l]; 1954 cols[lwrite++] = ctmp; 1955 } 1956 } 1957 for (l=0; l<nzA; l++){ 1958 vals[lwrite] = aworkA[l]; 1959 cols[lwrite++] = cstart + cworkA[l]; 1960 } 1961 for (l=0; l<nzB; l++) { 1962 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1963 vals[lwrite] = aworkB[l]; 1964 cols[lwrite++] = ctmp; 1965 } 1966 } 1967 vals += ncols; 1968 cols += ncols; 1969 rptr[i+1] = rptr[i] + ncols; 1970 if (rownz_max < ncols) rownz_max = ncols; 1971 } 1972 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1973 } else { /* only copy matrix values into sbuf_a */ 1974 rptr = sbuf_j; 1975 vals = sbuf_a; 1976 rptr[0] = 0; 1977 for (i=0; i<rend-rstart; i++){ 1978 row = i + rstart; 1979 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1980 ncols = nzA + nzB; 1981 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1982 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1983 lwrite = 0; 1984 for (l=0; l<nzB; l++) { 1985 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1986 } 1987 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1988 for (l=0; l<nzB; l++) { 
1989 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1990 } 1991 vals += ncols; 1992 rptr[i+1] = rptr[i] + ncols; 1993 } 1994 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1995 1996 /* send nzlocal to others, and recv other's nzlocal */ 1997 /*--------------------------------------------------*/ 1998 if (reuse == MAT_INITIAL_MATRIX){ 1999 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2000 s_waits2 = s_waits3 + nsends; 2001 s_waits1 = s_waits2 + nsends; 2002 r_waits1 = s_waits1 + nsends; 2003 r_waits2 = r_waits1 + nrecvs; 2004 r_waits3 = r_waits2 + nrecvs; 2005 } else { 2006 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2007 r_waits3 = s_waits3 + nsends; 2008 } 2009 2010 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2011 if (reuse == MAT_INITIAL_MATRIX){ 2012 /* get new tags to keep the communication clean */ 2013 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2014 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2015 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2016 rbuf_nz = sbuf_nz + nsends; 2017 2018 /* post receives of other's nzlocal */ 2019 for (i=0; i<nrecvs; i++){ 2020 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2021 } 2022 /* send nzlocal to others */ 2023 for (i=0; i<nsends; i++){ 2024 sbuf_nz[i] = nzlocal; 2025 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2026 } 2027 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2028 count = nrecvs; 2029 while (count) { 2030 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2031 recv_rank[imdex] = recv_status.MPI_SOURCE; 2032 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2033 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2034 2035 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2036 rbuf_nz[imdex] += i + 2; 2037 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2038 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2039 count--; 2040 } 2041 /* wait on sends of nzlocal */ 2042 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2043 /* send mat->i,j to others, and recv from other's */ 2044 /*------------------------------------------------*/ 2045 for (i=0; i<nsends; i++){ 2046 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2047 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2048 } 2049 /* wait on receives of mat->i,j */ 2050 /*------------------------------*/ 2051 count = nrecvs; 2052 while (count) { 2053 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2054 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2055 count--; 2056 } 2057 /* wait on sends of mat->i,j */ 2058 /*---------------------------*/ 2059 if (nsends) { 2060 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2061 } 2062 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2063 2064 /* post receives, send and receive mat->a */ 2065 /*----------------------------------------*/ 
2066 for (imdex=0; imdex<nrecvs; imdex++) { 2067 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2068 } 2069 for (i=0; i<nsends; i++){ 2070 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2071 } 2072 count = nrecvs; 2073 while (count) { 2074 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2075 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2076 count--; 2077 } 2078 if (nsends) { 2079 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2080 } 2081 2082 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2083 2084 /* create redundant matrix */ 2085 /*-------------------------*/ 2086 if (reuse == MAT_INITIAL_MATRIX){ 2087 /* compute rownz_max for preallocation */ 2088 for (imdex=0; imdex<nrecvs; imdex++){ 2089 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2090 rptr = rbuf_j[imdex]; 2091 for (i=0; i<j; i++){ 2092 ncols = rptr[i+1] - rptr[i]; 2093 if (rownz_max < ncols) rownz_max = ncols; 2094 } 2095 } 2096 2097 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2098 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2099 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2100 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2101 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2102 } else { 2103 C = *matredundant; 2104 } 2105 2106 /* insert local matrix entries */ 2107 rptr = sbuf_j; 2108 cols = sbuf_j + rend-rstart + 1; 2109 vals = sbuf_a; 2110 for (i=0; i<rend-rstart; i++){ 2111 row = i + rstart; 2112 ncols = rptr[i+1] - rptr[i]; 2113 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2114 vals += ncols; 2115 cols += ncols; 2116 } 2117 /* insert received matrix entries */ 2118 for (imdex=0; imdex<nrecvs; imdex++){ 2119 rstart = rowrange[recv_rank[imdex]]; 2120 rend = rowrange[recv_rank[imdex]+1]; 2121 rptr = rbuf_j[imdex]; 2122 cols = rbuf_j[imdex] + rend-rstart + 1; 2123 vals = rbuf_a[imdex]; 2124 for (i=0; i<rend-rstart; i++){ 2125 row = i + rstart; 2126 ncols = rptr[i+1] - rptr[i]; 2127 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2128 vals += ncols; 2129 cols += ncols; 2130 } 2131 } 2132 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2133 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2135 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2136 if (reuse == MAT_INITIAL_MATRIX){ 2137 PetscContainer container; 2138 *matredundant = C; 2139 /* create a supporting struct and attach it to C for reuse */ 2140 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2141 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2142 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2143 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2144 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2145 2146 redund->nzlocal = nzlocal; 2147 redund->nsends = nsends; 2148 redund->nrecvs = nrecvs; 2149 redund->send_rank = send_rank; 2150 redund->sbuf_nz = sbuf_nz; 2151 redund->sbuf_j = sbuf_j; 2152 redund->sbuf_a = sbuf_a; 2153 redund->rbuf_j = 
rbuf_j; 2154 redund->rbuf_a = rbuf_a; 2155 2156 redund->MatDestroy = C->ops->destroy; 2157 C->ops->destroy = MatDestroy_MatRedundant; 2158 } 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2164 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2165 { 2166 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2167 PetscInt n = A->rmap.n; 2168 PetscInt cstart = A->cmap.rstart; 2169 PetscInt *cmap = mat->garray; 2170 PetscInt *diagIdx, *offdiagIdx; 2171 Vec diagV, offdiagV; 2172 PetscScalar *a, *diagA, *offdiagA; 2173 PetscInt r; 2174 PetscErrorCode ierr; 2175 2176 PetscFunctionBegin; 2177 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2178 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2179 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2180 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2181 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2182 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2183 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2184 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2185 for(r = 0; r < n; ++r) { 2186 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2187 a[r] = diagA[r]; 2188 idx[r] = cstart + diagIdx[r]; 2189 } else { 2190 a[r] = offdiagA[r]; 2191 idx[r] = cmap[offdiagIdx[r]]; 2192 } 2193 } 2194 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2195 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2196 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2197 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2198 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2199 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2205 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2206 { 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /* -------------------------------------------------------------------*/ 2215 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2216 MatGetRow_MPIAIJ, 2217 MatRestoreRow_MPIAIJ, 2218 MatMult_MPIAIJ, 2219 /* 4*/ MatMultAdd_MPIAIJ, 2220 MatMultTranspose_MPIAIJ, 2221 MatMultTransposeAdd_MPIAIJ, 2222 #ifdef PETSC_HAVE_PBGL 2223 MatSolve_MPIAIJ, 2224 #else 2225 0, 2226 #endif 2227 0, 2228 0, 2229 /*10*/ 0, 2230 0, 2231 0, 2232 MatRelax_MPIAIJ, 2233 MatTranspose_MPIAIJ, 2234 /*15*/ MatGetInfo_MPIAIJ, 2235 MatEqual_MPIAIJ, 2236 MatGetDiagonal_MPIAIJ, 2237 MatDiagonalScale_MPIAIJ, 2238 MatNorm_MPIAIJ, 2239 /*20*/ MatAssemblyBegin_MPIAIJ, 2240 MatAssemblyEnd_MPIAIJ, 2241 0, 2242 MatSetOption_MPIAIJ, 2243 MatZeroEntries_MPIAIJ, 2244 /*25*/ MatZeroRows_MPIAIJ, 2245 0, 2246 #ifdef PETSC_HAVE_PBGL 2247 MatLUFactorNumeric_MPIAIJ, 2248 #else 2249 0, 2250 #endif 2251 0, 2252 0, 2253 /*30*/ MatSetUpPreallocation_MPIAIJ, 2254 #ifdef PETSC_HAVE_PBGL 2255 MatILUFactorSymbolic_MPIAIJ, 2256 #else 2257 0, 2258 #endif 2259 0, 2260 0, 2261 0, 2262 /*35*/ MatDuplicate_MPIAIJ, 2263 0, 2264 0, 2265 0, 2266 0, 2267 /*40*/ MatAXPY_MPIAIJ, 2268 MatGetSubMatrices_MPIAIJ, 2269 MatIncreaseOverlap_MPIAIJ, 2270 MatGetValues_MPIAIJ, 2271 MatCopy_MPIAIJ, 2272 /*45*/ 0, 2273 MatScale_MPIAIJ, 2274 0, 2275 0, 2276 0, 2277 /*50*/ MatSetBlockSize_MPIAIJ, 2278 0, 2279 0, 2280 0, 2281 0, 2282 /*55*/ 
MatFDColoringCreate_MPIAIJ, 2283 0, 2284 MatSetUnfactored_MPIAIJ, 2285 MatPermute_MPIAIJ, 2286 0, 2287 /*60*/ MatGetSubMatrix_MPIAIJ, 2288 MatDestroy_MPIAIJ, 2289 MatView_MPIAIJ, 2290 0, 2291 0, 2292 /*65*/ 0, 2293 0, 2294 0, 2295 0, 2296 0, 2297 /*70*/ 0, 2298 0, 2299 MatSetColoring_MPIAIJ, 2300 #if defined(PETSC_HAVE_ADIC) 2301 MatSetValuesAdic_MPIAIJ, 2302 #else 2303 0, 2304 #endif 2305 MatSetValuesAdifor_MPIAIJ, 2306 /*75*/ 0, 2307 0, 2308 0, 2309 0, 2310 0, 2311 /*80*/ 0, 2312 0, 2313 0, 2314 0, 2315 /*84*/ MatLoad_MPIAIJ, 2316 0, 2317 0, 2318 0, 2319 0, 2320 0, 2321 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2322 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2323 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2324 MatPtAP_Basic, 2325 MatPtAPSymbolic_MPIAIJ, 2326 /*95*/ MatPtAPNumeric_MPIAIJ, 2327 0, 2328 0, 2329 0, 2330 0, 2331 /*100*/0, 2332 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2333 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2334 MatConjugate_MPIAIJ, 2335 0, 2336 /*105*/MatSetValuesRow_MPIAIJ, 2337 MatRealPart_MPIAIJ, 2338 MatImaginaryPart_MPIAIJ, 2339 0, 2340 0, 2341 /*110*/0, 2342 MatGetRedundantMatrix_MPIAIJ, 2343 MatGetRowMin_MPIAIJ, 2344 0, 2345 0, 2346 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2347 2348 /* ----------------------------------------------------------------------------------------*/ 2349 2350 EXTERN_C_BEGIN 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2353 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2354 { 2355 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2360 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 EXTERN_C_END 2364 2365 EXTERN_C_BEGIN 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2368 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2369 { 2370 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2375 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 EXTERN_C_END 2379 2380 #include "petscpc.h" 2381 EXTERN_C_BEGIN 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2384 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2385 { 2386 Mat_MPIAIJ *b; 2387 PetscErrorCode ierr; 2388 PetscInt i; 2389 2390 PetscFunctionBegin; 2391 B->preallocated = PETSC_TRUE; 2392 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2393 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2394 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2395 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2396 2397 B->rmap.bs = B->cmap.bs = 1; 2398 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2399 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2400 if (d_nnz) { 2401 for (i=0; i<B->rmap.n; i++) { 2402 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2403 } 2404 } 2405 if (o_nnz) { 2406 for (i=0; i<B->rmap.n; i++) { 2407 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2408 } 2409 } 2410 b = (Mat_MPIAIJ*)B->data; 2411 2412 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2413 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2414 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2415 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2416 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2417 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2418 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2419 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2420 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2421 2422 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2423 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2424 2425 PetscFunctionReturn(0); 2426 } 2427 EXTERN_C_END 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2431 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2432 { 2433 Mat mat; 2434 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2435 PetscErrorCode ierr; 2436 2437 PetscFunctionBegin; 2438 *newmat = 0; 2439 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2440 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2441 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2442 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2443 a = (Mat_MPIAIJ*)mat->data; 2444 2445 mat->factor = matin->factor; 2446 mat->rmap.bs = matin->rmap.bs; 2447 mat->assembled = PETSC_TRUE; 2448 mat->insertmode = NOT_SET_VALUES; 2449 mat->preallocated = PETSC_TRUE; 2450 2451 a->size = oldmat->size; 2452 a->rank = oldmat->rank; 2453 a->donotstash = oldmat->donotstash; 2454 a->roworiented = oldmat->roworiented; 2455 a->rowindices = 0; 2456 a->rowvalues = 0; 2457 a->getrowactive = PETSC_FALSE; 2458 2459 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2460 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2461 2462 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2463 if (oldmat->colmap) { 2464 #if defined (PETSC_USE_CTABLE) 2465 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2466 #else 2467 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2468 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2469 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2470 #endif 2471 } else a->colmap = 0; 2472 if (oldmat->garray) { 2473 PetscInt len; 2474 len = oldmat->B->cmap.n; 2475 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2476 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2477 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2478 } else a->garray = 0; 2479 2480 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2481 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2482 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2483 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2484 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2485 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2486 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2487 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2488 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2489 *newmat = mat; 2490 PetscFunctionReturn(0); 2491 } 
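
/*
   Usage sketch (illustrative only): how the MatMPIAIJSetPreallocation_MPIAIJ()
   implementation above is normally reached through the public interface.  The
   matrix size (100 x 100) and the per-row nonzero counts (3 diagonal, 2
   off-diagonal) are assumptions made for the example, not values taken from
   this file.

     Mat            A;
     PetscInt       i,rstart,rend;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
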
2492 2493 #include "petscsys.h" 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatLoad_MPIAIJ" 2497 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2498 { 2499 Mat A; 2500 PetscScalar *vals,*svals; 2501 MPI_Comm comm = ((PetscObject)viewer)->comm; 2502 MPI_Status status; 2503 PetscErrorCode ierr; 2504 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2505 PetscInt i,nz,j,rstart,rend,mmax; 2506 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2507 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2508 PetscInt cend,cstart,n,*rowners; 2509 int fd; 2510 2511 PetscFunctionBegin; 2512 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2513 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2514 if (!rank) { 2515 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2516 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2517 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2518 } 2519 2520 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2521 M = header[1]; N = header[2]; 2522 /* determine ownership of all rows */ 2523 m = M/size + ((M % size) > rank); 2524 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2525 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2526 2527 /* First process needs enough room for process with most rows */ 2528 if (!rank) { 2529 mmax = rowners[1]; 2530 for (i=2; i<size; i++) { 2531 mmax = PetscMax(mmax,rowners[i]); 2532 } 2533 } else mmax = m; 2534 2535 rowners[0] = 0; 2536 for (i=2; i<=size; i++) { 2537 rowners[i] += rowners[i-1]; 2538 } 2539 rstart = rowners[rank]; 2540 rend = rowners[rank+1]; 2541 2542 /* distribute row lengths to all processors */ 2543 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2544 if (!rank) { 2545 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2546 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2547 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2548 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2549 for (j=0; j<m; j++) { 2550 procsnz[0] += ourlens[j]; 2551 } 2552 for (i=1; i<size; i++) { 2553 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2554 /* calculate the number of nonzeros on each processor */ 2555 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2556 procsnz[i] += rowlengths[j]; 2557 } 2558 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2559 } 2560 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2561 } else { 2562 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2563 } 2564 2565 if (!rank) { 2566 /* determine max buffer needed and allocate it */ 2567 maxnz = 0; 2568 for (i=0; i<size; i++) { 2569 maxnz = PetscMax(maxnz,procsnz[i]); 2570 } 2571 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2572 2573 /* read in my part of the matrix column indices */ 2574 nz = procsnz[0]; 2575 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2576 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2577 2578 /* read in every one elses and ship off */ 2579 for (i=1; i<size; i++) { 2580 nz = procsnz[i]; 2581 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2582 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2583 } 2584 ierr = PetscFree(cols);CHKERRQ(ierr); 2585 } else { 2586 /* 
determine buffer space needed for message */ 2587 nz = 0; 2588 for (i=0; i<m; i++) { 2589 nz += ourlens[i]; 2590 } 2591 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2592 2593 /* receive message of column indices*/ 2594 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2595 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2596 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2597 } 2598 2599 /* determine column ownership if matrix is not square */ 2600 if (N != M) { 2601 n = N/size + ((N % size) > rank); 2602 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2603 cstart = cend - n; 2604 } else { 2605 cstart = rstart; 2606 cend = rend; 2607 n = cend - cstart; 2608 } 2609 2610 /* loop over local rows, determining number of off diagonal entries */ 2611 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2612 jj = 0; 2613 for (i=0; i<m; i++) { 2614 for (j=0; j<ourlens[i]; j++) { 2615 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2616 jj++; 2617 } 2618 } 2619 2620 /* create our matrix */ 2621 for (i=0; i<m; i++) { 2622 ourlens[i] -= offlens[i]; 2623 } 2624 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2625 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2626 ierr = MatSetType(A,type);CHKERRQ(ierr); 2627 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2628 2629 for (i=0; i<m; i++) { 2630 ourlens[i] += offlens[i]; 2631 } 2632 2633 if (!rank) { 2634 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2635 2636 /* read in my part of the matrix numerical values */ 2637 nz = procsnz[0]; 2638 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2639 2640 /* insert into matrix */ 2641 jj = rstart; 2642 smycols = mycols; 2643 svals = vals; 2644 for (i=0; i<m; i++) { 2645 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2646 smycols += ourlens[i]; 2647 svals += ourlens[i]; 2648 jj++; 2649 } 2650 2651 /* read in other processors and ship out */ 2652 for (i=1; i<size; i++) { 2653 nz = procsnz[i]; 2654 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2655 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2656 } 2657 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2658 } else { 2659 /* receive numeric values */ 2660 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2661 2662 /* receive message of values*/ 2663 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2664 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2665 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2666 2667 /* insert into matrix */ 2668 jj = rstart; 2669 smycols = mycols; 2670 svals = vals; 2671 for (i=0; i<m; i++) { 2672 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2673 smycols += ourlens[i]; 2674 svals += ourlens[i]; 2675 jj++; 2676 } 2677 } 2678 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2679 ierr = PetscFree(vals);CHKERRQ(ierr); 2680 ierr = PetscFree(mycols);CHKERRQ(ierr); 2681 ierr = PetscFree(rowners);CHKERRQ(ierr); 2682 2683 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 *newmat = A; 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2691 /* 2692 Not great since it makes two copies of the 
submatrix, first an SeqAIJ 2693 in local and then by concatenating the local matrices the end result. 2694 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2695 */ 2696 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2697 { 2698 PetscErrorCode ierr; 2699 PetscMPIInt rank,size; 2700 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2701 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2702 Mat *local,M,Mreuse; 2703 PetscScalar *vwork,*aa; 2704 MPI_Comm comm = ((PetscObject)mat)->comm; 2705 Mat_SeqAIJ *aij; 2706 2707 2708 PetscFunctionBegin; 2709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2710 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2711 2712 if (call == MAT_REUSE_MATRIX) { 2713 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2714 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2715 local = &Mreuse; 2716 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2717 } else { 2718 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2719 Mreuse = *local; 2720 ierr = PetscFree(local);CHKERRQ(ierr); 2721 } 2722 2723 /* 2724 m - number of local rows 2725 n - number of columns (same on all processors) 2726 rstart - first row in new global matrix generated 2727 */ 2728 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2729 if (call == MAT_INITIAL_MATRIX) { 2730 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2731 ii = aij->i; 2732 jj = aij->j; 2733 2734 /* 2735 Determine the number of non-zeros in the diagonal and off-diagonal 2736 portions of the matrix in order to do correct preallocation 2737 */ 2738 2739 /* first get start and end of "diagonal" columns */ 2740 if (csize == PETSC_DECIDE) { 2741 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2742 if (mglobal == n) { /* square matrix */ 2743 nlocal = m; 2744 } else { 2745 nlocal = n/size + ((n % size) > rank); 2746 } 2747 } else { 2748 nlocal = csize; 2749 } 2750 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2751 rstart = rend - nlocal; 2752 if (rank == size - 1 && rend != n) { 2753 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2754 } 2755 2756 /* next, compute all the lengths */ 2757 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2758 olens = dlens + m; 2759 for (i=0; i<m; i++) { 2760 jend = ii[i+1] - ii[i]; 2761 olen = 0; 2762 dlen = 0; 2763 for (j=0; j<jend; j++) { 2764 if (*jj < rstart || *jj >= rend) olen++; 2765 else dlen++; 2766 jj++; 2767 } 2768 olens[i] = olen; 2769 dlens[i] = dlen; 2770 } 2771 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2772 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2773 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2774 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2775 ierr = PetscFree(dlens);CHKERRQ(ierr); 2776 } else { 2777 PetscInt ml,nl; 2778 2779 M = *newmat; 2780 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2781 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2782 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2783 /* 2784 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2785 rather than the slower MatSetValues(). 
2786    */
2787    M->was_assembled = PETSC_TRUE;
2788    M->assembled     = PETSC_FALSE;
2789  }
2790  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2791  aij = (Mat_SeqAIJ*)(Mreuse)->data;
2792  ii  = aij->i;
2793  jj  = aij->j;
2794  aa  = aij->a;
2795  for (i=0; i<m; i++) {
2796    row   = rstart + i;
2797    nz    = ii[i+1] - ii[i];
2798    cwork = jj;     jj += nz;
2799    vwork = aa;     aa += nz;
2800    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2801  }
2802 
2803  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805  *newmat = M;
2806 
2807  /* save submatrix used in processor for next request */
2808  if (call == MAT_INITIAL_MATRIX) {
2809    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2810    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2811  }
2812 
2813  PetscFunctionReturn(0);
2814 }
2815 
2816 EXTERN_C_BEGIN
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2819 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2820 {
2821   PetscInt       m,cstart, cend,j,nnz,i,d;
2822   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2823   const PetscInt *JJ;
2824   PetscScalar    *values;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2829 
2830   B->rmap.bs = B->cmap.bs = 1;
2831   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2832   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2833   m      = B->rmap.n;
2834   cstart = B->cmap.rstart;
2835   cend   = B->cmap.rend;
2836   rstart = B->rmap.rstart;
2837 
2838   ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2839   o_nnz = d_nnz + m;
2840 
2841 #if defined(PETSC_USE_DEBUG)
2842   for (i=0; i<m; i++) {
2843     nnz = Ii[i+1]- Ii[i];
2844     JJ  = J + Ii[i];
2845     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2846     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
2847     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2848     for (j=1; j<nnz; j++) {
2849       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
2850     }
2851   }
2852 #endif
2853 
2854   for (i=0; i<m; i++) {
2855     nnz     = Ii[i+1]- Ii[i];
2856     JJ      = J + Ii[i];
2857     nnz_max = PetscMax(nnz_max,nnz);
2858     for (j=0; j<nnz; j++) {
2859       if (*JJ >= cstart) break;
2860       JJ++;
2861     }
2862     d = 0;
2863     for (; j<nnz; j++) {
2864       if (*JJ++ >= cend) break;
2865       d++;
2866     }
2867     d_nnz[i] = d;
2868     o_nnz[i] = nnz - d;
2869   }
2870   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2871   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2872 
2873   if (v) values = (PetscScalar*)v;
2874   else {
2875     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2876     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2877   }
2878 
2879   for (i=0; i<m; i++) {
2880     ii   = i + rstart;
2881     nnz  = Ii[i+1]- Ii[i];
2882     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 2887 if (!v) { 2888 ierr = PetscFree(values);CHKERRQ(ierr); 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 EXTERN_C_END 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2896 /*@ 2897 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2898 (the default parallel PETSc format). 2899 2900 Collective on MPI_Comm 2901 2902 Input Parameters: 2903 + B - the matrix 2904 . i - the indices into j for the start of each local row (starts with zero) 2905 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2906 - v - optional values in the matrix 2907 2908 Level: developer 2909 2910 Notes: 2911 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2912 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2913 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2914 2915 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2916 2917 The format which is used for the sparse matrix input, is equivalent to a 2918 row-major ordering.. i.e for the following matrix, the input data expected is 2919 as shown: 2920 2921 1 0 0 2922 2 0 3 P0 2923 ------- 2924 4 5 6 P1 2925 2926 Process0 [P0]: rows_owned=[0,1] 2927 i = {0,1,3} [size = nrow+1 = 2+1] 2928 j = {0,0,2} [size = nz = 6] 2929 v = {1,2,3} [size = nz = 6] 2930 2931 Process1 [P1]: rows_owned=[2] 2932 i = {0,3} [size = nrow+1 = 1+1] 2933 j = {0,1,2} [size = nz = 6] 2934 v = {4,5,6} [size = nz = 6] 2935 2936 The column indices for each row MUST be sorted. 2937 2938 .keywords: matrix, aij, compressed row, sparse, parallel 2939 2940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2941 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2942 @*/ 2943 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2944 { 2945 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2946 2947 PetscFunctionBegin; 2948 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2949 if (f) { 2950 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2951 } 2952 PetscFunctionReturn(0); 2953 } 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2957 /*@C 2958 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2959 (the default parallel PETSc format). For good matrix assembly performance 2960 the user should preallocate the matrix storage by setting the parameters 2961 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2962 performance can be increased by more than a factor of 50. 2963 2964 Collective on MPI_Comm 2965 2966 Input Parameters: 2967 + A - the matrix 2968 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2969 (same value is used for all local rows) 2970 . d_nnz - array containing the number of nonzeros in the various rows of the 2971 DIAGONAL portion of the local submatrix (possibly different for each row) 2972 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2973          The size of this array is equal to the number of local rows, i.e 'm'.
2974          You must leave room for the diagonal entry even if it is zero.
2975 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2976          submatrix (same value is used for all local rows).
2977 -  o_nnz - array containing the number of nonzeros in the various rows of the
2978          OFF-DIAGONAL portion of the local submatrix (possibly different for
2979          each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2980          structure. The size of this array is equal to the number
2981          of local rows, i.e 'm'.
2982 
2983    If the *_nnz parameter is given then the *_nz parameter is ignored.
2984 
2985    The AIJ format (also called the Yale sparse matrix format or
2986    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2987    storage. The stored row and column indices begin with zero. See the users manual for details.
2988 
2989    The parallel matrix is partitioned such that the first m0 rows belong to
2990    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2991    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2992 
2993    The DIAGONAL portion of the local submatrix of a processor can be defined
2994    as the submatrix which is obtained by extracting the part corresponding
2995    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2996    first row that belongs to the processor, and r2 is the last row belonging
2997    to this processor. This is a square mxm matrix. The remaining portion
2998    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2999 
3000    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3001 
3002    You can call MatGetInfo() to get information on how effective the preallocation was;
3003    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3004    You can also run with the option -info and look for messages with the string
3005    malloc in them to see if additional memory allocation was needed.
3006 
3007    Example usage:
3008 
3009    Consider the following 8x8 matrix with 34 non-zero values, that is
3010    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3011    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3012    as follows:
3013 
3014 .vb
3015             1  2  0  |  0  3  0  |  0  4
3016     Proc0   0  5  6  |  7  0  0  |  8  0
3017             9  0 10  | 11  0  0  | 12  0
3018     -------------------------------------
3019            13  0 14  | 15 16 17  |  0  0
3020     Proc1   0 18  0  | 19 20 21  |  0  0
3021             0  0  0  | 22 23  0  | 24  0
3022     -------------------------------------
3023     Proc2  25 26 27  |  0  0 28  | 29  0
3024            30  0  0  | 31 32 33  |  0 34
3025 .ve
3026 
3027    This can be represented as a collection of submatrices as:
3028 
3029 .vb
3030       A B C
3031       D E F
3032       G H I
3033 .ve
3034 
3035    Where the submatrices A,B,C are owned by proc0, D,E,F are
3036    owned by proc1, G,H,I are owned by proc2.
3037 
3038    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3039    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3040    The 'M','N' parameters are 8,8, and have the same values on all procs.
3041 
3042    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3043    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3044    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3045    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3046    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3047    matrix, and [DF] as another SeqAIJ matrix.
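
   For instance (an illustrative sketch using the placeholder names A and comm; the
   per-row counts are the d_nnz/o_nnz values for proc1 worked out further below),
   the three rows owned by proc1 could be preallocated with:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve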
3048 
3049    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3050    allocated for every row of the local diagonal submatrix, and o_nz
3051    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3052    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3053    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3054    In this case, the values of d_nz,o_nz are:
3055 .vb
3056      proc0 : d_nz = 2, o_nz = 2
3057      proc1 : d_nz = 3, o_nz = 2
3058      proc2 : d_nz = 1, o_nz = 4
3059 .ve
3060    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3061    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3062    for proc2. i.e. we are using 12+15+10=37 storage locations to store
3063    34 values.
3064 
3065    When the d_nnz, o_nnz parameters are specified, the storage is specified
3066    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3067    In the above case the values for d_nnz,o_nnz are:
3068 .vb
3069      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3070      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3071      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3072 .ve
3073    Here the space allocated is the sum of all of the above values, i.e. 34, and
3074    hence the pre-allocation is perfect.
3075 
3076    Level: intermediate
3077 
3078 .keywords: matrix, aij, compressed row, sparse, parallel
3079 
3080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3081           MPIAIJ, MatGetInfo()
3082 @*/
3083 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3084 {
3085   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3086 
3087   PetscFunctionBegin;
3088   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3089   if (f) {
3090     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3097 /*@
3098    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
3099    local rows in standard CSR format.
3100 
3101    Collective on MPI_Comm
3102 
3103    Input Parameters:
3104 +  comm - MPI communicator
3105 .  m - number of local rows (Cannot be PETSC_DECIDE)
3106 .  n - This value should be the same as the local size used in creating the
3107        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3108        calculated if N is given) For square matrices n is almost always m.
3109 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3110 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3111 .  i - row indices
3112 .  j - column indices
3113 -  a - matrix values
3114 
3115    Output Parameter:
3116 .  mat - the matrix
3117 
3118    Level: intermediate
3119 
3120    Notes:
3121    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3122    thus you CANNOT change the matrix entries by changing the values of a[] after you have
3123    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3124 
3125    The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3126 
3127    The format which is used for the sparse matrix input, is equivalent to a
3128    row-major ordering,
i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
   true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter 'm'; i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by that processor; i.e. the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz and o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz and o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is exact.
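
   As an illustrative sketch (error checking omitted; the arrays hold the per-process
   values from the example above), proc0 could create and preallocate the matrix with:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve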
3335 3336 Level: intermediate 3337 3338 .keywords: matrix, aij, compressed row, sparse, parallel 3339 3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3341 MPIAIJ, MatCreateMPIAIJWithArrays() 3342 @*/ 3343 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3344 { 3345 PetscErrorCode ierr; 3346 PetscMPIInt size; 3347 3348 PetscFunctionBegin; 3349 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3350 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3351 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3352 if (size > 1) { 3353 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3354 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3355 } else { 3356 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3357 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3358 } 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3364 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3365 { 3366 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3367 3368 PetscFunctionBegin; 3369 *Ad = a->A; 3370 *Ao = a->B; 3371 *colmap = a->garray; 3372 PetscFunctionReturn(0); 3373 } 3374 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3377 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3378 { 3379 PetscErrorCode ierr; 3380 PetscInt i; 3381 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3382 3383 PetscFunctionBegin; 3384 if (coloring->ctype == IS_COLORING_GLOBAL) { 3385 ISColoringValue *allcolors,*colors; 3386 ISColoring ocoloring; 3387 3388 /* set coloring for diagonal portion */ 3389 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3390 3391 /* set coloring for off-diagonal portion */ 3392 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3393 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3394 for (i=0; i<a->B->cmap.n; i++) { 3395 colors[i] = allcolors[a->garray[i]]; 3396 } 3397 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3398 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3399 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3400 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3401 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3402 ISColoringValue *colors; 3403 PetscInt *larray; 3404 ISColoring ocoloring; 3405 3406 /* set coloring for diagonal portion */ 3407 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3408 for (i=0; i<a->A->cmap.n; i++) { 3409 larray[i] = i + A->cmap.rstart; 3410 } 3411 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3412 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3413 for (i=0; i<a->A->cmap.n; i++) { 3414 colors[i] = coloring->colors[larray[i]]; 3415 } 3416 ierr = PetscFree(larray);CHKERRQ(ierr); 3417 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3418 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3419 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3420 3421 /* set coloring for off-diagonal portion */ 3422 ierr = 
PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (each process passes its own)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.
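
   An illustrative sketch of the intended usage (Aloc and C are placeholder names;
   note that MatMerge() destroys the input matrix inmat):
.vb
     Mat Aloc,C;
     /* ... each process assembles its own sequential matrix Aloc ... */
     MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve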
3487 3488 @*/ 3489 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3490 { 3491 PetscErrorCode ierr; 3492 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3493 PetscInt *indx; 3494 PetscScalar *values; 3495 3496 PetscFunctionBegin; 3497 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3498 if (scall == MAT_INITIAL_MATRIX){ 3499 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3500 if (n == PETSC_DECIDE){ 3501 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3502 } 3503 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3504 rstart -= m; 3505 3506 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3507 for (i=0;i<m;i++) { 3508 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3509 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3510 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3511 } 3512 /* This routine will ONLY return MPIAIJ type matrix */ 3513 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3514 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3515 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3516 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3517 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3518 3519 } else if (scall == MAT_REUSE_MATRIX){ 3520 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3521 } else { 3522 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3523 } 3524 3525 for (i=0;i<m;i++) { 3526 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3527 Ii = i + rstart; 3528 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3529 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3530 } 3531 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3532 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 3535 PetscFunctionReturn(0); 3536 } 3537 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "MatFileSplit" 3540 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3541 { 3542 PetscErrorCode ierr; 3543 PetscMPIInt rank; 3544 PetscInt m,N,i,rstart,nnz; 3545 size_t len; 3546 const PetscInt *indx; 3547 PetscViewer out; 3548 char *name; 3549 Mat B; 3550 const PetscScalar *values; 3551 3552 PetscFunctionBegin; 3553 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3554 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3555 /* Should this be the type of the diagonal block of A? 
*/ 3556 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3557 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3558 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3559 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3560 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3561 for (i=0;i<m;i++) { 3562 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3563 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3564 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3565 } 3566 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3567 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3568 3569 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3570 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3571 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3572 sprintf(name,"%s.%d",outfile,rank); 3573 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3574 ierr = PetscFree(name); 3575 ierr = MatView(B,out);CHKERRQ(ierr); 3576 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3577 ierr = MatDestroy(B);CHKERRQ(ierr); 3578 PetscFunctionReturn(0); 3579 } 3580 3581 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3582 #undef __FUNCT__ 3583 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3584 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3585 { 3586 PetscErrorCode ierr; 3587 Mat_Merge_SeqsToMPI *merge; 3588 PetscContainer container; 3589 3590 PetscFunctionBegin; 3591 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3592 if (container) { 3593 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3594 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3595 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3596 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3597 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3598 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3599 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3600 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3601 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3602 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3603 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3604 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3605 3606 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3607 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3608 } 3609 ierr = PetscFree(merge);CHKERRQ(ierr); 3610 3611 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614 3615 #include "src/mat/utils/freespace.h" 3616 #include "petscbt.h" 3617 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3620 /*@C 3621 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3622 matrices from each processor 3623 3624 Collective on MPI_Comm 3625 3626 Input Parameters: 3627 + comm - the communicators the parallel matrix will live on 3628 . seqmat - the input sequential matrices 3629 . m - number of local rows (or PETSC_DECIDE) 3630 . n - number of local columns (or PETSC_DECIDE) 3631 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3632 3633 Output Parameter: 3634 . mpimat - the parallel matrix generated 3635 3636 Level: advanced 3637 3638 Notes: 3639 The dimensions of the sequential matrix in each processor MUST be the same. 3640 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3641 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3644 { 3645 PetscErrorCode ierr; 3646 MPI_Comm comm=((PetscObject)mpimat)->comm; 3647 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3648 PetscMPIInt size,rank,taga,*len_s; 3649 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3650 PetscInt proc,m; 3651 PetscInt **buf_ri,**buf_rj; 3652 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3653 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3654 MPI_Request *s_waits,*r_waits; 3655 MPI_Status *status; 3656 MatScalar *aa=a->a,**abuf_r,*ba_i; 3657 Mat_Merge_SeqsToMPI *merge; 3658 PetscContainer container; 3659 3660 PetscFunctionBegin; 3661 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3662 3663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3665 3666 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3667 if (container) { 3668 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3669 } 3670 bi = merge->bi; 3671 bj = merge->bj; 3672 buf_ri = merge->buf_ri; 3673 buf_rj = merge->buf_rj; 3674 3675 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3676 owners = merge->rowmap.range; 3677 len_s = merge->len_s; 3678 3679 /* send and recv matrix values */ 3680 /*-----------------------------*/ 3681 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3682 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3683 3684 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3685 for (proc=0,k=0; proc<size; proc++){ 3686 if (!len_s[proc]) continue; 3687 i = owners[proc]; 3688 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3689 k++; 3690 } 3691 3692 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3693 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3694 ierr = PetscFree(status);CHKERRQ(ierr); 3695 3696 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3697 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3698 3699 /* insert mat values of mpimat */ 3700 /*----------------------------*/ 3701 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3702 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3703 nextrow = buf_ri_k + merge->nrecv; 3704 nextai = nextrow + merge->nrecv; 3705 3706 for (k=0; k<merge->nrecv; k++){ 3707 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3708 nrows = *(buf_ri_k[k]); 3709 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3710 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3711 } 3712 3713 /* set values of ba */ 3714 m = merge->rowmap.n; 3715 for (i=0; i<m; i++) { 3716 arow = owners[rank] + i; 3717 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3718 bnzi = bi[i+1] - bi[i]; 3719 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3720 3721 /* add local non-zero vals of this proc's seqmat into ba */ 3722 anzi = ai[arow+1] - ai[arow]; 3723 aj = a->j + ai[arow]; 3724 aa = a->a + ai[arow]; 3725 nextaj = 0; 3726 for (j=0; nextaj<anzi; j++){ 3727 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3728 ba_i[j] += aa[nextaj++]; 3729 } 3730 } 3731 3732 /* add received vals into ba 
*/ 3733 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3734 /* i-th row */ 3735 if (i == *nextrow[k]) { 3736 anzi = *(nextai[k]+1) - *nextai[k]; 3737 aj = buf_rj[k] + *(nextai[k]); 3738 aa = abuf_r[k] + *(nextai[k]); 3739 nextaj = 0; 3740 for (j=0; nextaj<anzi; j++){ 3741 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3742 ba_i[j] += aa[nextaj++]; 3743 } 3744 } 3745 nextrow[k]++; nextai[k]++; 3746 } 3747 } 3748 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3749 } 3750 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 3753 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3754 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3755 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3756 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3763 { 3764 PetscErrorCode ierr; 3765 Mat B_mpi; 3766 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3767 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3768 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3769 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3770 PetscInt len,proc,*dnz,*onz; 3771 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3772 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3773 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3774 MPI_Status *status; 3775 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3776 PetscBT lnkbt; 3777 Mat_Merge_SeqsToMPI *merge; 3778 PetscContainer container; 3779 3780 PetscFunctionBegin; 3781 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3782 3783 /* make sure it is a PETSc comm */ 3784 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3785 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3786 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3787 3788 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3789 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3790 3791 /* determine row ownership */ 3792 /*---------------------------------------------------------*/ 3793 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3794 merge->rowmap.n = m; 3795 merge->rowmap.N = M; 3796 merge->rowmap.bs = 1; 3797 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3798 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3799 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3800 3801 m = merge->rowmap.n; 3802 M = merge->rowmap.N; 3803 owners = merge->rowmap.range; 3804 3805 /* determine the number of messages to send, their lengths */ 3806 /*---------------------------------------------------------*/ 3807 len_s = merge->len_s; 3808 3809 len = 0; /* length of buf_si[] */ 3810 merge->nsend = 0; 3811 for (proc=0; proc<size; proc++){ 3812 len_si[proc] = 0; 3813 if (proc == rank){ 3814 len_s[proc] = 0; 3815 } else { 3816 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3817 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3818 } 3819 if (len_s[proc]) { 3820 merge->nsend++; 3821 nrows = 0; 3822 for (i=owners[proc]; i<owners[proc+1]; i++){ 3823 if (ai[i+1] > ai[i]) nrows++; 3824 } 3825 len_si[proc] = 2*(nrows+1); 3826 len += len_si[proc]; 3827 } 3828 
} 3829 3830 /* determine the number and length of messages to receive for ij-structure */ 3831 /*-------------------------------------------------------------------------*/ 3832 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3833 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3834 3835 /* post the Irecv of j-structure */ 3836 /*-------------------------------*/ 3837 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3838 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3839 3840 /* post the Isend of j-structure */ 3841 /*--------------------------------*/ 3842 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3843 sj_waits = si_waits + merge->nsend; 3844 3845 for (proc=0, k=0; proc<size; proc++){ 3846 if (!len_s[proc]) continue; 3847 i = owners[proc]; 3848 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3849 k++; 3850 } 3851 3852 /* receives and sends of j-structure are complete */ 3853 /*------------------------------------------------*/ 3854 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3855 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3856 3857 /* send and recv i-structure */ 3858 /*---------------------------*/ 3859 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3860 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3861 3862 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3863 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3864 for (proc=0,k=0; proc<size; proc++){ 3865 if (!len_s[proc]) continue; 3866 /* form outgoing message for i-structure: 3867 buf_si[0]: nrows to be sent 3868 [1:nrows]: row index (global) 3869 [nrows+1:2*nrows+1]: i-structure index 3870 */ 3871 /*-------------------------------------------*/ 3872 nrows = len_si[proc]/2 - 1; 3873 buf_si_i = buf_si + nrows+1; 3874 buf_si[0] = nrows; 3875 buf_si_i[0] = 0; 3876 nrows = 0; 3877 for (i=owners[proc]; i<owners[proc+1]; i++){ 3878 anzi = ai[i+1] - ai[i]; 3879 if (anzi) { 3880 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3881 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3882 nrows++; 3883 } 3884 } 3885 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3886 k++; 3887 buf_si += len_si[proc]; 3888 } 3889 3890 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3891 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3892 3893 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3894 for (i=0; i<merge->nrecv; i++){ 3895 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3896 } 3897 3898 ierr = PetscFree(len_si);CHKERRQ(ierr); 3899 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3900 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3901 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3902 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3903 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3904 ierr = PetscFree(status);CHKERRQ(ierr); 3905 3906 /* compute a local seq matrix in each processor */ 3907 /*----------------------------------------------*/ 3908 /* allocate bi array and free space for accumulating 
nonzero column info */ 3909 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3910 bi[0] = 0; 3911 3912 /* create and initialize a linked list */ 3913 nlnk = N+1; 3914 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3915 3916 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3917 len = 0; 3918 len = ai[owners[rank+1]] - ai[owners[rank]]; 3919 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3920 current_space = free_space; 3921 3922 /* determine symbolic info for each local row */ 3923 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3924 nextrow = buf_ri_k + merge->nrecv; 3925 nextai = nextrow + merge->nrecv; 3926 for (k=0; k<merge->nrecv; k++){ 3927 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3928 nrows = *buf_ri_k[k]; 3929 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3930 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3931 } 3932 3933 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3934 len = 0; 3935 for (i=0;i<m;i++) { 3936 bnzi = 0; 3937 /* add local non-zero cols of this proc's seqmat into lnk */ 3938 arow = owners[rank] + i; 3939 anzi = ai[arow+1] - ai[arow]; 3940 aj = a->j + ai[arow]; 3941 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3942 bnzi += nlnk; 3943 /* add received col data into lnk */ 3944 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3945 if (i == *nextrow[k]) { /* i-th row */ 3946 anzi = *(nextai[k]+1) - *nextai[k]; 3947 aj = buf_rj[k] + *nextai[k]; 3948 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3949 bnzi += nlnk; 3950 nextrow[k]++; nextai[k]++; 3951 } 3952 } 3953 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3954 3955 /* if free space is not available, make more free space */ 3956 if (current_space->local_remaining<bnzi) { 3957 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3958 nspacedouble++; 3959 } 3960 /* copy data into free space, then initialize lnk */ 3961 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3962 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3963 3964 current_space->array += bnzi; 3965 current_space->local_used += bnzi; 3966 current_space->local_remaining -= bnzi; 3967 3968 bi[i+1] = bi[i] + bnzi; 3969 } 3970 3971 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3972 3973 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3974 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3975 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3976 3977 /* create symbolic parallel matrix B_mpi */ 3978 /*---------------------------------------*/ 3979 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3980 if (n==PETSC_DECIDE) { 3981 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3982 } else { 3983 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3984 } 3985 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3986 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3987 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3988 3989 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3990 B_mpi->assembled = PETSC_FALSE; 3991 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3992 merge->bi = bi; 3993 merge->bj = bj; 3994 merge->buf_ri = buf_ri; 3995 merge->buf_rj = buf_rj; 3996 merge->coi = 
PETSC_NULL; 3997 merge->coj = PETSC_NULL; 3998 merge->owners_co = PETSC_NULL; 3999 4000 /* attach the supporting struct to B_mpi for reuse */ 4001 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4002 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4003 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4004 *mpimat = B_mpi; 4005 4006 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4007 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4008 PetscFunctionReturn(0); 4009 } 4010 4011 #undef __FUNCT__ 4012 #define __FUNCT__ "MatMerge_SeqsToMPI" 4013 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4014 { 4015 PetscErrorCode ierr; 4016 4017 PetscFunctionBegin; 4018 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4019 if (scall == MAT_INITIAL_MATRIX){ 4020 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4021 } 4022 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4023 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4024 PetscFunctionReturn(0); 4025 } 4026 4027 #undef __FUNCT__ 4028 #define __FUNCT__ "MatGetLocalMat" 4029 /*@ 4030 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4031 4032 Not Collective 4033 4034 Input Parameters: 4035 + A - the matrix 4036 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4037 4038 Output Parameter: 4039 . A_loc - the local sequential matrix generated 4040 4041 Level: developer 4042 4043 @*/ 4044 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4045 { 4046 PetscErrorCode ierr; 4047 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4048 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4049 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4050 PetscScalar *aa=a->a,*ba=b->a,*ca; 4051 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4052 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4053 4054 PetscFunctionBegin; 4055 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4056 if (scall == MAT_INITIAL_MATRIX){ 4057 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4058 ci[0] = 0; 4059 for (i=0; i<am; i++){ 4060 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4061 } 4062 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4063 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4064 k = 0; 4065 for (i=0; i<am; i++) { 4066 ncols_o = bi[i+1] - bi[i]; 4067 ncols_d = ai[i+1] - ai[i]; 4068 /* off-diagonal portion of A */ 4069 for (jo=0; jo<ncols_o; jo++) { 4070 col = cmap[*bj]; 4071 if (col >= cstart) break; 4072 cj[k] = col; bj++; 4073 ca[k++] = *ba++; 4074 } 4075 /* diagonal portion of A */ 4076 for (j=0; j<ncols_d; j++) { 4077 cj[k] = cstart + *aj++; 4078 ca[k++] = *aa++; 4079 } 4080 /* off-diagonal portion of A */ 4081 for (j=jo; j<ncols_o; j++) { 4082 cj[k] = cmap[*bj++]; 4083 ca[k++] = *ba++; 4084 } 4085 } 4086 /* put together the new matrix */ 4087 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4088 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4089 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4090 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4091 mat->free_a = PETSC_TRUE; 4092 mat->free_ij = PETSC_TRUE; 4093 mat->nonew = 0; 4094 } else if (scall == MAT_REUSE_MATRIX){ 4095 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4096 ci = mat->i; cj = mat->j; ca = mat->a; 4097 for (i=0; i<am; i++) { 4098 /* off-diagonal portion of A */ 4099 ncols_o = bi[i+1] - bi[i]; 4100 for (jo=0; jo<ncols_o; jo++) { 4101 col = cmap[*bj]; 4102 if (col >= cstart) break; 4103 *ca++ = *ba++; bj++; 4104 } 4105 /* diagonal portion of A */ 4106 ncols_d = ai[i+1] - ai[i]; 4107 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4108 /* off-diagonal portion of A */ 4109 for (j=jo; j<ncols_o; j++) { 4110 *ca++ = *ba++; bj++; 4111 } 4112 } 4113 } else { 4114 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4115 } 4116 4117 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120 4121 #undef __FUNCT__ 4122 #define __FUNCT__ "MatGetLocalMatCondensed" 4123 /*@C 4124 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4125 4126 Not Collective 4127 4128 Input Parameters: 4129 + A - the matrix 4130 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4131 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4132 4133 Output Parameter: 4134 . A_loc - the local sequential matrix generated 4135 4136 Level: developer 4137 4138 @*/ 4139 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4140 { 4141 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4142 PetscErrorCode ierr; 4143 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4144 IS isrowa,iscola; 4145 Mat *aloc; 4146 4147 PetscFunctionBegin; 4148 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4149 if (!row){ 4150 start = A->rmap.rstart; end = A->rmap.rend; 4151 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4152 } else { 4153 isrowa = *row; 4154 } 4155 if (!col){ 4156 start = A->cmap.rstart; 4157 cmap = a->garray; 4158 nzA = a->A->cmap.n; 4159 nzB = a->B->cmap.n; 4160 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4161 ncols = 0; 4162 for (i=0; i<nzB; i++) { 4163 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4164 else break; 4165 } 4166 imark = i; 4167 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4168 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4169 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4170 ierr = PetscFree(idx);CHKERRQ(ierr); 4171 } else { 4172 iscola = *col; 4173 } 4174 if (scall != MAT_INITIAL_MATRIX){ 4175 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4176 aloc[0] = *A_loc; 4177 } 4178 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4179 *A_loc = aloc[0]; 4180 ierr = PetscFree(aloc);CHKERRQ(ierr); 4181 if (!row){ 4182 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4183 } 4184 if (!col){ 4185 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4186 } 4187 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4188 PetscFunctionReturn(0); 4189 } 4190 4191 #undef __FUNCT__ 4192 #define __FUNCT__ "MatGetBrowsOfAcols" 4193 /*@C 4194 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4195 4196 Collective on Mat 4197 4198 Input Parameters: 4199 + A,B - the matrices in mpiaij format 4200 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4201 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4202 4203 Output Parameter: 4204 + rowb, colb - index sets of rows and columns of B to extract 4205 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4206 - B_seq - the sequential matrix generated 4207 4208 Level: developer 4209 4210 @*/ 4211 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4212 { 4213 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4214 PetscErrorCode ierr; 4215 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4216 IS isrowb,iscolb; 4217 Mat *bseq; 4218 4219 PetscFunctionBegin; 4220 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4221 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4222 } 4223 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4224 4225 if (scall == MAT_INITIAL_MATRIX){ 4226 start = A->cmap.rstart; 4227 cmap = a->garray; 4228 nzA = a->A->cmap.n; 4229 nzB = a->B->cmap.n; 4230 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4231 ncols = 0; 4232 for (i=0; i<nzB; i++) { /* row < local row index */ 4233 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4234 else break; 4235 } 4236 imark = i; 4237 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4238 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4239 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4240 ierr = PetscFree(idx);CHKERRQ(ierr); 4241 *brstart = imark; 4242 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4243 } else { 4244 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4245 isrowb = *rowb; iscolb = *colb; 4246 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4247 bseq[0] = *B_seq; 4248 } 4249 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4250 *B_seq = bseq[0]; 4251 ierr = PetscFree(bseq);CHKERRQ(ierr); 4252 if (!rowb){ 4253 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4254 } else { 4255 *rowb = isrowb; 4256 } 4257 if (!colb){ 4258 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4259 } else { 4260 *colb = iscolb; 4261 } 4262 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4263 PetscFunctionReturn(0); 4264 } 4265 4266 #undef __FUNCT__ 4267 #define __FUNCT__ "MatGetBrowsOfAoCols" 4268 /*@C 4269 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4270 of the OFF-DIAGONAL portion of local A 4271 4272 Collective on Mat 4273 4274 Input Parameters: 4275 + A,B - the matrices in mpiaij format 4276 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4277 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4278 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4279 4280 Output Parameter: 4281 + B_oth - the sequential matrix generated 4282 4283 Level: developer 4284 4285 @*/ 4286 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4287 { 4288 VecScatter_MPI_General *gen_to,*gen_from; 4289 PetscErrorCode ierr; 4290 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4291 Mat_SeqAIJ *b_oth; 4292 VecScatter ctx=a->Mvctx; 4293 MPI_Comm comm=((PetscObject)ctx)->comm; 4294 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4295 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4296 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4297 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4298 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4299 MPI_Status *sstatus,rstatus; 4300 PetscMPIInt jj; 4301 PetscInt *cols,sbs,rbs; 4302 PetscScalar *vals; 4303 4304 PetscFunctionBegin; 4305 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4306 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4307 } 4308 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4309 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4310 4311 gen_to = (VecScatter_MPI_General*)ctx->todata; 4312 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4313 rvalues = gen_from->values; /* holds the length of receiving row */ 4314 svalues = gen_to->values; /* holds the length of sending row */ 4315 nrecvs = gen_from->n; 4316 nsends = gen_to->n; 4317 4318 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4319 srow = gen_to->indices; /* local row index to be sent */ 4320 sstarts = gen_to->starts; 4321 sprocs = gen_to->procs; 4322 sstatus = gen_to->sstatus; 4323 sbs = gen_to->bs; 4324 rstarts = gen_from->starts; 4325 rprocs = gen_from->procs; 4326 rbs = gen_from->bs; 4327 4328 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4329 if (scall == MAT_INITIAL_MATRIX){ 4330 /* i-array */ 4331 /*---------*/ 4332 /* post receives */ 4333 for (i=0; i<nrecvs; i++){ 4334 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4335 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4336 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4337 } 4338 4339 /* pack the outgoing message */ 4340 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4341 rstartsj = sstartsj + nsends +1; 4342 sstartsj[0] = 0; rstartsj[0] = 0; 4343 len = 0; /* total length of j or a array to be sent */ 4344 k = 0; 4345 for (i=0; i<nsends; i++){ 4346 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4348 for (j=0; j<nrows; j++) { 4349 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4350 for (l=0; l<sbs; l++){ 4351 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4352 rowlen[j*sbs+l] = ncols; 4353 len += ncols; 4354 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4355 } 4356 k++; 4357 } 4358 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4359 sstartsj[i+1] = len; /* starting 
point of (i+1)-th outgoing msg in bufj and bufa */ 4360 } 4361 /* recvs and sends of i-array are completed */ 4362 i = nrecvs; 4363 while (i--) { 4364 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4365 } 4366 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4367 4368 /* allocate buffers for sending j and a arrays */ 4369 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4370 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4371 4372 /* create i-array of B_oth */ 4373 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4374 b_othi[0] = 0; 4375 len = 0; /* total length of j or a array to be received */ 4376 k = 0; 4377 for (i=0; i<nrecvs; i++){ 4378 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4379 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4380 for (j=0; j<nrows; j++) { 4381 b_othi[k+1] = b_othi[k] + rowlen[j]; 4382 len += rowlen[j]; k++; 4383 } 4384 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4385 } 4386 4387 /* allocate space for j and a arrrays of B_oth */ 4388 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4389 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4390 4391 /* j-array */ 4392 /*---------*/ 4393 /* post receives of j-array */ 4394 for (i=0; i<nrecvs; i++){ 4395 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4396 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4397 } 4398 4399 /* pack the outgoing message j-array */ 4400 k = 0; 4401 for (i=0; i<nsends; i++){ 4402 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4403 bufJ = bufj+sstartsj[i]; 4404 for (j=0; j<nrows; j++) { 4405 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4406 for (ll=0; ll<sbs; ll++){ 4407 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4408 for (l=0; l<ncols; l++){ 4409 *bufJ++ = cols[l]; 4410 } 4411 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4412 } 4413 } 4414 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4415 } 4416 4417 /* recvs and sends of j-array are completed */ 4418 i = nrecvs; 4419 while (i--) { 4420 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4421 } 4422 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4423 } else if (scall == MAT_REUSE_MATRIX){ 4424 sstartsj = *startsj; 4425 rstartsj = sstartsj + nsends +1; 4426 bufa = *bufa_ptr; 4427 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4428 b_otha = b_oth->a; 4429 } else { 4430 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4431 } 4432 4433 /* a-array */ 4434 /*---------*/ 4435 /* post receives of a-array */ 4436 for (i=0; i<nrecvs; i++){ 4437 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4438 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4439 } 4440 4441 /* pack the outgoing message a-array */ 4442 k = 0; 4443 for (i=0; i<nsends; i++){ 4444 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4445 bufA = bufa+sstartsj[i]; 4446 for (j=0; j<nrows; j++) { 4447 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4448 for (ll=0; ll<sbs; ll++){ 4449 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4450 for (l=0; l<ncols; l++){ 4451 *bufA++ = vals[l]; 4452 } 
4453 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4454 } 4455 } 4456 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4457 } 4458 /* recvs and sends of a-array are completed */ 4459 i = nrecvs; 4460 while (i--) { 4461 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4462 } 4463 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4464 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4465 4466 if (scall == MAT_INITIAL_MATRIX){ 4467 /* put together the new matrix */ 4468 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4469 4470 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4471 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4472 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4473 b_oth->free_a = PETSC_TRUE; 4474 b_oth->free_ij = PETSC_TRUE; 4475 b_oth->nonew = 0; 4476 4477 ierr = PetscFree(bufj);CHKERRQ(ierr); 4478 if (!startsj || !bufa_ptr){ 4479 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4480 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4481 } else { 4482 *startsj = sstartsj; 4483 *bufa_ptr = bufa; 4484 } 4485 } 4486 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4487 PetscFunctionReturn(0); 4488 } 4489 4490 #undef __FUNCT__ 4491 #define __FUNCT__ "MatGetCommunicationStructs" 4492 /*@C 4493 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4494 4495 Not Collective 4496 4497 Input Parameters: 4498 . A - The matrix in mpiaij format 4499 4500 Output Parameter: 4501 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4502 . colmap - A map from global column index to local index into lvec 4503 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4504 4505 Level: developer 4506 4507 @*/ 4508 #if defined (PETSC_USE_CTABLE) 4509 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4510 #else 4511 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4512 #endif 4513 { 4514 Mat_MPIAIJ *a; 4515 4516 PetscFunctionBegin; 4517 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4518 PetscValidPointer(lvec, 2) 4519 PetscValidPointer(colmap, 3) 4520 PetscValidPointer(multScatter, 4) 4521 a = (Mat_MPIAIJ *) A->data; 4522 if (lvec) *lvec = a->lvec; 4523 if (colmap) *colmap = a->colmap; 4524 if (multScatter) *multScatter = a->Mvctx; 4525 PetscFunctionReturn(0); 4526 } 4527 4528 EXTERN_C_BEGIN 4529 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4530 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4531 EXTERN_C_END 4532 4533 /*MC 4534 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4535 4536 Options Database Keys: 4537 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4538 4539 Level: beginner 4540 4541 .seealso: MatCreateMPIAIJ() 4542 M*/ 4543 4544 EXTERN_C_BEGIN 4545 #undef __FUNCT__ 4546 #define __FUNCT__ "MatCreate_MPIAIJ" 4547 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4548 { 4549 Mat_MPIAIJ *b; 4550 PetscErrorCode ierr; 4551 PetscMPIInt size; 4552 4553 PetscFunctionBegin; 4554 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4555 4556 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4557 B->data = (void*)b; 4558 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4559 B->factor = 0; 4560 B->rmap.bs = 1; 4561 B->assembled = PETSC_FALSE; 4562 B->mapping = 0; 4563 4564 B->insertmode = NOT_SET_VALUES; 4565 b->size = size; 4566 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4567 4568 /* build cache for off array entries formed */ 4569 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4570 b->donotstash = PETSC_FALSE; 4571 b->colmap = 0; 4572 b->garray = 0; 4573 b->roworiented = PETSC_TRUE; 4574 4575 /* stuff used for matrix vector multiply */ 4576 b->lvec = PETSC_NULL; 4577 b->Mvctx = PETSC_NULL; 4578 4579 /* stuff for MatGetRow() */ 4580 b->rowindices = 0; 4581 b->rowvalues = 0; 4582 b->getrowactive = PETSC_FALSE; 4583 4584 4585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4586 "MatStoreValues_MPIAIJ", 4587 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4589 "MatRetrieveValues_MPIAIJ", 4590 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4592 "MatGetDiagonalBlock_MPIAIJ", 4593 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4595 "MatIsTranspose_MPIAIJ", 4596 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4598 "MatMPIAIJSetPreallocation_MPIAIJ", 4599 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4601 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4602 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4604 "MatDiagonalScaleLocal_MPIAIJ", 4605 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4607 "MatConvert_MPIAIJ_MPICSRPERM", 4608 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4610 "MatConvert_MPIAIJ_MPICRL", 4611 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4612 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4613 PetscFunctionReturn(0); 4614 } 4615 EXTERN_C_END 4616 4617 #undef __FUNCT__ 4618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4619 /*@ 4620 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4621 and "off-diagonal" part of the matrix in CSR format. 4622 4623 Collective on MPI_Comm 4624 4625 Input Parameters: 4626 + comm - MPI communicator 4627 . m - number of local rows (Cannot be PETSC_DECIDE) 4628 . 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices of the "diagonal" portion (local to the diagonal block)
.  a - matrix values of the "diagonal" portion
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices of the "off-diagonal" portion (global indices)
-  oa - matrix values of the "off-diagonal" portion

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal
   format used by PETSc; they are used directly, so they must not be freed or altered
   while the matrix is in use.

   The i and j indices are zero-based.

   See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  /* the diagonal block A is m x n (local columns); the off-diagonal block B is created
     with the full global column width, so oj holds global column indices */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
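/*
   A tiny usage sketch for MatCreateMPIAIJWithSplitArrays() (kept in an "#if 0"
   block so it is not compiled).  It assembles the 2x2 matrix [2 -1; -1 2] on a
   communicator with exactly two processes, one row per process; the helper name
   is hypothetical.  Note that the diagonal-block column indices are local while
   the off-diagonal column indices are global, and that the arrays are static
   because the routine does not copy them.
*/
#if 0
static PetscErrorCode BuildSplit2x2(MPI_Comm comm,Mat *C)
{
  static PetscInt    di[]  = {0,1};            /* diagonal block: one entry in the single local row */
  static PetscInt    dj[]  = {0};              /* local column 0 of the diagonal block */
  static PetscScalar da[]  = {2.0};
  static PetscInt    oi[]  = {0,1};            /* off-diagonal block: also one entry in the row */
  static PetscInt    oj0[] = {1}, oj1[] = {0}; /* global column of the coupling entry per rank */
  static PetscScalar oa[]  = {-1.0};
  PetscMPIInt        rank;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                        di,dj,da,oi,rank ? oj1 : oj0,oa,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif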
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine these macros so they can be used in a function returning void */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
    PetscInt   cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat         A  = aij->A;
    Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
    PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    PetscScalar *aa = a->a;
    PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat         B  = aij->B;
    Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
    PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
    PetscScalar *ba = b->a;

    PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt    nonew = a->nonew;
    PetscScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1   = aj + ai[row];
        ap1   = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1  = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2   = bj + bi[row];
        ap2   = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2  = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   b->a must be reloaded into ba before ap2 is recomputed, since
                   DisAssemble_MPIAIJ() replaced the off-diagonal matrix */
                B  = aij->B;
                b  = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap.n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            if (ignorezeroentries && v[i*n] == 0.0) continue;
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
          } else {
            if (ignorezeroentries && v[i] == 0.0) continue;
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
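/*
   For reference, the ownership test used in the loops above can be summarized
   by the small helper sketched below (kept in an "#if 0" block so it is not
   compiled; the helper name is hypothetical).  A global column index lands in
   the "diagonal" block A when it lies in [cstart,cend); otherwise the entry
   belongs to the "off-diagonal" block B, whose local column index comes from
   the colmap built by CreateColmap_MPIAIJ_Private().
*/
#if 0
static PetscTruth ColumnIsDiagonal(PetscInt col,PetscInt cstart,PetscInt cend)
{
  /* columns owned by this process form the contiguous range [cstart,cend) */
  return (col >= cstart && col < cend) ? PETSC_TRUE : PETSC_FALSE;
}
#endif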