#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table lookup cost; without it, it is not
   scalable (each process stores an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
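/*
   A minimal lookup sketch (illustration only, not compiled): translating a
   global column index gcol into the local column index lcol of the
   off-diagonal block B, using the colmap built by
   CreateColmap_MPIAIJ_Private() above.  Entries are stored shifted by one so
   that 0 can mean "not present"; both paths therefore yield lcol = -1 when
   gcol does not occur in B.  The names gcol and lcol are ours.

       PetscInt lcol;
     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/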
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
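/*
   MatSetValuesRow_MPIAIJ() above relies on the caller passing one complete
   local row in ascending global column order, which splits into three
   contiguous pieces of v[]:

       [ B entries left of the diagonal block | all A (diagonal block) entries | B entries right of it ]

   so the three PetscMemcpy() calls of lengths l, a->i[row+1]-a->i[row], and
   b->i[row+1]-b->i[row]-l drop each piece directly into place with no search.
*/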
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
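/*
   Usage sketch (illustration only, not compiled): values destined for rows
   owned by another process are not inserted by MatSetValues_MPIAIJ() above
   but stashed, and only travel during assembly, so a caller always follows
   value setting with the assembly pair; here grow and gcol are caller-chosen
   global indices that may lie outside this process's ownership range:

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/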
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same insert mode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then no process
     disassembled, so we can skip this check
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
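/*
   Two small reductions above carry the global decisions during assembly:

   - MatAssemblyBegin_MPIAIJ() bitwise-ORs the per-process insertmode values;
     because INSERT_VALUES and ADD_VALUES are distinct bits, a mixed result
     equals ADD_VALUES|INSERT_VALUES and is flagged as an error.

   - MatAssemblyEnd_MPIAIJ() multiplies (MPI_PROD) the was_assembled flags;
     the product is true only if no process has disassembled, so a single
     disassembled process forces every still-assembled process to disassemble
     as well before reassembling.
*/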
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the received row indices into the local list */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows.  If the "diagonal block" of the matrix is square
     and the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called for each diagonal entry; that would allocate
     new memory each time, causing many mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
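/*
   The two multiply kernels above follow the same split

       y = A_d x_local + B_o x_ghost

   where A_d is the diagonal block, B_o the off-diagonal block, and
   x_ghost = lvec holds the scattered-in ghost values: start the scatter,
   compute the purely local product to overlap communication with
   computation, finish the scatter, then accumulate the B_o term.
*/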
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: this assumes the values are not actually added
       into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
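/*
   For the transpose product above the data flows the other way:

       lvec = B_o^T x_local          (local product into the ghost vector)
       y    = A_d^T x_local          (local part)
       y   += reverse scatter-add of lvec

   The order of the local A_d^T product relative to VecScatterBegin() must
   match whether the scatter context merges its begin/end phases, which is
   what the merged test selects.
*/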
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: the diagonal blocks must be transposes of each other */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
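/*
   On-disk layout produced by MatView_MPIAIJ_Binary() below, gathered onto
   process 0 one process at a time:

       header[4]        MAT_FILE_COOKIE, M, N, global nonzero count
       row lengths      M PetscInt values
       column indices   nz PetscInt values (global, ascending within each row)
       values           nz PetscScalar values (same order as the indices)
*/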
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
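/*
   MatRelax_MPIAIJ() above implements only processor-local relaxation: each
   outer iteration refreshes the ghost values and then sweeps the diagonal
   block against the locally updated right-hand side

       bb1 = bb - B_o x_ghost

   i.e. block Jacobi across processes with SOR (forward, backward, or
   symmetric) inside each diagonal block; a true parallel SOR ordering is
   not supported.
*/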
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1256 */ 1257 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1258 PetscInt max = 1,tmp; 1259 for (i=0; i<matin->rmap.n; i++) { 1260 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1261 if (max < tmp) { max = tmp; } 1262 } 1263 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1264 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1265 } 1266 1267 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1268 lrow = row - rstart; 1269 1270 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1271 if (!v) {pvA = 0; pvB = 0;} 1272 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1273 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1274 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1275 nztot = nzA + nzB; 1276 1277 cmap = mat->garray; 1278 if (v || idx) { 1279 if (nztot) { 1280 /* Sort by increasing column numbers, assuming A and B already sorted */ 1281 PetscInt imark = -1; 1282 if (v) { 1283 *v = v_p = mat->rowvalues; 1284 for (i=0; i<nzB; i++) { 1285 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1286 else break; 1287 } 1288 imark = i; 1289 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1290 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1291 } 1292 if (idx) { 1293 *idx = idx_p = mat->rowindices; 1294 if (imark > -1) { 1295 for (i=0; i<imark; i++) { 1296 idx_p[i] = cmap[cworkB[i]]; 1297 } 1298 } else { 1299 for (i=0; i<nzB; i++) { 1300 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1301 else break; 1302 } 1303 imark = i; 1304 } 1305 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1306 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1307 } 1308 } else { 1309 if (idx) *idx = 0; 1310 if (v) *v = 0; 1311 } 1312 } 1313 *nz = nztot; 1314 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1315 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1321 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1324 1325 PetscFunctionBegin; 1326 if (!aij->getrowactive) { 1327 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1328 } 1329 aij->getrowactive = PETSC_FALSE; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatNorm_MPIAIJ" 1335 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1336 { 1337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1338 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1339 PetscErrorCode ierr; 1340 PetscInt i,j,cstart = mat->cmap.rstart; 1341 PetscReal sum = 0.0; 1342 MatScalar *v; 1343 1344 PetscFunctionBegin; 1345 if (aij->size == 1) { 1346 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1347 } else { 1348 if (type == NORM_FROBENIUS) { 1349 v = amat->a; 1350 for (i=0; i<amat->nz; i++) { 1351 #if defined(PETSC_USE_COMPLEX) 1352 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1353 #else 1354 sum += (*v)*(*v); v++; 1355 #endif 1356 } 1357 v = bmat->a; 1358 for (i=0; i<bmat->nz; i++) { 1359 #if defined(PETSC_USE_COMPLEX) 1360 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1361 #else 1362 sum += (*v)*(*v); v++; 1363 #endif 1364 } 1365 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1366 *norm = 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
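/*
   The reductions above assemble the standard norms from per-process pieces:

       Frobenius:  ||M||_F   = sqrt(sum of |a_ij|^2 over local A and B entries, MPI_SUM across processes)
       1-norm:     ||M||_1   = max_j sum_i |a_ij|   (column sums accumulated into a length-N array, MPI_SUM, then max)
       inf-norm:   ||M||_oo  = max_i sum_j |a_ij|   (local row sums, then MPI_MAX)

   The 2-norm would require a singular-value computation and is not supported.
*/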
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]]++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */

  /* copy over the B part */
  array = Bloc->a;
  row   = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
*/ 1562 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1563 /* because of the column compression in the off-processor part of the matrix a->B, 1564 the number of columns in a->B and b->B may be different, hence we cannot call 1565 the MatCopy() directly on the two parts. If need be, we can provide a more 1566 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1567 then copying the submatrices */ 1568 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1569 } else { 1570 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1571 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1578 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1579 { 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #include "petscblaslapack.h" 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatAXPY_MPIAIJ" 1590 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1591 { 1592 PetscErrorCode ierr; 1593 PetscInt i; 1594 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1595 PetscBLASInt bnz,one=1; 1596 Mat_SeqAIJ *x,*y; 1597 1598 PetscFunctionBegin; 1599 if (str == SAME_NONZERO_PATTERN) { 1600 PetscScalar alpha = a; 1601 x = (Mat_SeqAIJ *)xx->A->data; 1602 y = (Mat_SeqAIJ *)yy->A->data; 1603 bnz = PetscBLASIntCast(x->nz); 1604 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1605 x = (Mat_SeqAIJ *)xx->B->data; 1606 y = (Mat_SeqAIJ *)yy->B->data; 1607 bnz = PetscBLASIntCast(x->nz); 1608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1609 } else if (str == SUBSET_NONZERO_PATTERN) { 1610 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1611 1612 x = (Mat_SeqAIJ *)xx->B->data; 1613 y = (Mat_SeqAIJ *)yy->B->data; 1614 if (y->xtoy && y->XtoY != xx->B) { 1615 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1616 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1617 } 1618 if (!y->xtoy) { /* get xtoy */ 1619 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1620 y->XtoY = xx->B; 1621 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1622 } 1623 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1624 } else { 1625 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatConjugate_MPIAIJ" 1634 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1635 { 1636 #if defined(PETSC_USE_COMPLEX) 1637 PetscErrorCode ierr; 1638 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1639 1640 PetscFunctionBegin; 1641 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1642 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1643 #else 1644 PetscFunctionBegin; 1645 #endif 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatRealPart_MPIAIJ" 1651 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1652 { 1653 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1654 PetscErrorCode ierr; 1655 1656 PetscFunctionBegin; 1657 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1658 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1664 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1665 { 1666 Mat_MPIAIJ 
*a = (Mat_MPIAIJ*)A->data; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1671 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #ifdef PETSC_HAVE_PBGL 1676 1677 #include <boost/parallel/mpi/bsp_process_group.hpp> 1678 #include <boost/graph/distributed/ilu_default_graph.hpp> 1679 #include <boost/graph/distributed/ilu_0_block.hpp> 1680 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1681 #include <boost/graph/distributed/petsc/interface.hpp> 1682 #include <boost/multi_array.hpp> 1683 #include <boost/parallel/distributed_property_map.hpp> 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1687 /* 1688 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1689 */ 1690 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1691 { 1692 namespace petsc = boost::distributed::petsc; 1693 1694 namespace graph_dist = boost::graph::distributed; 1695 using boost::graph::distributed::ilu_default::process_group_type; 1696 using boost::graph::ilu_permuted; 1697 1698 PetscTruth row_identity, col_identity; 1699 PetscContainer c; 1700 PetscInt m, n, M, N; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1705 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1706 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1707 if (!row_identity || !col_identity) { 1708 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1709 } 1710 1711 process_group_type pg; 1712 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1713 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1714 lgraph_type& level_graph = *lgraph_p; 1715 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1716 1717 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1718 ilu_permuted(level_graph); 1719 1720 /* put together the new matrix */ 1721 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1722 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1723 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1724 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1725 ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1726 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 (*fact)->factor = FACTOR_LU; 1729 1730 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1731 ierr = PetscContainerSetPointer(c, lgraph_p); 1732 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1738 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1739 { 1740 PetscFunctionBegin; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSolve_MPIAIJ" 1746 /* 1747 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1748 */ 1749 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1750 { 1751 namespace graph_dist = boost::graph::distributed; 1752 1753 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1754 lgraph_type* lgraph_p; 1755 PetscContainer c; 1756 PetscErrorCode 
ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1760 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1761 ierr = VecCopy(b, x);CHKERRQ(ierr); 1762 1763 PetscScalar* array_x; 1764 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1765 PetscInt sx; 1766 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1767 1768 PetscScalar* array_b; 1769 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1770 PetscInt sb; 1771 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1772 1773 lgraph_type& level_graph = *lgraph_p; 1774 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1775 1776 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1777 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1778 ref_x(array_x, boost::extents[num_vertices(graph)]); 1779 1780 typedef boost::iterator_property_map<array_ref_type::iterator, 1781 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1782 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1783 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1784 1785 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1786 /* restore the array access obtained above */ ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 #endif 1790 1791 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1792 PetscInt nzlocal,nsends,nrecvs; 1793 PetscMPIInt *send_rank; 1794 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1795 PetscScalar *sbuf_a,**rbuf_a; 1796 PetscErrorCode (*MatDestroy)(Mat); 1797 } Mat_Redundant; 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1801 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1802 { 1803 PetscErrorCode ierr; 1804 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1805 PetscInt i; 1806 1807 PetscFunctionBegin; 1808 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1809 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1810 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1811 for (i=0; i<redund->nrecvs; i++){ 1812 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1813 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1814 } 1815 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1816 ierr = PetscFree(redund);CHKERRQ(ierr); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatDestroy_MatRedundant" 1822 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1823 { 1824 PetscErrorCode ierr; 1825 PetscContainer container; 1826 Mat_Redundant *redund=PETSC_NULL; 1827 1828 PetscFunctionBegin; 1829 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1830 if (container) { 1831 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1832 } else { 1833 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1834 } 1835 A->ops->destroy = redund->MatDestroy; 1836 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1837 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1838 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1844 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1845 { 1846 PetscMPIInt rank,size; 1847 MPI_Comm comm=((PetscObject)mat)->comm; 1848 PetscErrorCode ierr; 1849 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1850 PetscMPIInt
*send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1851 PetscInt *rowrange=mat->rmap.range; 1852 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1853 Mat A=aij->A,B=aij->B,C=*matredundant; 1854 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1855 PetscScalar *sbuf_a; 1856 PetscInt nzlocal=a->nz+b->nz; 1857 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1858 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1859 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1860 MatScalar *aworkA,*aworkB; 1861 PetscScalar *vals; 1862 PetscMPIInt tag1,tag2,tag3,imdex; 1863 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1864 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1865 MPI_Status recv_status,*send_status; 1866 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1867 PetscInt **rbuf_j=PETSC_NULL; 1868 PetscScalar **rbuf_a=PETSC_NULL; 1869 Mat_Redundant *redund=PETSC_NULL; 1870 PetscContainer container; 1871 1872 PetscFunctionBegin; 1873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1875 1876 if (reuse == MAT_REUSE_MATRIX) { 1877 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1878 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1879 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1880 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1881 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1882 if (container) { 1883 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1884 } else { 1885 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1886 } 1887 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 1888 1889 nsends = redund->nsends; 1890 nrecvs = redund->nrecvs; 1891 send_rank = redund->send_rank; recv_rank = send_rank + size; 1892 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1893 sbuf_j = redund->sbuf_j; 1894 sbuf_a = redund->sbuf_a; 1895 rbuf_j = redund->rbuf_j; 1896 rbuf_a = redund->rbuf_a; 1897 } 1898 1899 if (reuse == MAT_INITIAL_MATRIX){ 1900 PetscMPIInt subrank,subsize; 1901 PetscInt nleftover,np_subcomm; 1902 /* get the destination processors' id send_rank, nsends and nrecvs */ 1903 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1904 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1905 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr); 1906 recv_rank = send_rank + size; 1907 np_subcomm = size/nsubcomm; 1908 nleftover = size - nsubcomm*np_subcomm; 1909 nsends = 0; nrecvs = 0; 1910 for (i=0; i<size; i++){ /* i=rank*/ 1911 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1912 send_rank[nsends] = i; nsends++; 1913 recv_rank[nrecvs++] = i; 1914 } 1915 } 1916 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1917 i = size-nleftover-1; 1918 j = 0; 1919 while (j < nsubcomm - nleftover){ 1920 send_rank[nsends++] = i; 1921 i--; j++; 1922 } 1923 } 1924 1925 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1926 for (i=0; i<nleftover; i++){ 1927 recv_rank[nrecvs++] = size-nleftover+i; 1928 } 1929 } 1930 1931 /* allocate sbuf_j, sbuf_a */ 1932 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1933 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1934 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1935 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 1936 1937 /* copy mat's local entries into the buffers */ 1938 if (reuse == MAT_INITIAL_MATRIX){ 1939 rownz_max = 0; 1940 rptr = sbuf_j; 1941 cols = sbuf_j + rend-rstart + 1; 1942 vals = sbuf_a; 1943 rptr[0] = 0; 1944 for (i=0; i<rend-rstart; i++){ 1945 row = i + rstart; 1946 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1947 ncols = nzA + nzB; 1948 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1949 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1950 /* load the column indices for this row into cols */ 1951 lwrite = 0; 1952 for (l=0; l<nzB; l++) { 1953 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1954 vals[lwrite] = aworkB[l]; 1955 cols[lwrite++] = ctmp; 1956 } 1957 } 1958 for (l=0; l<nzA; l++){ 1959 vals[lwrite] = aworkA[l]; 1960 cols[lwrite++] = cstart + cworkA[l]; 1961 } 1962 for (l=0; l<nzB; l++) { 1963 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1964 vals[lwrite] = aworkB[l]; 1965 cols[lwrite++] = ctmp; 1966 } 1967 } 1968 vals += ncols; 1969 cols += ncols; 1970 rptr[i+1] = rptr[i] + ncols; 1971 if (rownz_max < ncols) rownz_max = ncols; 1972 } 1973 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 1974 } else { /* only copy matrix values into sbuf_a */ 1975 rptr = sbuf_j; 1976 vals = sbuf_a; 1977 rptr[0] = 0; 1978 for (i=0; i<rend-rstart; i++){ 1979 row = i + rstart; 1980 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1981 ncols = nzA + nzB; 1982 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1983 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1984 lwrite = 0; 1985 for (l=0; l<nzB; l++) { 1986 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1987 } 1988 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1989 for (l=0; l<nzB; l++) {
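/* lastly the off-diagonal entries whose global column index is at or beyond cend, i.e. to the right of the diagonal block */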
1990 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1991 } 1992 vals += ncols; 1993 rptr[i+1] = rptr[i] + ncols; 1994 } 1995 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1996 1997 /* send nzlocal to others, and recv other's nzlocal */ 1998 /*--------------------------------------------------*/ 1999 if (reuse == MAT_INITIAL_MATRIX){ 2000 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2001 s_waits2 = s_waits3 + nsends; 2002 s_waits1 = s_waits2 + nsends; 2003 r_waits1 = s_waits1 + nsends; 2004 r_waits2 = r_waits1 + nrecvs; 2005 r_waits3 = r_waits2 + nrecvs; 2006 } else { 2007 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2008 r_waits3 = s_waits3 + nsends; 2009 } 2010 2011 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2012 if (reuse == MAT_INITIAL_MATRIX){ 2013 /* get new tags to keep the communication clean */ 2014 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2015 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2016 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2017 rbuf_nz = sbuf_nz + nsends; 2018 2019 /* post receives of other's nzlocal */ 2020 for (i=0; i<nrecvs; i++){ 2021 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2022 } 2023 /* send nzlocal to others */ 2024 for (i=0; i<nsends; i++){ 2025 sbuf_nz[i] = nzlocal; 2026 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2027 } 2028 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2029 count = nrecvs; 2030 while (count) { 2031 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2032 recv_rank[imdex] = recv_status.MPI_SOURCE; 2033 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2034 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2035 2036 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2037 rbuf_nz[imdex] += i + 2; 2038 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2039 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2040 count--; 2041 } 2042 /* wait on sends of nzlocal */ 2043 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2044 /* send mat->i,j to others, and recv from other's */ 2045 /*------------------------------------------------*/ 2046 for (i=0; i<nsends; i++){ 2047 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2048 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2049 } 2050 /* wait on receives of mat->i,j */ 2051 /*------------------------------*/ 2052 count = nrecvs; 2053 while (count) { 2054 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2055 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2056 count--; 2057 } 2058 /* wait on sends of mat->i,j */ 2059 /*---------------------------*/ 2060 if (nsends) { 2061 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2062 } 2063 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2064 2065 /* post receives, send and receive mat->a */ 2066 /*----------------------------------------*/ 
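/* post all the receives before the sends so that each incoming rbuf_a buffer is already available when its message arrives */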
2067 for (imdex=0; imdex<nrecvs; imdex++) { 2068 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2069 } 2070 for (i=0; i<nsends; i++){ 2071 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2072 } 2073 count = nrecvs; 2074 while (count) { 2075 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2076 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2077 count--; 2078 } 2079 if (nsends) { 2080 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2081 } 2082 2083 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2084 2085 /* create redundant matrix */ 2086 /*-------------------------*/ 2087 if (reuse == MAT_INITIAL_MATRIX){ 2088 /* compute rownz_max for preallocation */ 2089 for (imdex=0; imdex<nrecvs; imdex++){ 2090 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2091 rptr = rbuf_j[imdex]; 2092 for (i=0; i<j; i++){ 2093 ncols = rptr[i+1] - rptr[i]; 2094 if (rownz_max < ncols) rownz_max = ncols; 2095 } 2096 } 2097 2098 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2099 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2100 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2101 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2102 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2103 } else { 2104 C = *matredundant; 2105 } 2106 2107 /* insert local matrix entries */ 2108 rptr = sbuf_j; 2109 cols = sbuf_j + rend-rstart + 1; 2110 vals = sbuf_a; 2111 for (i=0; i<rend-rstart; i++){ 2112 row = i + rstart; 2113 ncols = rptr[i+1] - rptr[i]; 2114 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2115 vals += ncols; 2116 cols += ncols; 2117 } 2118 /* insert received matrix entries */ 2119 for (imdex=0; imdex<nrecvs; imdex++){ 2120 rstart = rowrange[recv_rank[imdex]]; 2121 rend = rowrange[recv_rank[imdex]+1]; 2122 rptr = rbuf_j[imdex]; 2123 cols = rbuf_j[imdex] + rend-rstart + 1; 2124 vals = rbuf_a[imdex]; 2125 for (i=0; i<rend-rstart; i++){ 2126 row = i + rstart; 2127 ncols = rptr[i+1] - rptr[i]; 2128 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2129 vals += ncols; 2130 cols += ncols; 2131 } 2132 } 2133 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2136 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscContainer container; 2139 *matredundant = C; 2140 /* create a supporting struct and attach it to C for reuse */ 2141 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2142 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2143 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2144 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2145 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2146 2147 redund->nzlocal = nzlocal; 2148 redund->nsends = nsends; 2149 redund->nrecvs = nrecvs; 2150 redund->send_rank = send_rank; 2151 redund->sbuf_nz = sbuf_nz; 2152 redund->sbuf_j = sbuf_j; 2153 redund->sbuf_a = sbuf_a; 2154 redund->rbuf_j = 
rbuf_j; 2155 redund->rbuf_a = rbuf_a; 2156 2157 redund->MatDestroy = C->ops->destroy; 2158 C->ops->destroy = MatDestroy_MatRedundant; 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2165 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2166 { 2167 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2168 PetscInt n = A->rmap.n; 2169 PetscInt cstart = A->cmap.rstart; 2170 PetscInt *cmap = mat->garray; 2171 PetscInt *diagIdx, *offdiagIdx; 2172 Vec diagV, offdiagV; 2173 PetscScalar *a, *diagA, *offdiagA; 2174 PetscInt r; 2175 PetscErrorCode ierr; 2176 2177 PetscFunctionBegin; 2178 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2179 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2180 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2181 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2182 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2183 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2184 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2185 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2186 for(r = 0; r < n; ++r) { 2187 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2188 a[r] = diagA[r]; 2189 idx[r] = cstart + diagIdx[r]; 2190 } else { 2191 a[r] = offdiagA[r]; 2192 idx[r] = cmap[offdiagIdx[r]]; 2193 } 2194 } 2195 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2196 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2197 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2198 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2199 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2200 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2206 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2207 { 2208 PetscErrorCode ierr; 2209 2210 PetscFunctionBegin; 2211 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 /* -------------------------------------------------------------------*/ 2216 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2217 MatGetRow_MPIAIJ, 2218 MatRestoreRow_MPIAIJ, 2219 MatMult_MPIAIJ, 2220 /* 4*/ MatMultAdd_MPIAIJ, 2221 MatMultTranspose_MPIAIJ, 2222 MatMultTransposeAdd_MPIAIJ, 2223 #ifdef PETSC_HAVE_PBGL 2224 MatSolve_MPIAIJ, 2225 #else 2226 0, 2227 #endif 2228 0, 2229 0, 2230 /*10*/ 0, 2231 0, 2232 0, 2233 MatRelax_MPIAIJ, 2234 MatTranspose_MPIAIJ, 2235 /*15*/ MatGetInfo_MPIAIJ, 2236 MatEqual_MPIAIJ, 2237 MatGetDiagonal_MPIAIJ, 2238 MatDiagonalScale_MPIAIJ, 2239 MatNorm_MPIAIJ, 2240 /*20*/ MatAssemblyBegin_MPIAIJ, 2241 MatAssemblyEnd_MPIAIJ, 2242 0, 2243 MatSetOption_MPIAIJ, 2244 MatZeroEntries_MPIAIJ, 2245 /*25*/ MatZeroRows_MPIAIJ, 2246 0, 2247 #ifdef PETSC_HAVE_PBGL 2248 MatLUFactorNumeric_MPIAIJ, 2249 #else 2250 0, 2251 #endif 2252 0, 2253 0, 2254 /*30*/ MatSetUpPreallocation_MPIAIJ, 2255 #ifdef PETSC_HAVE_PBGL 2256 MatILUFactorSymbolic_MPIAIJ, 2257 #else 2258 0, 2259 #endif 2260 0, 2261 0, 2262 0, 2263 /*35*/ MatDuplicate_MPIAIJ, 2264 0, 2265 0, 2266 0, 2267 0, 2268 /*40*/ MatAXPY_MPIAIJ, 2269 MatGetSubMatrices_MPIAIJ, 2270 MatIncreaseOverlap_MPIAIJ, 2271 MatGetValues_MPIAIJ, 2272 MatCopy_MPIAIJ, 2273 /*45*/ 0, 2274 MatScale_MPIAIJ, 2275 0, 2276 0, 2277 0, 2278 /*50*/ MatSetBlockSize_MPIAIJ, 2279 0, 2280 0, 2281 0, 2282 0, 2283 /*55*/ 
MatFDColoringCreate_MPIAIJ, 2284 0, 2285 MatSetUnfactored_MPIAIJ, 2286 MatPermute_MPIAIJ, 2287 0, 2288 /*60*/ MatGetSubMatrix_MPIAIJ, 2289 MatDestroy_MPIAIJ, 2290 MatView_MPIAIJ, 2291 0, 2292 0, 2293 /*65*/ 0, 2294 0, 2295 0, 2296 0, 2297 0, 2298 /*70*/ 0, 2299 0, 2300 MatSetColoring_MPIAIJ, 2301 #if defined(PETSC_HAVE_ADIC) 2302 MatSetValuesAdic_MPIAIJ, 2303 #else 2304 0, 2305 #endif 2306 MatSetValuesAdifor_MPIAIJ, 2307 /*75*/ 0, 2308 0, 2309 0, 2310 0, 2311 0, 2312 /*80*/ 0, 2313 0, 2314 0, 2315 0, 2316 /*84*/ MatLoad_MPIAIJ, 2317 0, 2318 0, 2319 0, 2320 0, 2321 0, 2322 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2323 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2324 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2325 MatPtAP_Basic, 2326 MatPtAPSymbolic_MPIAIJ, 2327 /*95*/ MatPtAPNumeric_MPIAIJ, 2328 0, 2329 0, 2330 0, 2331 0, 2332 /*100*/0, 2333 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2334 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2335 MatConjugate_MPIAIJ, 2336 0, 2337 /*105*/MatSetValuesRow_MPIAIJ, 2338 MatRealPart_MPIAIJ, 2339 MatImaginaryPart_MPIAIJ, 2340 0, 2341 0, 2342 /*110*/0, 2343 MatGetRedundantMatrix_MPIAIJ, 2344 MatGetRowMin_MPIAIJ, 2345 0, 2346 0, 2347 /*115*/MatGetSeqNonzerostructure_MPIAIJ}; 2348 2349 /* ----------------------------------------------------------------------------------------*/ 2350 2351 EXTERN_C_BEGIN 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2354 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2355 { 2356 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2357 PetscErrorCode ierr; 2358 2359 PetscFunctionBegin; 2360 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2361 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 EXTERN_C_END 2365 2366 EXTERN_C_BEGIN 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2369 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2370 { 2371 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2376 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 EXTERN_C_END 2380 2381 #include "petscpc.h" 2382 EXTERN_C_BEGIN 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2385 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2386 { 2387 Mat_MPIAIJ *b; 2388 PetscErrorCode ierr; 2389 PetscInt i; 2390 2391 PetscFunctionBegin; 2392 B->preallocated = PETSC_TRUE; 2393 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2394 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2395 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2396 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2397 2398 B->rmap.bs = B->cmap.bs = 1; 2399 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2400 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2401 if (d_nnz) { 2402 for (i=0; i<B->rmap.n; i++) { 2403 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2404 } 2405 } 2406 if (o_nnz) { 2407 for (i=0; i<B->rmap.n; i++) { 2408 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2409 } 2410 } 2411 b = (Mat_MPIAIJ*)B->data; 2412 2413 /* Explicitly create 2 MATSEQAIJ matrices. 
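b->A holds the square diagonal block (local rows by local columns); b->B holds the off-diagonal block and is created below with the full global column width N, which is compressed down to only the columns containing nonzeros when the matrix is assembled (hence the differing column counts noted in MatCopy_MPIAIJ above).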
*/ 2414 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2415 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2416 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2417 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2418 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2419 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2420 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2421 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2422 2423 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2424 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2425 2426 PetscFunctionReturn(0); 2427 } 2428 EXTERN_C_END 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2432 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2433 { 2434 Mat mat; 2435 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 *newmat = 0; 2440 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2441 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2442 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2443 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2444 a = (Mat_MPIAIJ*)mat->data; 2445 2446 mat->factor = matin->factor; 2447 mat->rmap.bs = matin->rmap.bs; 2448 mat->assembled = PETSC_TRUE; 2449 mat->insertmode = NOT_SET_VALUES; 2450 mat->preallocated = PETSC_TRUE; 2451 2452 a->size = oldmat->size; 2453 a->rank = oldmat->rank; 2454 a->donotstash = oldmat->donotstash; 2455 a->roworiented = oldmat->roworiented; 2456 a->rowindices = 0; 2457 a->rowvalues = 0; 2458 a->getrowactive = PETSC_FALSE; 2459 2460 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2461 ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2462 2463 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2464 if (oldmat->colmap) { 2465 #if defined (PETSC_USE_CTABLE) 2466 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2467 #else 2468 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2469 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2470 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2471 #endif 2472 } else a->colmap = 0; 2473 if (oldmat->garray) { 2474 PetscInt len; 2475 len = oldmat->B->cmap.n; 2476 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2478 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2479 } else a->garray = 0; 2480 2481 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2482 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2483 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2484 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2485 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2486 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2487 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2488 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2489 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2490 *newmat = mat; 2491 PetscFunctionReturn(0); 2492 } 
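/* MatLoad_MPIAIJ reads the matrix from a PETSc binary viewer: process 0 reads the header and all row lengths, computes each process's nonzero count, and then ships row lengths, column indices, and values to their owning processes, which insert them locally with MatSetValues_MPIAIJ(). */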
2493 2494 #include "petscsys.h" 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatLoad_MPIAIJ" 2498 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2499 { 2500 Mat A; 2501 PetscScalar *vals,*svals; 2502 MPI_Comm comm = ((PetscObject)viewer)->comm; 2503 MPI_Status status; 2504 PetscErrorCode ierr; 2505 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2506 PetscInt i,nz,j,rstart,rend,mmax; 2507 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2508 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2509 PetscInt cend,cstart,n,*rowners; 2510 int fd; 2511 2512 PetscFunctionBegin; 2513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2514 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2515 if (!rank) { 2516 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2517 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2518 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2519 } 2520 2521 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2522 M = header[1]; N = header[2]; 2523 /* determine ownership of all rows */ 2524 m = M/size + ((M % size) > rank); 2525 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2526 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2527 2528 /* First process needs enough room for process with most rows */ 2529 if (!rank) { 2530 mmax = rowners[1]; 2531 for (i=2; i<size; i++) { 2532 mmax = PetscMax(mmax,rowners[i]); 2533 } 2534 } else mmax = m; 2535 2536 rowners[0] = 0; 2537 for (i=2; i<=size; i++) { 2538 rowners[i] += rowners[i-1]; 2539 } 2540 rstart = rowners[rank]; 2541 rend = rowners[rank+1]; 2542 2543 /* distribute row lengths to all processors */ 2544 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2545 if (!rank) { 2546 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2547 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2548 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2549 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2550 for (j=0; j<m; j++) { 2551 procsnz[0] += ourlens[j]; 2552 } 2553 for (i=1; i<size; i++) { 2554 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2555 /* calculate the number of nonzeros on each processor */ 2556 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2557 procsnz[i] += rowlengths[j]; 2558 } 2559 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2560 } 2561 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2562 } else { 2563 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2564 } 2565 2566 if (!rank) { 2567 /* determine max buffer needed and allocate it */ 2568 maxnz = 0; 2569 for (i=0; i<size; i++) { 2570 maxnz = PetscMax(maxnz,procsnz[i]); 2571 } 2572 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2573 2574 /* read in my part of the matrix column indices */ 2575 nz = procsnz[0]; 2576 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2577 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2578 2579 /* read in every one elses and ship off */ 2580 for (i=1; i<size; i++) { 2581 nz = procsnz[i]; 2582 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2583 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2584 } 2585 ierr = PetscFree(cols);CHKERRQ(ierr); 2586 } else { 2587 /* 
determine buffer space needed for message */ 2588 nz = 0; 2589 for (i=0; i<m; i++) { 2590 nz += ourlens[i]; 2591 } 2592 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2593 2594 /* receive message of column indices*/ 2595 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2596 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2597 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2598 } 2599 2600 /* determine column ownership if matrix is not square */ 2601 if (N != M) { 2602 n = N/size + ((N % size) > rank); 2603 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2604 cstart = cend - n; 2605 } else { 2606 cstart = rstart; 2607 cend = rend; 2608 n = cend - cstart; 2609 } 2610 2611 /* loop over local rows, determining number of off diagonal entries */ 2612 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2613 jj = 0; 2614 for (i=0; i<m; i++) { 2615 for (j=0; j<ourlens[i]; j++) { 2616 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2617 jj++; 2618 } 2619 } 2620 2621 /* create our matrix */ 2622 for (i=0; i<m; i++) { 2623 ourlens[i] -= offlens[i]; 2624 } 2625 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2626 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2627 ierr = MatSetType(A,type);CHKERRQ(ierr); 2628 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2629 2630 for (i=0; i<m; i++) { 2631 ourlens[i] += offlens[i]; 2632 } 2633 2634 if (!rank) { 2635 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2636 2637 /* read in my part of the matrix numerical values */ 2638 nz = procsnz[0]; 2639 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2640 2641 /* insert into matrix */ 2642 jj = rstart; 2643 smycols = mycols; 2644 svals = vals; 2645 for (i=0; i<m; i++) { 2646 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2647 smycols += ourlens[i]; 2648 svals += ourlens[i]; 2649 jj++; 2650 } 2651 2652 /* read in other processors and ship out */ 2653 for (i=1; i<size; i++) { 2654 nz = procsnz[i]; 2655 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2656 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2657 } 2658 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2659 } else { 2660 /* receive numeric values */ 2661 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2662 2663 /* receive message of values*/ 2664 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2665 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2666 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2667 2668 /* insert into matrix */ 2669 jj = rstart; 2670 smycols = mycols; 2671 svals = vals; 2672 for (i=0; i<m; i++) { 2673 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2674 smycols += ourlens[i]; 2675 svals += ourlens[i]; 2676 jj++; 2677 } 2678 } 2679 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2680 ierr = PetscFree(vals);CHKERRQ(ierr); 2681 ierr = PetscFree(mycols);CHKERRQ(ierr); 2682 ierr = PetscFree(rowners);CHKERRQ(ierr); 2683 2684 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 *newmat = A; 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2692 /* 2693 Not great since it makes two copies of the 
submatrix, first an SeqAIJ 2694 in local and then by concatenating the local matrices the end result. 2695 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2696 */ 2697 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2698 { 2699 PetscErrorCode ierr; 2700 PetscMPIInt rank,size; 2701 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2702 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2703 Mat *local,M,Mreuse; 2704 MatScalar *vwork,*aa; 2705 MPI_Comm comm = ((PetscObject)mat)->comm; 2706 Mat_SeqAIJ *aij; 2707 2708 2709 PetscFunctionBegin; 2710 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2711 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2712 2713 if (call == MAT_REUSE_MATRIX) { 2714 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2715 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2716 local = &Mreuse; 2717 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2718 } else { 2719 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2720 Mreuse = *local; 2721 ierr = PetscFree(local);CHKERRQ(ierr); 2722 } 2723 2724 /* 2725 m - number of local rows 2726 n - number of columns (same on all processors) 2727 rstart - first row in new global matrix generated 2728 */ 2729 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2730 if (call == MAT_INITIAL_MATRIX) { 2731 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2732 ii = aij->i; 2733 jj = aij->j; 2734 2735 /* 2736 Determine the number of non-zeros in the diagonal and off-diagonal 2737 portions of the matrix in order to do correct preallocation 2738 */ 2739 2740 /* first get start and end of "diagonal" columns */ 2741 if (csize == PETSC_DECIDE) { 2742 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2743 if (mglobal == n) { /* square matrix */ 2744 nlocal = m; 2745 } else { 2746 nlocal = n/size + ((n % size) > rank); 2747 } 2748 } else { 2749 nlocal = csize; 2750 } 2751 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2752 rstart = rend - nlocal; 2753 if (rank == size - 1 && rend != n) { 2754 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2755 } 2756 2757 /* next, compute all the lengths */ 2758 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2759 olens = dlens + m; 2760 for (i=0; i<m; i++) { 2761 jend = ii[i+1] - ii[i]; 2762 olen = 0; 2763 dlen = 0; 2764 for (j=0; j<jend; j++) { 2765 if (*jj < rstart || *jj >= rend) olen++; 2766 else dlen++; 2767 jj++; 2768 } 2769 olens[i] = olen; 2770 dlens[i] = dlen; 2771 } 2772 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2773 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2774 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2775 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2776 ierr = PetscFree(dlens);CHKERRQ(ierr); 2777 } else { 2778 PetscInt ml,nl; 2779 2780 M = *newmat; 2781 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2782 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2783 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2784 /* 2785 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2786 rather than the slower MatSetValues(). 
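(Marking M as previously assembled but currently unassembled puts it back into an insertable state while keeping the data structures built during the earlier assembly.)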
2787 */ 2788 M->was_assembled = PETSC_TRUE; 2789 M->assembled = PETSC_FALSE; 2790 } 2791 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2792 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2793 ii = aij->i; 2794 jj = aij->j; 2795 aa = aij->a; 2796 for (i=0; i<m; i++) { 2797 row = rstart + i; 2798 nz = ii[i+1] - ii[i]; 2799 cwork = jj; jj += nz; 2800 vwork = aa; aa += nz; 2801 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2802 } 2803 2804 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2805 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2806 *newmat = M; 2807 2808 /* save submatrix used in processor for next request */ 2809 if (call == MAT_INITIAL_MATRIX) { 2810 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2811 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2812 } 2813 2814 PetscFunctionReturn(0); 2815 } 2816 2817 EXTERN_C_BEGIN 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2820 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2821 { 2822 PetscInt m,cstart, cend,j,nnz,i,d; 2823 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2824 const PetscInt *JJ; 2825 PetscScalar *values; 2826 PetscErrorCode ierr; 2827 2828 PetscFunctionBegin; 2829 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 2830 2831 B->rmap.bs = B->cmap.bs = 1; 2832 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2833 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2834 m = B->rmap.n; 2835 cstart = B->cmap.rstart; 2836 cend = B->cmap.rend; 2837 rstart = B->rmap.rstart; 2838 2839 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2840 o_nnz = d_nnz + m; 2841 2842 #if defined(PETSC_USE_DEBUG) 2843 for (i=0; i<m; i++) { 2844 nnz = Ii[i+1]- Ii[i]; 2845 JJ = J + Ii[i]; 2846 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 2847 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 2848 if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N); 2849 for (j=1; j<nnz; j++) { 2850 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in column indices",i,j); 2851 } 2852 } 2853 #endif 2854 2855 for (i=0; i<m; i++) { 2856 nnz = Ii[i+1]- Ii[i]; 2857 JJ = J + Ii[i]; 2858 nnz_max = PetscMax(nnz_max,nnz); 2859 for (j=0; j<nnz; j++) { 2860 if (*JJ >= cstart) break; 2861 JJ++; 2862 } 2863 d = 0; 2864 for (; j<nnz; j++) { 2865 if (*JJ++ >= cend) break; 2866 d++; 2867 } 2868 d_nnz[i] = d; 2869 o_nnz[i] = nnz - d; 2870 } 2871 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2872 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2873 2874 if (v) values = (PetscScalar*)v; 2875 else { 2876 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2877 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2878 } 2879 2880 for (i=0; i<m; i++) { 2881 ii = i + rstart; 2882 nnz = Ii[i+1]- Ii[i]; 2883 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2884 } 2885 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2887 2888 if (!v) { 2889 ierr = PetscFree(values);CHKERRQ(ierr); 2890 } 2891 PetscFunctionReturn(0); 2892 } 2893 EXTERN_C_END 2894 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2897 /*@ 2898 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2899 (the default parallel PETSc format). 2900 2901 Collective on MPI_Comm 2902 2903 Input Parameters: 2904 + B - the matrix 2905 . i - the indices into j for the start of each local row (starts with zero) 2906 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2907 - v - optional values in the matrix 2908 2909 Level: developer 2910 2911 Notes: 2912 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 2913 thus you CANNOT change the matrix entries by changing the values of v[] after you have 2914 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2915 2916 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 2917 2918 The format which is used for the sparse matrix input is equivalent to a 2919 row-major ordering, i.e. for the following matrix, the input data expected is 2920 as shown: 2921 2922 1 0 0 2923 2 0 3 P0 2924 ------- 2925 4 5 6 P1 2926 2927 Process0 [P0]: rows_owned=[0,1] 2928 i = {0,1,3} [size = nrow+1 = 2+1] 2929 j = {0,0,2} [size = nz = 3] 2930 v = {1,2,3} [size = nz = 3] 2931 2932 Process1 [P1]: rows_owned=[2] 2933 i = {0,3} [size = nrow+1 = 1+1] 2934 j = {0,1,2} [size = nz = 3] 2935 v = {4,5,6} [size = nz = 3] 2936 2937 The column indices for each row MUST be sorted. 2938 2939 .keywords: matrix, aij, compressed row, sparse, parallel 2940 2941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2942 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2943 @*/ 2944 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2945 { 2946 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2947 2948 PetscFunctionBegin; 2949 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2950 if (f) { 2951 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2952 } 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2958 /*@C 2959 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2960 (the default parallel PETSc format). For good matrix assembly performance 2961 the user should preallocate the matrix storage by setting the parameters 2962 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2963 performance can be increased by more than a factor of 50. 2964 2965 Collective on MPI_Comm 2966 2967 Input Parameters: 2968 + A - the matrix 2969 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2970 (same value is used for all local rows) 2971 . d_nnz - array containing the number of nonzeros in the various rows of the 2972 DIAGONAL portion of the local submatrix (possibly different for each row) 2973 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2974 The size of this array is equal to the number of local rows, i.e. 'm'. 2975 You must leave room for the diagonal entry even if it is zero. 2976 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2977 submatrix (same value is used for all local rows). 2978 - o_nnz - array containing the number of nonzeros in the various rows of the 2979 OFF-DIAGONAL portion of the local submatrix (possibly different for 2980 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2981 structure. The size of this array is equal to the number 2982 of local rows, i.e. 'm'. 2983 2984 If the *_nnz parameter is given then the *_nz parameter is ignored. 2985 2986 The AIJ format (also called the Yale sparse matrix format or 2987 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2988 storage. The stored row and column indices begin with zero. See the users manual for details. 2989 2990 The parallel matrix is partitioned such that the first m0 rows belong to 2991 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2992 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2993 2994 The DIAGONAL portion of the local submatrix of a processor can be defined 2995 as the submatrix which is obtained by extracting the part corresponding 2996 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2997 first row that belongs to the processor, and r2 is the last row belonging 2998 to this processor. This is a square mxm matrix. The remaining portion 2999 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3000 3001 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3002 3003 You can call MatGetInfo() to get information on how effective the preallocation was; 3004 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3005 You can also run with the option -info and look for messages with the string 3006 malloc in them to see if additional memory allocation was needed. 3007 3008 Example usage: 3009 3010 Consider the following 8x8 matrix with 34 non-zero values, that is 3011 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3012 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3013 as follows: 3014 3015 .vb 3016 1 2 0 | 0 3 0 | 0 4 3017 Proc0 0 5 6 | 7 0 0 | 8 0 3018 9 0 10 | 11 0 0 | 12 0 3019 ------------------------------------- 3020 13 0 14 | 15 16 17 | 0 0 3021 Proc1 0 18 0 | 19 20 21 | 0 0 3022 0 0 0 | 22 23 0 | 24 0 3023 ------------------------------------- 3024 Proc2 25 26 27 | 0 0 28 | 29 0 3025 30 0 0 | 31 32 33 | 0 34 3026 .ve 3027 3028 This can be represented as a collection of submatrices as: 3029 3030 .vb 3031 A B C 3032 D E F 3033 G H I 3034 .ve 3035 3036 Where the submatrices A,B,C are owned by proc0, D,E,F are 3037 owned by proc1, G,H,I are owned by proc2. 3038 3039 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3040 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3041 The 'M','N' parameters are 8,8, and have the same values on all procs. 3042 3043 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3044 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3045 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3046 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3047 part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ 3048 matrix, and [DF] as another SeqAIJ matrix.
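   As an illustrative sketch only (variable names here are placeholders): given the local sizes m,n, the global sizes M,N, and per-row counts d_nnz[] and o_nnz[] of length m, a typical call sequence is
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   Passing 0 for d_nz and o_nz is safe here because, as noted above, the *_nnz arrays take precedence when both are given.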
3049 3050 When d_nz, o_nz parameters are specified, d_nz storage elements are 3051 allocated for every row of the local diagonal submatrix, and o_nz 3052 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3053 One way to choose d_nz and o_nz is to use the max nonzeros per local 3054 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3055 In this case, the values of d_nz,o_nz are: 3056 .vb 3057 proc0 : d_nz = 2, o_nz = 2 3058 proc1 : d_nz = 3, o_nz = 2 3059 proc2 : d_nz = 1, o_nz = 4 3060 .ve 3061 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3062 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3063 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3064 34 values. 3065 3066 When d_nnz, o_nnz parameters are specified, the storage is specified 3067 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3068 In the above case the values for d_nnz,o_nnz are: 3069 .vb 3070 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3071 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3072 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3073 .ve 3074 Here the space allocated is the sum of all the above values, i.e. 34, and 3075 hence pre-allocation is perfect. 3076 3077 Level: intermediate 3078 3079 .keywords: matrix, aij, compressed row, sparse, parallel 3080 3081 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3082 MPIAIJ, MatGetInfo() 3083 @*/ 3084 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3085 { 3086 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3087 3088 PetscFunctionBegin; 3089 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3090 if (f) { 3091 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3092 } 3093 PetscFunctionReturn(0); 3094 } 3095 3096 #undef __FUNCT__ 3097 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3098 /*@ 3099 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard 3100 CSR format the local rows. 3101 3102 Collective on MPI_Comm 3103 3104 Input Parameters: 3105 + comm - MPI communicator 3106 . m - number of local rows (Cannot be PETSC_DECIDE) 3107 . n - This value should be the same as the local size used in creating the 3108 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3109 calculated if N is given) For square matrices n is almost always m. 3110 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3111 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3112 . i - row indices 3113 . j - column indices 3114 - a - matrix values 3115 3116 Output Parameter: 3117 . mat - the matrix 3118 3119 Level: intermediate 3120 3121 Notes: 3122 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3123 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3124 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3125 3126 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3127 3128 The format which is used for the sparse matrix input is equivalent to a 3129 row-major ordering,
     For the following matrix, the input data expected is as shown:

.vb
        1 0 0
        2 0 3   P0
       -------
        4 5 6   P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
.ve

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
   true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz and o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz and o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz and o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz and o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
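   For instance, one possible call on proc1 with the d_nnz/o_nnz arrays from
   the table above (a sketch only; comm denotes the parallel communicator and
   error checking is omitted):

.vb
    PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
    Mat      A;
    MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve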
3336 3337 Level: intermediate 3338 3339 .keywords: matrix, aij, compressed row, sparse, parallel 3340 3341 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3342 MPIAIJ, MatCreateMPIAIJWithArrays() 3343 @*/ 3344 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3345 { 3346 PetscErrorCode ierr; 3347 PetscMPIInt size; 3348 3349 PetscFunctionBegin; 3350 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3351 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3352 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3353 if (size > 1) { 3354 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3355 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3356 } else { 3357 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3358 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 #undef __FUNCT__ 3364 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3365 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3366 { 3367 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3368 3369 PetscFunctionBegin; 3370 *Ad = a->A; 3371 *Ao = a->B; 3372 *colmap = a->garray; 3373 PetscFunctionReturn(0); 3374 } 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3378 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3379 { 3380 PetscErrorCode ierr; 3381 PetscInt i; 3382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3383 3384 PetscFunctionBegin; 3385 if (coloring->ctype == IS_COLORING_GLOBAL) { 3386 ISColoringValue *allcolors,*colors; 3387 ISColoring ocoloring; 3388 3389 /* set coloring for diagonal portion */ 3390 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3391 3392 /* set coloring for off-diagonal portion */ 3393 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3394 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3395 for (i=0; i<a->B->cmap.n; i++) { 3396 colors[i] = allcolors[a->garray[i]]; 3397 } 3398 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3399 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3400 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3401 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3402 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3403 ISColoringValue *colors; 3404 PetscInt *larray; 3405 ISColoring ocoloring; 3406 3407 /* set coloring for diagonal portion */ 3408 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3409 for (i=0; i<a->A->cmap.n; i++) { 3410 larray[i] = i + A->cmap.rstart; 3411 } 3412 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3413 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3414 for (i=0; i<a->A->cmap.n; i++) { 3415 colors[i] = coloring->colors[larray[i]]; 3416 } 3417 ierr = PetscFree(larray);CHKERRQ(ierr); 3418 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3419 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3420 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3421 3422 /* set coloring for off-diagonal portion */ 3423 ierr = 
PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
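   The input matrix inmat is destroyed once outmat has been assembled
   (note the MatDestroy() call in the code below). A possible usage
   sketch, assuming each process holds a SeqAIJ matrix Aloc with the
   same number of columns (error checking omitted):

.vb
    Mat Aglobal;
    MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);
.ve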
3488 3489 @*/ 3490 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3491 { 3492 PetscErrorCode ierr; 3493 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3494 PetscInt *indx; 3495 PetscScalar *values; 3496 3497 PetscFunctionBegin; 3498 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3499 if (scall == MAT_INITIAL_MATRIX){ 3500 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3501 if (n == PETSC_DECIDE){ 3502 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3503 } 3504 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3505 rstart -= m; 3506 3507 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3508 for (i=0;i<m;i++) { 3509 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3510 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3511 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3512 } 3513 /* This routine will ONLY return MPIAIJ type matrix */ 3514 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3515 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3516 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3517 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3518 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3519 3520 } else if (scall == MAT_REUSE_MATRIX){ 3521 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3522 } else { 3523 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3524 } 3525 3526 for (i=0;i<m;i++) { 3527 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3528 Ii = i + rstart; 3529 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3530 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3531 } 3532 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3533 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3535 3536 PetscFunctionReturn(0); 3537 } 3538 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatFileSplit" 3541 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3542 { 3543 PetscErrorCode ierr; 3544 PetscMPIInt rank; 3545 PetscInt m,N,i,rstart,nnz; 3546 size_t len; 3547 const PetscInt *indx; 3548 PetscViewer out; 3549 char *name; 3550 Mat B; 3551 const PetscScalar *values; 3552 3553 PetscFunctionBegin; 3554 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3555 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3556 /* Should this be the type of the diagonal block of A? 
*/
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free the merge struct itself only when it was actually attached */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed.
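     For example, a sketch of retrieving the supporting data attached under the
     key "MatMergeSeqsToMPI" (the key and the Mat_Merge_SeqsToMPI struct are
     internal to this file; see the compose call in MatMerge_SeqsToMPISymbolic()):

.vb
    PetscContainer      container;
    Mat_Merge_SeqsToMPI *merge;
    PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
    if (container) PetscContainerGetPointer(container,(void**)&merge);
.ve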
Call PetscObjectQuery() to access seqmat. 3643 @*/ 3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3645 { 3646 PetscErrorCode ierr; 3647 MPI_Comm comm=((PetscObject)mpimat)->comm; 3648 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3649 PetscMPIInt size,rank,taga,*len_s; 3650 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3651 PetscInt proc,m; 3652 PetscInt **buf_ri,**buf_rj; 3653 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3654 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3655 MPI_Request *s_waits,*r_waits; 3656 MPI_Status *status; 3657 MatScalar *aa=a->a; 3658 PetscScalar **abuf_r,*ba_i; 3659 Mat_Merge_SeqsToMPI *merge; 3660 PetscContainer container; 3661 3662 PetscFunctionBegin; 3663 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3664 3665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3666 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3667 3668 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3669 if (container) { 3670 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3671 } 3672 bi = merge->bi; 3673 bj = merge->bj; 3674 buf_ri = merge->buf_ri; 3675 buf_rj = merge->buf_rj; 3676 3677 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3678 owners = merge->rowmap.range; 3679 len_s = merge->len_s; 3680 3681 /* send and recv matrix values */ 3682 /*-----------------------------*/ 3683 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3684 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3685 3686 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3687 for (proc=0,k=0; proc<size; proc++){ 3688 if (!len_s[proc]) continue; 3689 i = owners[proc]; 3690 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3691 k++; 3692 } 3693 3694 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3695 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3696 ierr = PetscFree(status);CHKERRQ(ierr); 3697 3698 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3699 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3700 3701 /* insert mat values of mpimat */ 3702 /*----------------------------*/ 3703 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 3704 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3705 nextrow = buf_ri_k + merge->nrecv; 3706 nextai = nextrow + merge->nrecv; 3707 3708 for (k=0; k<merge->nrecv; k++){ 3709 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3710 nrows = *(buf_ri_k[k]); 3711 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3712 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3713 } 3714 3715 /* set values of ba */ 3716 m = merge->rowmap.n; 3717 for (i=0; i<m; i++) { 3718 arow = owners[rank] + i; 3719 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3720 bnzi = bi[i+1] - bi[i]; 3721 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3722 3723 /* add local non-zero vals of this proc's seqmat into ba */ 3724 anzi = ai[arow+1] - ai[arow]; 3725 aj = a->j + ai[arow]; 3726 aa = a->a + ai[arow]; 3727 nextaj = 0; 3728 for (j=0; nextaj<anzi; j++){ 3729 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3730 ba_i[j] += aa[nextaj++]; 3731 } 3732 } 3733 3734 /* add 
received vals into ba */ 3735 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3736 /* i-th row */ 3737 if (i == *nextrow[k]) { 3738 anzi = *(nextai[k]+1) - *nextai[k]; 3739 aj = buf_rj[k] + *(nextai[k]); 3740 aa = abuf_r[k] + *(nextai[k]); 3741 nextaj = 0; 3742 for (j=0; nextaj<anzi; j++){ 3743 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3744 ba_i[j] += aa[nextaj++]; 3745 } 3746 } 3747 nextrow[k]++; nextai[k]++; 3748 } 3749 } 3750 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3751 } 3752 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 3755 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3756 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3757 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3758 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3759 PetscFunctionReturn(0); 3760 } 3761 3762 #undef __FUNCT__ 3763 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3764 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3765 { 3766 PetscErrorCode ierr; 3767 Mat B_mpi; 3768 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3769 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3770 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3771 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3772 PetscInt len,proc,*dnz,*onz; 3773 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3774 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3775 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3776 MPI_Status *status; 3777 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3778 PetscBT lnkbt; 3779 Mat_Merge_SeqsToMPI *merge; 3780 PetscContainer container; 3781 3782 PetscFunctionBegin; 3783 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3784 3785 /* make sure it is a PETSc comm */ 3786 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3787 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3788 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3789 3790 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3791 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3792 3793 /* determine row ownership */ 3794 /*---------------------------------------------------------*/ 3795 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3796 merge->rowmap.n = m; 3797 merge->rowmap.N = M; 3798 merge->rowmap.bs = 1; 3799 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3800 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3801 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3802 3803 m = merge->rowmap.n; 3804 M = merge->rowmap.N; 3805 owners = merge->rowmap.range; 3806 3807 /* determine the number of messages to send, their lengths */ 3808 /*---------------------------------------------------------*/ 3809 len_s = merge->len_s; 3810 3811 len = 0; /* length of buf_si[] */ 3812 merge->nsend = 0; 3813 for (proc=0; proc<size; proc++){ 3814 len_si[proc] = 0; 3815 if (proc == rank){ 3816 len_s[proc] = 0; 3817 } else { 3818 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3819 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3820 } 3821 if (len_s[proc]) { 3822 merge->nsend++; 3823 nrows = 0; 3824 for (i=owners[proc]; i<owners[proc+1]; i++){ 3825 if (ai[i+1] > ai[i]) nrows++; 3826 } 3827 len_si[proc] = 2*(nrows+1); 3828 len += 
len_si[proc]; 3829 } 3830 } 3831 3832 /* determine the number and length of messages to receive for ij-structure */ 3833 /*-------------------------------------------------------------------------*/ 3834 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3835 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3836 3837 /* post the Irecv of j-structure */ 3838 /*-------------------------------*/ 3839 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3840 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3841 3842 /* post the Isend of j-structure */ 3843 /*--------------------------------*/ 3844 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3845 sj_waits = si_waits + merge->nsend; 3846 3847 for (proc=0, k=0; proc<size; proc++){ 3848 if (!len_s[proc]) continue; 3849 i = owners[proc]; 3850 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3851 k++; 3852 } 3853 3854 /* receives and sends of j-structure are complete */ 3855 /*------------------------------------------------*/ 3856 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3857 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3858 3859 /* send and recv i-structure */ 3860 /*---------------------------*/ 3861 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3862 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3863 3864 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3865 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3866 for (proc=0,k=0; proc<size; proc++){ 3867 if (!len_s[proc]) continue; 3868 /* form outgoing message for i-structure: 3869 buf_si[0]: nrows to be sent 3870 [1:nrows]: row index (global) 3871 [nrows+1:2*nrows+1]: i-structure index 3872 */ 3873 /*-------------------------------------------*/ 3874 nrows = len_si[proc]/2 - 1; 3875 buf_si_i = buf_si + nrows+1; 3876 buf_si[0] = nrows; 3877 buf_si_i[0] = 0; 3878 nrows = 0; 3879 for (i=owners[proc]; i<owners[proc+1]; i++){ 3880 anzi = ai[i+1] - ai[i]; 3881 if (anzi) { 3882 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3883 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3884 nrows++; 3885 } 3886 } 3887 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3888 k++; 3889 buf_si += len_si[proc]; 3890 } 3891 3892 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3893 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3894 3895 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3896 for (i=0; i<merge->nrecv; i++){ 3897 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3898 } 3899 3900 ierr = PetscFree(len_si);CHKERRQ(ierr); 3901 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3902 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3903 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3904 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3905 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3906 ierr = PetscFree(status);CHKERRQ(ierr); 3907 3908 /* compute a local seq matrix in each processor */ 3909 /*----------------------------------------------*/ 3910 /* allocate bi array and 
free space for accumulating nonzero column info */ 3911 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3912 bi[0] = 0; 3913 3914 /* create and initialize a linked list */ 3915 nlnk = N+1; 3916 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3917 3918 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3919 len = 0; 3920 len = ai[owners[rank+1]] - ai[owners[rank]]; 3921 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3922 current_space = free_space; 3923 3924 /* determine symbolic info for each local row */ 3925 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3926 nextrow = buf_ri_k + merge->nrecv; 3927 nextai = nextrow + merge->nrecv; 3928 for (k=0; k<merge->nrecv; k++){ 3929 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3930 nrows = *buf_ri_k[k]; 3931 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3932 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3933 } 3934 3935 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3936 len = 0; 3937 for (i=0;i<m;i++) { 3938 bnzi = 0; 3939 /* add local non-zero cols of this proc's seqmat into lnk */ 3940 arow = owners[rank] + i; 3941 anzi = ai[arow+1] - ai[arow]; 3942 aj = a->j + ai[arow]; 3943 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3944 bnzi += nlnk; 3945 /* add received col data into lnk */ 3946 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3947 if (i == *nextrow[k]) { /* i-th row */ 3948 anzi = *(nextai[k]+1) - *nextai[k]; 3949 aj = buf_rj[k] + *nextai[k]; 3950 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3951 bnzi += nlnk; 3952 nextrow[k]++; nextai[k]++; 3953 } 3954 } 3955 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3956 3957 /* if free space is not available, make more free space */ 3958 if (current_space->local_remaining<bnzi) { 3959 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3960 nspacedouble++; 3961 } 3962 /* copy data into free space, then initialize lnk */ 3963 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3964 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3965 3966 current_space->array += bnzi; 3967 current_space->local_used += bnzi; 3968 current_space->local_remaining -= bnzi; 3969 3970 bi[i+1] = bi[i] + bnzi; 3971 } 3972 3973 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3974 3975 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3976 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3977 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3978 3979 /* create symbolic parallel matrix B_mpi */ 3980 /*---------------------------------------*/ 3981 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3982 if (n==PETSC_DECIDE) { 3983 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3984 } else { 3985 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3986 } 3987 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3988 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3989 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3990 3991 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3992 B_mpi->assembled = PETSC_FALSE; 3993 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3994 merge->bi = bi; 3995 merge->bj = bj; 3996 merge->buf_ri = buf_ri; 3997 merge->buf_rj = buf_rj; 
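  /* bi,bj and the received i/j buffers stashed above are reused by
     MatMerge_SeqsToMPINumeric() when the numeric values are assembled */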
3998 merge->coi = PETSC_NULL; 3999 merge->coj = PETSC_NULL; 4000 merge->owners_co = PETSC_NULL; 4001 4002 /* attach the supporting struct to B_mpi for reuse */ 4003 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4004 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4005 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4006 *mpimat = B_mpi; 4007 4008 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4009 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4010 PetscFunctionReturn(0); 4011 } 4012 4013 #undef __FUNCT__ 4014 #define __FUNCT__ "MatMerge_SeqsToMPI" 4015 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4016 { 4017 PetscErrorCode ierr; 4018 4019 PetscFunctionBegin; 4020 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4021 if (scall == MAT_INITIAL_MATRIX){ 4022 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4023 } 4024 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4025 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4026 PetscFunctionReturn(0); 4027 } 4028 4029 #undef __FUNCT__ 4030 #define __FUNCT__ "MatGetLocalMat" 4031 /*@ 4032 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4033 4034 Not Collective 4035 4036 Input Parameters: 4037 + A - the matrix 4038 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4039 4040 Output Parameter: 4041 . A_loc - the local sequential matrix generated 4042 4043 Level: developer 4044 4045 @*/ 4046 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4047 { 4048 PetscErrorCode ierr; 4049 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4050 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4051 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4052 MatScalar *aa=a->a,*ba=b->a,*cam; 4053 PetscScalar *ca; 4054 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 4055 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4056 4057 PetscFunctionBegin; 4058 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4059 if (scall == MAT_INITIAL_MATRIX){ 4060 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4061 ci[0] = 0; 4062 for (i=0; i<am; i++){ 4063 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4064 } 4065 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4066 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4067 k = 0; 4068 for (i=0; i<am; i++) { 4069 ncols_o = bi[i+1] - bi[i]; 4070 ncols_d = ai[i+1] - ai[i]; 4071 /* off-diagonal portion of A */ 4072 for (jo=0; jo<ncols_o; jo++) { 4073 col = cmap[*bj]; 4074 if (col >= cstart) break; 4075 cj[k] = col; bj++; 4076 ca[k++] = *ba++; 4077 } 4078 /* diagonal portion of A */ 4079 for (j=0; j<ncols_d; j++) { 4080 cj[k] = cstart + *aj++; 4081 ca[k++] = *aa++; 4082 } 4083 /* off-diagonal portion of A */ 4084 for (j=jo; j<ncols_o; j++) { 4085 cj[k] = cmap[*bj++]; 4086 ca[k++] = *ba++; 4087 } 4088 } 4089 /* put together the new matrix */ 4090 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4091 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4092 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4093 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4094 mat->free_a = PETSC_TRUE; 4095 mat->free_ij = PETSC_TRUE; 4096 mat->nonew = 0; 4097 } else if (scall == MAT_REUSE_MATRIX){ 4098 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4099 ci = mat->i; cj = mat->j; cam = mat->a; 4100 for (i=0; i<am; i++) { 4101 /* off-diagonal portion of A */ 4102 ncols_o = bi[i+1] - bi[i]; 4103 for (jo=0; jo<ncols_o; jo++) { 4104 col = cmap[*bj]; 4105 if (col >= cstart) break; 4106 *cam++ = *ba++; bj++; 4107 } 4108 /* diagonal portion of A */ 4109 ncols_d = ai[i+1] - ai[i]; 4110 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4111 /* off-diagonal portion of A */ 4112 for (j=jo; j<ncols_o; j++) { 4113 *cam++ = *ba++; bj++; 4114 } 4115 } 4116 } else { 4117 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4118 } 4119 4120 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4121 PetscFunctionReturn(0); 4122 } 4123 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "MatGetLocalMatCondensed" 4126 /*@C 4127 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4128 4129 Not Collective 4130 4131 Input Parameters: 4132 + A - the matrix 4133 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4134 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4135 4136 Output Parameter: 4137 . A_loc - the local sequential matrix generated 4138 4139 Level: developer 4140 4141 @*/ 4142 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4143 { 4144 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4145 PetscErrorCode ierr; 4146 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4147 IS isrowa,iscola; 4148 Mat *aloc; 4149 4150 PetscFunctionBegin; 4151 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4152 if (!row){ 4153 start = A->rmap.rstart; end = A->rmap.rend; 4154 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4155 } else { 4156 isrowa = *row; 4157 } 4158 if (!col){ 4159 start = A->cmap.rstart; 4160 cmap = a->garray; 4161 nzA = a->A->cmap.n; 4162 nzB = a->B->cmap.n; 4163 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4164 ncols = 0; 4165 for (i=0; i<nzB; i++) { 4166 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4167 else break; 4168 } 4169 imark = i; 4170 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4171 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4172 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4173 ierr = PetscFree(idx);CHKERRQ(ierr); 4174 } else { 4175 iscola = *col; 4176 } 4177 if (scall != MAT_INITIAL_MATRIX){ 4178 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4179 aloc[0] = *A_loc; 4180 } 4181 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4182 *A_loc = aloc[0]; 4183 ierr = PetscFree(aloc);CHKERRQ(ierr); 4184 if (!row){ 4185 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4186 } 4187 if (!col){ 4188 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4189 } 4190 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4191 PetscFunctionReturn(0); 4192 } 4193 4194 #undef __FUNCT__ 4195 #define __FUNCT__ "MatGetBrowsOfAcols" 4196 /*@C 4197 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4198 4199 Collective on Mat 4200 4201 Input Parameters: 4202 + A,B - the matrices in mpiaij format 4203 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4204 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4205 4206 Output Parameter: 4207 + rowb, colb - index sets of rows and columns of B to extract 4208 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4209 - B_seq - the sequential matrix generated 4210 4211 Level: developer 4212 4213 @*/ 4214 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4215 { 4216 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4217 PetscErrorCode ierr; 4218 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4219 IS isrowb,iscolb; 4220 Mat *bseq; 4221 4222 PetscFunctionBegin; 4223 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4224 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4225 } 4226 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4227 4228 if (scall == MAT_INITIAL_MATRIX){ 4229 start = A->cmap.rstart; 4230 cmap = a->garray; 4231 nzA = a->A->cmap.n; 4232 nzB = a->B->cmap.n; 4233 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4234 ncols = 0; 4235 for (i=0; i<nzB; i++) { /* row < local row index */ 4236 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4237 else break; 4238 } 4239 imark = i; 4240 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4241 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4242 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4243 ierr = PetscFree(idx);CHKERRQ(ierr); 4244 *brstart = imark; 4245 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4246 } else { 4247 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4248 isrowb = *rowb; iscolb = *colb; 4249 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4250 bseq[0] = *B_seq; 4251 } 4252 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4253 *B_seq = bseq[0]; 4254 ierr = PetscFree(bseq);CHKERRQ(ierr); 4255 if (!rowb){ 4256 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4257 } else { 4258 *rowb = isrowb; 4259 } 4260 if (!colb){ 4261 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4262 } else { 4263 *colb = iscolb; 4264 } 4265 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4266 PetscFunctionReturn(0); 4267 } 4268 4269 #undef __FUNCT__ 4270 #define __FUNCT__ "MatGetBrowsOfAoCols" 4271 /*@C 4272 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4273 of the OFF-DIAGONAL portion of local A 4274 4275 Collective on Mat 4276 4277 Input Parameters: 4278 + A,B - the matrices in mpiaij format 4279 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4280 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4281 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4282 4283 Output Parameter: 4284 + B_oth - the sequential matrix generated 4285 4286 Level: developer 4287 4288 @*/ 4289 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4290 { 4291 VecScatter_MPI_General *gen_to,*gen_from; 4292 PetscErrorCode ierr; 4293 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4294 Mat_SeqAIJ *b_oth; 4295 VecScatter ctx=a->Mvctx; 4296 MPI_Comm comm=((PetscObject)ctx)->comm; 4297 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4298 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4299 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4300 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4301 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4302 MPI_Status *sstatus,rstatus; 4303 PetscMPIInt jj; 4304 PetscInt *cols,sbs,rbs; 4305 PetscScalar *vals; 4306 4307 PetscFunctionBegin; 4308 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4309 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4310 } 4311 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4312 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4313 4314 gen_to = (VecScatter_MPI_General*)ctx->todata; 4315 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4316 rvalues = gen_from->values; /* holds the length of receiving row */ 4317 svalues = gen_to->values; /* holds the length of sending row */ 4318 nrecvs = gen_from->n; 4319 nsends = gen_to->n; 4320 4321 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4322 srow = gen_to->indices; /* local row index to be sent */ 4323 sstarts = gen_to->starts; 4324 sprocs = gen_to->procs; 4325 sstatus = gen_to->sstatus; 4326 sbs = gen_to->bs; 4327 rstarts = gen_from->starts; 4328 rprocs = gen_from->procs; 4329 rbs = gen_from->bs; 4330 4331 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4332 if (scall == MAT_INITIAL_MATRIX){ 4333 /* i-array */ 4334 /*---------*/ 4335 /* post receives */ 4336 for (i=0; i<nrecvs; i++){ 4337 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4338 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4339 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4340 } 4341 4342 /* pack the outgoing message */ 4343 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4344 rstartsj = sstartsj + nsends +1; 4345 sstartsj[0] = 0; rstartsj[0] = 0; 4346 len = 0; /* total length of j or a array to be sent */ 4347 k = 0; 4348 for (i=0; i<nsends; i++){ 4349 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4350 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4351 for (j=0; j<nrows; j++) { 4352 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4353 for (l=0; l<sbs; l++){ 4354 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4355 rowlen[j*sbs+l] = ncols; 4356 len += ncols; 4357 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4358 } 4359 k++; 4360 } 4361 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4362 sstartsj[i+1] = len; /* starting 
point of (i+1)-th outgoing msg in bufj and bufa */ 4363 } 4364 /* recvs and sends of i-array are completed */ 4365 i = nrecvs; 4366 while (i--) { 4367 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4368 } 4369 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4370 4371 /* allocate buffers for sending j and a arrays */ 4372 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4373 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4374 4375 /* create i-array of B_oth */ 4376 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4377 b_othi[0] = 0; 4378 len = 0; /* total length of j or a array to be received */ 4379 k = 0; 4380 for (i=0; i<nrecvs; i++){ 4381 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4382 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4383 for (j=0; j<nrows; j++) { 4384 b_othi[k+1] = b_othi[k] + rowlen[j]; 4385 len += rowlen[j]; k++; 4386 } 4387 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4388 } 4389 4390 /* allocate space for j and a arrrays of B_oth */ 4391 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4392 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4393 4394 /* j-array */ 4395 /*---------*/ 4396 /* post receives of j-array */ 4397 for (i=0; i<nrecvs; i++){ 4398 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4399 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4400 } 4401 4402 /* pack the outgoing message j-array */ 4403 k = 0; 4404 for (i=0; i<nsends; i++){ 4405 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4406 bufJ = bufj+sstartsj[i]; 4407 for (j=0; j<nrows; j++) { 4408 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4409 for (ll=0; ll<sbs; ll++){ 4410 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4411 for (l=0; l<ncols; l++){ 4412 *bufJ++ = cols[l]; 4413 } 4414 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4415 } 4416 } 4417 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4418 } 4419 4420 /* recvs and sends of j-array are completed */ 4421 i = nrecvs; 4422 while (i--) { 4423 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4424 } 4425 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4426 } else if (scall == MAT_REUSE_MATRIX){ 4427 sstartsj = *startsj; 4428 rstartsj = sstartsj + nsends +1; 4429 bufa = *bufa_ptr; 4430 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4431 b_otha = b_oth->a; 4432 } else { 4433 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4434 } 4435 4436 /* a-array */ 4437 /*---------*/ 4438 /* post receives of a-array */ 4439 for (i=0; i<nrecvs; i++){ 4440 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4441 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4442 } 4443 4444 /* pack the outgoing message a-array */ 4445 k = 0; 4446 for (i=0; i<nsends; i++){ 4447 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4448 bufA = bufa+sstartsj[i]; 4449 for (j=0; j<nrows; j++) { 4450 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4451 for (ll=0; ll<sbs; ll++){ 4452 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4453 for (l=0; l<ncols; l++){ 4454 *bufA++ = vals[l]; 4455 } 
4456 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4457 } 4458 } 4459 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4460 } 4461 /* recvs and sends of a-array are completed */ 4462 i = nrecvs; 4463 while (i--) { 4464 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4465 } 4466 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4467 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4468 4469 if (scall == MAT_INITIAL_MATRIX){ 4470 /* put together the new matrix */ 4471 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4472 4473 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4474 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4475 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4476 b_oth->free_a = PETSC_TRUE; 4477 b_oth->free_ij = PETSC_TRUE; 4478 b_oth->nonew = 0; 4479 4480 ierr = PetscFree(bufj);CHKERRQ(ierr); 4481 if (!startsj || !bufa_ptr){ 4482 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4483 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4484 } else { 4485 *startsj = sstartsj; 4486 *bufa_ptr = bufa; 4487 } 4488 } 4489 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #undef __FUNCT__ 4494 #define __FUNCT__ "MatGetCommunicationStructs" 4495 /*@C 4496 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4497 4498 Not Collective 4499 4500 Input Parameters: 4501 . A - The matrix in mpiaij format 4502 4503 Output Parameter: 4504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4505 . colmap - A map from global column index to local index into lvec 4506 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4507 4508 Level: developer 4509 4510 @*/ 4511 #if defined (PETSC_USE_CTABLE) 4512 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4513 #else 4514 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4515 #endif 4516 { 4517 Mat_MPIAIJ *a; 4518 4519 PetscFunctionBegin; 4520 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4521 PetscValidPointer(lvec, 2) 4522 PetscValidPointer(colmap, 3) 4523 PetscValidPointer(multScatter, 4) 4524 a = (Mat_MPIAIJ *) A->data; 4525 if (lvec) *lvec = a->lvec; 4526 if (colmap) *colmap = a->colmap; 4527 if (multScatter) *multScatter = a->Mvctx; 4528 PetscFunctionReturn(0); 4529 } 4530 4531 EXTERN_C_BEGIN 4532 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4533 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4534 EXTERN_C_END 4535 4536 /*MC 4537 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4538 4539 Options Database Keys: 4540 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4541 4542 Level: beginner 4543 4544 .seealso: MatCreateMPIAIJ() 4545 M*/ 4546 4547 EXTERN_C_BEGIN 4548 #undef __FUNCT__ 4549 #define __FUNCT__ "MatCreate_MPIAIJ" 4550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4551 { 4552 Mat_MPIAIJ *b; 4553 PetscErrorCode ierr; 4554 PetscMPIInt size; 4555 4556 PetscFunctionBegin; 4557 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 4558 4559 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4560 B->data = (void*)b; 4561 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4562 B->factor = 0; 4563 B->rmap.bs = 1; 4564 B->assembled = PETSC_FALSE; 4565 B->mapping = 0; 4566 4567 B->insertmode = NOT_SET_VALUES; 4568 b->size = size; 4569 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 4570 4571 /* build cache for off array entries formed */ 4572 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 4573 b->donotstash = PETSC_FALSE; 4574 b->colmap = 0; 4575 b->garray = 0; 4576 b->roworiented = PETSC_TRUE; 4577 4578 /* stuff used for matrix vector multiply */ 4579 b->lvec = PETSC_NULL; 4580 b->Mvctx = PETSC_NULL; 4581 4582 /* stuff for MatGetRow() */ 4583 b->rowindices = 0; 4584 b->rowvalues = 0; 4585 b->getrowactive = PETSC_FALSE; 4586 4587 4588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4589 "MatStoreValues_MPIAIJ", 4590 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4592 "MatRetrieveValues_MPIAIJ", 4593 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4595 "MatGetDiagonalBlock_MPIAIJ", 4596 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4598 "MatIsTranspose_MPIAIJ", 4599 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4601 "MatMPIAIJSetPreallocation_MPIAIJ", 4602 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4604 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4605 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4607 "MatDiagonalScaleLocal_MPIAIJ", 4608 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4610 "MatConvert_MPIAIJ_MPICSRPERM", 4611 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4613 "MatConvert_MPIAIJ_MPICRL", 4614 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4615 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4616 PetscFunctionReturn(0); 4617 } 4618 EXTERN_C_END 4619 4620 #undef __FUNCT__ 4621 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4622 /*@ 4623 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4624 and "off-diagonal" part of the matrix in CSR format. 4625 4626 Collective on MPI_Comm 4627 4628 Input Parameters: 4629 + comm - MPI communicator 4630 . m - number of local rows (Cannot be PETSC_DECIDE) 4631 . 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - local number of columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
       For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices for the "diagonal" portion
.  a - matrix values for the "diagonal" portion
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices for the "off-diagonal" portion
-  oa - matrix values for the "off-diagonal" portion

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays are NOT copied by this routine into the internal format used by PETSc;
   they must remain valid for the lifetime of the matrix.

   The i and j indices are 0 based.

   See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
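/*
   A minimal sketch of calling MatCreateMPIAIJWithSplitArrays() (illustrative
   only; ExampleSplitArrays is a hypothetical helper).  Each process
   contributes two rows of a block-diagonal matrix; the arrays are static
   because, as noted above, they are not copied and must outlive the matrix.
   An empty off-diagonal part is expressed by a row pointer array of zeros.
*/
static PetscErrorCode ExampleSplitArrays(MPI_Comm comm,Mat *mat)
{
  static PetscInt    i[]  = {0,2,4};              /* diagonal part: CSR row pointers   */
  static PetscInt    j[]  = {0,1,0,1};            /* local column indices, 0 based     */
  static PetscScalar a[]  = {2.0,-1.0,-1.0,2.0};  /* values                            */
  static PetscInt    oi[] = {0,0,0};              /* off-diagonal part: no entries     */
  static PetscInt    oj[] = {0};                  /* never read since oi[] is all zero */
  static PetscScalar oa[] = {0.0};
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}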
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine these macros so they can be used in a void function: a void
   function cannot return an error code, so on error we abort instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt    i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt    cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B  = aij->B;
              b  = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;   /* refresh ba before rebuilding ap2 below */
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END