1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij);CHKERRQ(ierr); 727 728 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 762 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 763 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 764 /* get largest number of rows any processor has */ 765 rlen = mat->rmap.n; 766 range = mat->rmap.range; 767 for (i=1; i<size; i++) { 768 rlen = PetscMax(rlen,range[i+1] - range[i]); 769 } 770 } else { 771 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 rlen = mat->rmap.n; 773 } 774 775 /* load up the local row counts */ 776 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 777 for (i=0; i<mat->rmap.n; i++) { 778 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 779 } 780 781 /* store the row lengths to the file */ 782 if (!rank) { 783 MPI_Status status; 784 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 785 for (i=1; i<size; i++) { 786 rlen = range[i+1] - range[i]; 787 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 788 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 } 790 } else { 791 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 792 } 793 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 794 795 /* load up the local column indices */ 796 nzmax = nz; /* )th processor needs space a largest processor needs */ 797 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 798 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 799 cnt = 0; 800 for (i=0; i<mat->rmap.n; i++) { 801 for (j=B->i[i]; j<B->i[i+1]; j++) { 802 if ( (col = garray[B->j[j]]) > cstart) break; 803 column_indices[cnt++] = col; 804 } 805 for (k=A->i[i]; k<A->i[i+1]; k++) { 806 column_indices[cnt++] = A->j[k] + cstart; 807 } 808 for (; j<B->i[i+1]; j++) { 809 column_indices[cnt++] = garray[B->j[j]]; 810 } 811 } 812 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 813 814 /* store the column indices to the file */ 815 if (!rank) { 816 MPI_Status status; 817 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 for (i=1; i<size; i++) { 819 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 820 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 821 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 } 824 } else { 825 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 826 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 } 828 ierr = PetscFree(column_indices);CHKERRQ(ierr); 829 830 /* load up the local column values */ 831 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 832 cnt = 0; 833 for (i=0; i<mat->rmap.n; i++) { 834 for (j=B->i[i]; j<B->i[i+1]; j++) { 835 if ( garray[B->j[j]] > cstart) break; 836 column_values[cnt++] = B->a[j]; 837 } 838 for (k=A->i[i]; k<A->i[i+1]; k++) { 839 column_values[cnt++] = A->a[k]; 840 } 841 for (; j<B->i[i+1]; j++) { 842 column_values[cnt++] = B->a[j]; 843 } 844 } 845 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 846 847 /* store the column values to the file */ 848 if (!rank) { 849 MPI_Status status; 850 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatRelax_MPIAIJ" 1014 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1015 { 1016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1017 PetscErrorCode ierr; 1018 Vec bb1; 1019 1020 PetscFunctionBegin; 1021 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1022 1023 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1024 1025 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1026 if (flag & SOR_ZERO_INITIAL_GUESS) { 1027 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1028 its--; 1029 } 1030 1031 while (its--) { 1032 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1034 1035 /* update rhs: bb1 = bb - B*x */ 1036 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1037 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1038 1039 /* local sweep */ 1040 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1041 CHKERRQ(ierr); 1042 } 1043 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1044 if (flag & SOR_ZERO_INITIAL_GUESS) { 1045 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1046 its--; 1047 } 1048 while (its--) { 1049 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 1052 /* update rhs: bb1 = bb - B*x */ 1053 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1054 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1055 1056 /* local sweep */ 1057 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1058 CHKERRQ(ierr); 1059 } 1060 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1061 if (flag & SOR_ZERO_INITIAL_GUESS) { 1062 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1063 its--; 1064 } 1065 while (its--) { 1066 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 1069 /* update rhs: bb1 = bb - B*x */ 1070 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1071 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1072 1073 /* local sweep */ 1074 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1075 CHKERRQ(ierr); 1076 } 1077 } else { 1078 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1079 } 1080 1081 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatPermute_MPIAIJ" 1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1088 { 1089 MPI_Comm comm,pcomm; 1090 PetscInt first,local_size,nrows,*rows; 1091 int ntids; 1092 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1097 /* make a collective version of 'rowp' */ 1098 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1099 if (pcomm==comm) { 1100 crowp = rowp; 1101 } else { 1102 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1103 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1104 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1105 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1106 } 1107 /* collect the global row permutation and invert it */ 1108 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1109 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1110 if (pcomm!=comm) { 1111 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1112 } 1113 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1114 /* get the local target indices */ 1115 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1116 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1117 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1118 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1119 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1121 /* the column permutation is so much easier; 1122 make a local version of 'colp' and invert it */ 1123 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1124 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1125 if (ntids==1) { 1126 lcolp = colp; 1127 } else { 1128 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1129 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1131 } 1132 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1133 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1134 if (ntids>1) { 1135 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1136 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1137 } 1138 /* now we just get the submatrix */ 1139 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1140 /* clean up */ 1141 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1142 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1149 { 1150 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1151 Mat A = mat->A,B = mat->B; 1152 PetscErrorCode ierr; 1153 PetscReal isend[5],irecv[5]; 1154 1155 PetscFunctionBegin; 1156 info->block_size = 1.0; 1157 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1159 isend[3] = info->memory; isend[4] = info->mallocs; 1160 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1162 isend[3] += info->memory; isend[4] += info->mallocs; 1163 if (flag == MAT_LOCAL) { 1164 info->nz_used = isend[0]; 1165 info->nz_allocated = isend[1]; 1166 info->nz_unneeded = isend[2]; 1167 info->memory = isend[3]; 1168 info->mallocs = isend[4]; 1169 } else if (flag == MAT_GLOBAL_MAX) { 1170 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } 1184 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1185 info->fill_ratio_needed = 0; 1186 info->factor_mallocs = 0; 1187 info->rows_global = (double)matin->rmap.N; 1188 info->columns_global = (double)matin->cmap.N; 1189 info->rows_local = (double)matin->rmap.n; 1190 info->columns_local = (double)matin->cmap.N; 1191 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatSetOption_MPIAIJ" 1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 switch (op) { 1204 case MAT_NO_NEW_NONZERO_LOCATIONS: 1205 case MAT_YES_NEW_NONZERO_LOCATIONS: 1206 case MAT_COLUMNS_UNSORTED: 1207 case MAT_COLUMNS_SORTED: 1208 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1209 case MAT_KEEP_ZEROED_ROWS: 1210 case MAT_NEW_NONZERO_LOCATION_ERR: 1211 case MAT_USE_INODES: 1212 case MAT_DO_NOT_USE_INODES: 1213 case MAT_IGNORE_ZERO_ENTRIES: 1214 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1216 break; 1217 case MAT_ROW_ORIENTED: 1218 a->roworiented = PETSC_TRUE; 1219 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1220 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1221 break; 1222 case MAT_ROWS_SORTED: 1223 case MAT_ROWS_UNSORTED: 1224 case MAT_YES_NEW_DIAGONALS: 1225 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1226 break; 1227 case MAT_COLUMN_ORIENTED: 1228 a->roworiented = PETSC_FALSE; 1229 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1230 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1231 break; 1232 case MAT_IGNORE_OFF_PROC_ENTRIES: 1233 a->donotstash = PETSC_TRUE; 1234 break; 1235 case MAT_NO_NEW_DIAGONALS: 1236 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1237 case MAT_SYMMETRIC: 1238 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1239 break; 1240 case MAT_STRUCTURALLY_SYMMETRIC: 1241 case MAT_HERMITIAN: 1242 case MAT_SYMMETRY_ETERNAL: 1243 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1244 break; 1245 case MAT_NOT_SYMMETRIC: 1246 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1247 case MAT_NOT_HERMITIAN: 1248 case MAT_NOT_SYMMETRY_ETERNAL: 1249 break; 1250 default: 1251 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatGetRow_MPIAIJ" 1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259 { 1260 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1261 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1262 PetscErrorCode ierr; 1263 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1264 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1265 PetscInt *cmap,*idx_p; 1266 1267 PetscFunctionBegin; 1268 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1269 mat->getrowactive = PETSC_TRUE; 1270 1271 if (!mat->rowvalues && (idx || v)) { 1272 /* 1273 allocate enough space to hold information from the longest row. 1274 */ 1275 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1276 PetscInt max = 1,tmp; 1277 for (i=0; i<matin->rmap.n; i++) { 1278 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1279 if (max < tmp) { max = tmp; } 1280 } 1281 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1282 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1283 } 1284 1285 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1286 lrow = row - rstart; 1287 1288 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1289 if (!v) {pvA = 0; pvB = 0;} 1290 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1291 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293 nztot = nzA + nzB; 1294 1295 cmap = mat->garray; 1296 if (v || idx) { 1297 if (nztot) { 1298 /* Sort by increasing column numbers, assuming A and B already sorted */ 1299 PetscInt imark = -1; 1300 if (v) { 1301 *v = v_p = mat->rowvalues; 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1304 else break; 1305 } 1306 imark = i; 1307 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1308 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1309 } 1310 if (idx) { 1311 *idx = idx_p = mat->rowindices; 1312 if (imark > -1) { 1313 for (i=0; i<imark; i++) { 1314 idx_p[i] = cmap[cworkB[i]]; 1315 } 1316 } else { 1317 for (i=0; i<nzB; i++) { 1318 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1324 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 1343 PetscFunctionBegin; 1344 if (!aij->getrowactive) { 1345 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1346 } 1347 aij->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatNorm_MPIAIJ" 1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1354 { 1355 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1356 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,cstart = mat->cmap.rstart; 1359 PetscReal sum = 0.0; 1360 PetscScalar *v; 1361 1362 PetscFunctionBegin; 1363 if (aij->size == 1) { 1364 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1365 } else { 1366 if (type == NORM_FROBENIUS) { 1367 v = amat->a; 1368 for (i=0; i<amat->nz; i++) { 1369 #if defined(PETSC_USE_COMPLEX) 1370 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1371 #else 1372 sum += (*v)*(*v); v++; 1373 #endif 1374 } 1375 v = bmat->a; 1376 for (i=0; i<bmat->nz; i++) { 1377 #if defined(PETSC_USE_COMPLEX) 1378 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1379 #else 1380 sum += (*v)*(*v); v++; 1381 #endif 1382 } 1383 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1384 *norm = sqrt(*norm); 1385 } else if (type == NORM_1) { /* max column norm */ 1386 PetscReal *tmp,*tmp2; 1387 PetscInt *jj,*garray = aij->garray; 1388 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1389 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1390 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1391 *norm = 0.0; 1392 v = amat->a; jj = amat->j; 1393 for (j=0; j<amat->nz; j++) { 1394 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1395 } 1396 v = bmat->a; jj = bmat->j; 1397 for (j=0; j<bmat->nz; j++) { 1398 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1399 } 1400 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1401 for (j=0; j<mat->cmap.N; j++) { 1402 if (tmp2[j] > *norm) *norm = tmp2[j]; 1403 } 1404 ierr = PetscFree(tmp);CHKERRQ(ierr); 1405 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1406 } else if (type == NORM_INFINITY) { /* max row norm */ 1407 PetscReal ntemp = 0.0; 1408 for (j=0; j<aij->A->rmap.n; j++) { 1409 v = amat->a + amat->i[j]; 1410 sum = 0.0; 1411 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1412 sum += PetscAbsScalar(*v); v++; 1413 } 1414 v = bmat->a + bmat->i[j]; 1415 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1416 sum += PetscAbsScalar(*v); v++; 1417 } 1418 if (sum > ntemp) ntemp = sum; 1419 } 1420 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1421 } else { 1422 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1423 } 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatTranspose_MPIAIJ" 1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1431 { 1432 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1433 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1434 PetscErrorCode ierr; 1435 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1436 PetscInt cstart=A->cmap.rstart,ncol; 1437 Mat B; 1438 PetscScalar *array; 1439 1440 PetscFunctionBegin; 1441 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1442 1443 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1444 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1445 ai = Aloc->i; aj = Aloc->j; 1446 bi = Bloc->i; bj = Bloc->j; 1447 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1448 cols = d_nnz + na + 1; /* work space to be used by B part */ 1449 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1450 for (i=0; i<ai[ma]; i++){ 1451 d_nnz[aj[i]] ++; 1452 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1453 } 1454 1455 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1456 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1457 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1458 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1459 1460 /* copy over the A part */ 1461 array = Aloc->a; 1462 row = A->rmap.rstart; 1463 for (i=0; i<ma; i++) { 1464 ncol = ai[i+1]-ai[i]; 1465 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1466 row++; array += ncol; aj += ncol; 1467 } 1468 aj = Aloc->j; 1469 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1470 1471 /* copy over the B part */ 1472 array = Bloc->a; 1473 row = A->rmap.rstart; 1474 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1475 for (i=0; i<mb; i++) { 1476 ncol = bi[i+1]-bi[i]; 1477 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1478 row++; array += ncol; cols += ncol; 1479 } 1480 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1481 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1482 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1483 if (matout) { 1484 *matout = B; 1485 } else { 1486 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1493 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1494 { 1495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1496 Mat a = aij->A,b = aij->B; 1497 PetscErrorCode ierr; 1498 PetscInt s1,s2,s3; 1499 1500 PetscFunctionBegin; 1501 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1502 if (rr) { 1503 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1504 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1505 /* Overlap communication with computation. */ 1506 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1507 } 1508 if (ll) { 1509 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1510 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1511 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1512 } 1513 /* scale the diagonal block */ 1514 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1515 1516 if (rr) { 1517 /* Do a scatter end and then right scale the off-diagonal block */ 1518 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1519 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1520 } 1521 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1527 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1528 { 1529 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1534 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1539 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1540 { 1541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatEqual_MPIAIJ" 1551 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1552 { 1553 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1554 Mat a,b,c,d; 1555 PetscTruth flg; 1556 PetscErrorCode ierr; 1557 1558 PetscFunctionBegin; 1559 a = matA->A; b = matA->B; 1560 c = matB->A; d = matB->B; 1561 1562 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1563 if (flg) { 1564 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1565 } 1566 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatCopy_MPIAIJ" 1572 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1573 { 1574 PetscErrorCode ierr; 1575 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1576 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1577 1578 PetscFunctionBegin; 1579 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1580 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1581 /* because of the column compression in the off-processor part of the matrix a->B, 1582 the number of columns in a->B and b->B may be different, hence we cannot call 1583 the MatCopy() directly on the two parts. If need be, we can provide a more 1584 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1585 then copying the submatrices */ 1586 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1587 } else { 1588 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1589 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1590 } 1591 PetscFunctionReturn(0); 1592 } 1593 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1596 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1597 { 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #include "petscblaslapack.h" 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatAXPY_MPIAIJ" 1608 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1609 { 1610 PetscErrorCode ierr; 1611 PetscInt i; 1612 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1613 PetscBLASInt bnz,one=1; 1614 Mat_SeqAIJ *x,*y; 1615 1616 PetscFunctionBegin; 1617 if (str == SAME_NONZERO_PATTERN) { 1618 PetscScalar alpha = a; 1619 x = (Mat_SeqAIJ *)xx->A->data; 1620 y = (Mat_SeqAIJ *)yy->A->data; 1621 bnz = (PetscBLASInt)x->nz; 1622 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1623 x = (Mat_SeqAIJ *)xx->B->data; 1624 y = (Mat_SeqAIJ *)yy->B->data; 1625 bnz = (PetscBLASInt)x->nz; 1626 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1627 } else if (str == SUBSET_NONZERO_PATTERN) { 1628 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1629 1630 x = (Mat_SeqAIJ *)xx->B->data; 1631 y = (Mat_SeqAIJ *)yy->B->data; 1632 if (y->xtoy && y->XtoY != xx->B) { 1633 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1634 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1635 } 1636 if (!y->xtoy) { /* get xtoy */ 1637 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1638 y->XtoY = xx->B; 1639 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1640 } 1641 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1642 } else { 1643 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatConjugate_MPIAIJ" 1652 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1653 { 1654 #if defined(PETSC_USE_COMPLEX) 1655 PetscErrorCode ierr; 1656 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1657 1658 PetscFunctionBegin; 1659 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1660 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1661 #else 1662 PetscFunctionBegin; 1663 #endif 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatRealPart_MPIAIJ" 1669 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1670 { 1671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1676 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1682 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1683 { 1684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1685 PetscErrorCode ierr; 1686 1687 PetscFunctionBegin; 1688 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1689 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #ifdef PETSC_HAVE_PBGL 1694 1695 #include <boost/parallel/mpi/bsp_process_group.hpp> 1696 #include <boost/graph/distributed/ilu_default_graph.hpp> 1697 #include <boost/graph/distributed/ilu_0_block.hpp> 1698 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1699 #include <boost/graph/distributed/petsc/interface.hpp> 1700 #include <boost/multi_array.hpp> 1701 #include <boost/parallel/distributed_property_map.hpp> 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1705 /* 1706 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1707 */ 1708 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1709 { 1710 namespace petsc = boost::distributed::petsc; 1711 1712 namespace graph_dist = boost::graph::distributed; 1713 using boost::graph::distributed::ilu_default::process_group_type; 1714 using boost::graph::ilu_permuted; 1715 1716 PetscTruth row_identity, col_identity; 1717 PetscContainer c; 1718 PetscInt m, n, M, N; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1723 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1724 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1725 if (!row_identity || !col_identity) { 1726 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1727 } 1728 1729 process_group_type pg; 1730 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1731 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1732 lgraph_type& level_graph = *lgraph_p; 1733 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1734 1735 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1736 ilu_permuted(level_graph); 1737 1738 /* put together the new matrix */ 1739 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1740 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1741 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1742 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1743 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1744 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1745 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1746 (*fact)->factor = FACTOR_LU; 1747 1748 ierr = PetscContainerCreate(A->comm, &c); 1749 ierr = PetscContainerSetPointer(c, lgraph_p); 1750 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1756 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1757 { 1758 PetscFunctionBegin; 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatSolve_MPIAIJ" 1764 /* 1765 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1766 */ 1767 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1768 { 1769 namespace graph_dist = boost::graph::distributed; 1770 1771 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1772 lgraph_type* lgraph_p; 1773 PetscContainer c; 1774 PetscErrorCode ierr; 1775 1776 PetscFunctionBegin; 1777 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1778 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1779 ierr = VecCopy(b, x); CHKERRQ(ierr); 1780 1781 PetscScalar* array_x; 1782 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1783 PetscInt sx; 1784 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1785 1786 PetscScalar* array_b; 1787 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1788 PetscInt sb; 1789 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1790 1791 lgraph_type& level_graph = *lgraph_p; 1792 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1793 1794 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1795 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1796 ref_x(array_x, boost::extents[num_vertices(graph)]); 1797 1798 typedef boost::iterator_property_map<array_ref_type::iterator, 1799 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1800 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1801 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1802 1803 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1804 1805 PetscFunctionReturn(0); 1806 } 1807 #endif 1808 1809 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1810 PetscInt nzlocal,nsends,nrecvs; 1811 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1812 PetscScalar *sbuf_a,**rbuf_a; 1813 PetscErrorCode (*MatDestroy)(Mat); 1814 } Mat_Redundant; 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1818 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1819 { 1820 PetscErrorCode ierr; 1821 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1822 PetscInt i; 1823 1824 PetscFunctionBegin; 1825 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1826 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1827 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1828 for (i=0; i<redund->nrecvs; i++){ 1829 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1830 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1831 } 1832 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1833 ierr = PetscFree(redund);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatDestroy_MatRedundant" 1839 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1840 { 1841 PetscErrorCode ierr; 1842 PetscContainer container; 1843 Mat_Redundant *redund=PETSC_NULL; 1844 1845 PetscFunctionBegin; 1846 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1847 if (container) { 1848 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1849 } else { 1850 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1851 } 1852 A->ops->destroy = redund->MatDestroy; 1853 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1854 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1855 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1861 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1862 { 1863 PetscMPIInt rank,size; 1864 MPI_Comm comm=mat->comm; 1865 PetscErrorCode ierr; 1866 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1867 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1868 PetscInt *rowrange=mat->rmap.range; 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1870 Mat A=aij->A,B=aij->B,C=*matredundant; 1871 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1872 PetscScalar *sbuf_a; 1873 PetscInt nzlocal=a->nz+b->nz; 1874 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1875 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1876 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1877 PetscScalar *vals,*aworkA,*aworkB; 1878 PetscMPIInt tag1,tag2,tag3,imdex; 1879 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1880 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1881 MPI_Status recv_status,*send_status; 1882 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1883 PetscInt **rbuf_j=PETSC_NULL; 1884 PetscScalar **rbuf_a=PETSC_NULL; 1885 Mat_Redundant *redund=PETSC_NULL; 1886 PetscContainer container; 1887 1888 PetscFunctionBegin; 1889 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1890 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1891 1892 if (reuse == MAT_REUSE_MATRIX) { 1893 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1894 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1895 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1896 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1897 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1898 if (container) { 1899 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1900 } else { 1901 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1902 } 1903 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1904 1905 nsends = redund->nsends; 1906 nrecvs = redund->nrecvs; 1907 send_rank = redund->send_rank; recv_rank = send_rank + size; 1908 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1909 sbuf_j = redund->sbuf_j; 1910 sbuf_a = redund->sbuf_a; 1911 rbuf_j = redund->rbuf_j; 1912 rbuf_a = redund->rbuf_a; 1913 } 1914 1915 if (reuse == MAT_INITIAL_MATRIX){ 1916 PetscMPIInt subrank,subsize; 1917 PetscInt nleftover,np_subcomm; 1918 /* get the destination processors' id send_rank, nsends and nrecvs */ 1919 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1920 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1921 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1922 recv_rank = send_rank + size; 1923 np_subcomm = size/nsubcomm; 1924 nleftover = size - nsubcomm*np_subcomm; 1925 nsends = 0; nrecvs = 0; 1926 for (i=0; i<size; i++){ /* i=rank*/ 1927 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1928 send_rank[nsends] = i; nsends++; 1929 recv_rank[nrecvs++] = i; 1930 } 1931 } 1932 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1933 i = size-nleftover-1; 1934 j = 0; 1935 while (j < nsubcomm - nleftover){ 1936 send_rank[nsends++] = i; 1937 i--; j++; 1938 } 1939 } 1940 1941 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1942 for (i=0; i<nleftover; i++){ 1943 recv_rank[nrecvs++] = size-nleftover+i; 1944 } 1945 } 1946 1947 /* allocate sbuf_j, sbuf_a */ 1948 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1949 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1950 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1951 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1952 1953 /* copy mat's local entries into the buffers */ 1954 if (reuse == MAT_INITIAL_MATRIX){ 1955 rownz_max = 0; 1956 rptr = sbuf_j; 1957 cols = sbuf_j + rend-rstart + 1; 1958 vals = sbuf_a; 1959 rptr[0] = 0; 1960 for (i=0; i<rend-rstart; i++){ 1961 row = i + rstart; 1962 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1963 ncols = nzA + nzB; 1964 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1965 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1966 /* load the column indices for this row into cols */ 1967 lwrite = 0; 1968 for (l=0; l<nzB; l++) { 1969 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1970 vals[lwrite] = aworkB[l]; 1971 cols[lwrite++] = ctmp; 1972 } 1973 } 1974 for (l=0; l<nzA; l++){ 1975 vals[lwrite] = aworkA[l]; 1976 cols[lwrite++] = cstart + cworkA[l]; 1977 } 1978 for (l=0; l<nzB; l++) { 1979 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1980 vals[lwrite] = aworkB[l]; 1981 cols[lwrite++] = ctmp; 1982 } 1983 } 1984 vals += ncols; 1985 cols += ncols; 1986 rptr[i+1] = rptr[i] + ncols; 1987 if (rownz_max < ncols) rownz_max = ncols; 1988 } 1989 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1990 } else { /* only copy matrix values into sbuf_a */ 1991 rptr = sbuf_j; 1992 vals = sbuf_a; 1993 rptr[0] = 0; 1994 for (i=0; i<rend-rstart; i++){ 1995 row = i + rstart; 1996 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1997 ncols = nzA + nzB; 1998 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1999 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2000 lwrite = 0; 2001 for (l=0; l<nzB; l++) { 2002 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2003 } 2004 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2005 for (l=0; l<nzB; l++) { 2006 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2007 } 2008 vals += ncols; 2009 rptr[i+1] = rptr[i] + ncols; 2010 } 2011 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2012 2013 /* send nzlocal to others, and recv other's nzlocal */ 2014 /*--------------------------------------------------*/ 2015 if (reuse == MAT_INITIAL_MATRIX){ 2016 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2017 s_waits2 = s_waits3 + nsends; 2018 s_waits1 = s_waits2 + nsends; 2019 r_waits1 = s_waits1 + nsends; 2020 r_waits2 = r_waits1 + nrecvs; 2021 r_waits3 = r_waits2 + nrecvs; 2022 } else { 2023 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2024 r_waits3 = s_waits3 + nsends; 2025 } 2026 2027 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2028 if (reuse == MAT_INITIAL_MATRIX){ 2029 /* get new tags to keep the communication clean */ 2030 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2031 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2032 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2033 rbuf_nz = sbuf_nz + nsends; 2034 2035 /* post receives of other's nzlocal */ 2036 for (i=0; i<nrecvs; i++){ 2037 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2038 } 2039 /* send nzlocal to others */ 2040 for (i=0; i<nsends; i++){ 2041 sbuf_nz[i] = nzlocal; 2042 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2043 } 2044 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2045 count = nrecvs; 2046 while (count) { 2047 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2048 recv_rank[imdex] = recv_status.MPI_SOURCE; 2049 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2050 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2051 2052 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2053 rbuf_nz[imdex] += i + 2; 2054 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2055 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2056 count--; 2057 } 2058 /* wait on sends of nzlocal */ 2059 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2060 /* send mat->i,j to others, and recv from other's */ 2061 /*------------------------------------------------*/ 2062 for (i=0; i<nsends; i++){ 2063 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2064 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2065 } 2066 /* wait on receives of mat->i,j */ 2067 /*------------------------------*/ 2068 count = nrecvs; 2069 while (count) { 2070 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2071 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2072 count--; 2073 } 2074 /* wait on sends of mat->i,j */ 2075 /*---------------------------*/ 2076 if (nsends) { 2077 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2078 } 2079 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2080 2081 /* post receives, send and receive mat->a */ 2082 /*----------------------------------------*/ 2083 for (imdex=0; imdex<nrecvs; imdex++) { 2084 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2085 } 2086 for (i=0; i<nsends; i++){ 2087 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2088 } 2089 count = nrecvs; 2090 while (count) { 2091 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2092 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2093 count--; 2094 } 2095 if (nsends) { 2096 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2097 } 2098 2099 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2100 2101 /* create redundant matrix */ 2102 /*-------------------------*/ 2103 if (reuse == MAT_INITIAL_MATRIX){ 2104 /* compute rownz_max for preallocation */ 2105 for (imdex=0; imdex<nrecvs; imdex++){ 2106 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2107 rptr = rbuf_j[imdex]; 2108 for (i=0; i<j; i++){ 2109 ncols = rptr[i+1] - rptr[i]; 2110 if (rownz_max < ncols) rownz_max = ncols; 2111 } 2112 } 2113 2114 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2115 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2116 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2117 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2118 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2119 } else { 2120 C = *matredundant; 2121 } 2122 2123 /* insert local matrix entries */ 2124 rptr = sbuf_j; 2125 cols = sbuf_j + rend-rstart + 1; 2126 vals = sbuf_a; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 ncols = rptr[i+1] - rptr[i]; 2130 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2131 vals += ncols; 2132 cols += ncols; 2133 } 2134 /* insert received matrix entries */ 2135 for (imdex=0; imdex<nrecvs; imdex++){ 2136 rstart = rowrange[recv_rank[imdex]]; 2137 rend = rowrange[recv_rank[imdex]+1]; 2138 rptr = rbuf_j[imdex]; 2139 cols = rbuf_j[imdex] + rend-rstart + 1; 2140 vals = rbuf_a[imdex]; 2141 for (i=0; i<rend-rstart; i++){ 2142 row = i + rstart; 2143 ncols = rptr[i+1] - rptr[i]; 2144 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2145 vals += ncols; 2146 cols += ncols; 2147 } 2148 } 2149 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2152 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2153 if (reuse == MAT_INITIAL_MATRIX){ 2154 PetscContainer container; 2155 *matredundant = C; 2156 /* create a supporting struct and attach it to C for reuse */ 2157 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2158 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2159 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2160 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2161 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2162 2163 redund->nzlocal = nzlocal; 2164 redund->nsends = nsends; 2165 redund->nrecvs = nrecvs; 2166 redund->send_rank = send_rank; 2167 redund->sbuf_nz = sbuf_nz; 2168 redund->sbuf_j = sbuf_j; 2169 redund->sbuf_a = sbuf_a; 2170 redund->rbuf_j = rbuf_j; 2171 redund->rbuf_a = rbuf_a; 2172 2173 redund->MatDestroy = C->ops->destroy; 2174 C->ops->destroy = MatDestroy_MatRedundant; 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* -------------------------------------------------------------------*/ 2180 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2181 MatGetRow_MPIAIJ, 2182 MatRestoreRow_MPIAIJ, 2183 MatMult_MPIAIJ, 2184 /* 4*/ MatMultAdd_MPIAIJ, 2185 MatMultTranspose_MPIAIJ, 2186 MatMultTransposeAdd_MPIAIJ, 2187 #ifdef PETSC_HAVE_PBGL 2188 MatSolve_MPIAIJ, 2189 #else 2190 0, 2191 #endif 2192 0, 2193 0, 2194 /*10*/ 0, 2195 0, 2196 0, 2197 MatRelax_MPIAIJ, 2198 MatTranspose_MPIAIJ, 2199 /*15*/ MatGetInfo_MPIAIJ, 2200 MatEqual_MPIAIJ, 2201 MatGetDiagonal_MPIAIJ, 2202 MatDiagonalScale_MPIAIJ, 2203 MatNorm_MPIAIJ, 2204 /*20*/ MatAssemblyBegin_MPIAIJ, 2205 MatAssemblyEnd_MPIAIJ, 2206 0, 2207 MatSetOption_MPIAIJ, 2208 MatZeroEntries_MPIAIJ, 2209 /*25*/ MatZeroRows_MPIAIJ, 2210 0, 2211 #ifdef PETSC_HAVE_PBGL 2212 MatLUFactorNumeric_MPIAIJ, 2213 #else 2214 0, 2215 #endif 2216 0, 2217 0, 2218 /*30*/ MatSetUpPreallocation_MPIAIJ, 2219 #ifdef PETSC_HAVE_PBGL 2220 MatILUFactorSymbolic_MPIAIJ, 2221 #else 2222 0, 2223 #endif 2224 0, 2225 0, 2226 0, 2227 /*35*/ MatDuplicate_MPIAIJ, 2228 0, 2229 0, 2230 0, 2231 0, 2232 /*40*/ MatAXPY_MPIAIJ, 2233 MatGetSubMatrices_MPIAIJ, 2234 MatIncreaseOverlap_MPIAIJ, 2235 MatGetValues_MPIAIJ, 2236 MatCopy_MPIAIJ, 2237 /*45*/ 0, 2238 MatScale_MPIAIJ, 2239 0, 2240 0, 2241 0, 2242 /*50*/ MatSetBlockSize_MPIAIJ, 2243 0, 2244 0, 2245 0, 2246 0, 2247 /*55*/ MatFDColoringCreate_MPIAIJ, 2248 0, 2249 MatSetUnfactored_MPIAIJ, 2250 MatPermute_MPIAIJ, 2251 0, 2252 /*60*/ MatGetSubMatrix_MPIAIJ, 2253 MatDestroy_MPIAIJ, 2254 MatView_MPIAIJ, 2255 0, 2256 0, 2257 /*65*/ 0, 2258 0, 2259 0, 2260 0, 2261 0, 2262 /*70*/ 0, 2263 0, 2264 MatSetColoring_MPIAIJ, 2265 #if defined(PETSC_HAVE_ADIC) 2266 MatSetValuesAdic_MPIAIJ, 2267 #else 2268 0, 2269 #endif 2270 MatSetValuesAdifor_MPIAIJ, 2271 /*75*/ 0, 2272 0, 2273 0, 2274 0, 2275 0, 2276 /*80*/ 0, 2277 0, 2278 0, 2279 0, 2280 /*84*/ MatLoad_MPIAIJ, 2281 0, 2282 0, 2283 0, 2284 0, 2285 0, 2286 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2287 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2288 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2289 MatPtAP_Basic, 2290 MatPtAPSymbolic_MPIAIJ, 2291 /*95*/ MatPtAPNumeric_MPIAIJ, 2292 0, 2293 0, 2294 0, 2295 0, 2296 /*100*/0, 2297 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2298 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2299 MatConjugate_MPIAIJ, 2300 0, 2301 /*105*/MatSetValuesRow_MPIAIJ, 2302 MatRealPart_MPIAIJ, 2303 MatImaginaryPart_MPIAIJ, 2304 0, 2305 0, 2306 /*110*/0, 2307 MatGetRedundantMatrix_MPIAIJ}; 2308 2309 /* ----------------------------------------------------------------------------------------*/ 2310 2311 EXTERN_C_BEGIN 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2314 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2315 { 2316 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2321 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 EXTERN_C_END 2325 2326 EXTERN_C_BEGIN 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2329 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2330 { 2331 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2336 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 EXTERN_C_END 2340 2341 #include "petscpc.h" 2342 EXTERN_C_BEGIN 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2346 { 2347 Mat_MPIAIJ *b; 2348 PetscErrorCode ierr; 2349 PetscInt i; 2350 2351 PetscFunctionBegin; 2352 B->preallocated = PETSC_TRUE; 2353 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2354 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2355 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2356 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2357 2358 B->rmap.bs = B->cmap.bs = 1; 2359 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2360 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2361 if (d_nnz) { 2362 for (i=0; i<B->rmap.n; i++) { 2363 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2364 } 2365 } 2366 if (o_nnz) { 2367 for (i=0; i<B->rmap.n; i++) { 2368 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2369 } 2370 } 2371 b = (Mat_MPIAIJ*)B->data; 2372 2373 /* Explicitly create 2 MATSEQAIJ matrices. */ 2374 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2375 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2376 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2377 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2378 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2379 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2380 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2381 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2382 2383 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2384 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2385 2386 PetscFunctionReturn(0); 2387 } 2388 EXTERN_C_END 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2392 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2393 { 2394 Mat mat; 2395 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 *newmat = 0; 2400 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2401 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2402 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2403 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2404 a = (Mat_MPIAIJ*)mat->data; 2405 2406 mat->factor = matin->factor; 2407 mat->rmap.bs = matin->rmap.bs; 2408 mat->assembled = PETSC_TRUE; 2409 mat->insertmode = NOT_SET_VALUES; 2410 mat->preallocated = PETSC_TRUE; 2411 2412 a->size = oldmat->size; 2413 a->rank = oldmat->rank; 2414 a->donotstash = oldmat->donotstash; 2415 a->roworiented = oldmat->roworiented; 2416 a->rowindices = 0; 2417 a->rowvalues = 0; 2418 a->getrowactive = PETSC_FALSE; 2419 2420 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2421 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2422 2423 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2424 if (oldmat->colmap) { 2425 #if defined (PETSC_USE_CTABLE) 2426 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2427 #else 2428 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2429 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2430 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2431 #endif 2432 } else a->colmap = 0; 2433 if (oldmat->garray) { 2434 PetscInt len; 2435 len = oldmat->B->cmap.n; 2436 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2437 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2438 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2439 } else a->garray = 0; 2440 2441 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2442 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2443 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2444 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2445 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2447 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2448 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2449 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2450 *newmat = mat; 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #include "petscsys.h" 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLoad_MPIAIJ" 2458 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2459 { 2460 Mat A; 2461 PetscScalar *vals,*svals; 2462 MPI_Comm comm = ((PetscObject)viewer)->comm; 2463 MPI_Status status; 2464 PetscErrorCode ierr; 2465 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2466 PetscInt i,nz,j,rstart,rend,mmax; 2467 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2468 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2469 PetscInt cend,cstart,n,*rowners; 2470 int fd; 2471 2472 PetscFunctionBegin; 2473 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2474 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2475 if (!rank) { 2476 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2477 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2478 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2479 } 2480 2481 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2482 M = header[1]; N = header[2]; 2483 /* determine ownership of all rows */ 2484 m = M/size + ((M % size) > rank); 2485 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2486 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2487 2488 /* First process needs enough room for process with most rows */ 2489 if (!rank) { 2490 mmax = rowners[1]; 2491 for (i=2; i<size; i++) { 2492 mmax = PetscMax(mmax,rowners[i]); 2493 } 2494 } else mmax = m; 2495 2496 rowners[0] = 0; 2497 for (i=2; i<=size; i++) { 2498 rowners[i] += rowners[i-1]; 2499 } 2500 rstart = rowners[rank]; 2501 rend = rowners[rank+1]; 2502 2503 /* distribute row lengths to all processors */ 2504 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2505 if (!rank) { 2506 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2507 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2508 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2509 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2510 for (j=0; j<m; j++) { 2511 procsnz[0] += ourlens[j]; 2512 } 2513 for (i=1; i<size; i++) { 2514 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2515 /* calculate the number of nonzeros on each processor */ 2516 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2517 procsnz[i] += rowlengths[j]; 2518 } 2519 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2520 } 2521 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2522 } else { 2523 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2524 } 2525 2526 if (!rank) { 2527 /* determine max buffer needed and allocate it */ 2528 maxnz = 0; 2529 for (i=0; i<size; i++) { 2530 maxnz = PetscMax(maxnz,procsnz[i]); 2531 } 2532 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2533 2534 /* read in my part of the matrix column indices */ 2535 nz = procsnz[0]; 2536 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2537 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2538 2539 /* read in every one elses and ship off */ 2540 for (i=1; i<size; i++) { 2541 nz = procsnz[i]; 2542 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2543 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2544 } 2545 ierr = PetscFree(cols);CHKERRQ(ierr); 2546 } else { 2547 /* determine buffer space needed for message */ 2548 nz = 0; 2549 for (i=0; i<m; i++) { 2550 nz += ourlens[i]; 2551 } 2552 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2553 2554 /* receive message of column indices*/ 2555 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2556 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2557 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2558 } 2559 2560 /* determine column ownership if matrix is not square */ 2561 if (N != M) { 2562 n = N/size + ((N % size) > rank); 2563 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2564 cstart = cend - n; 2565 } else { 2566 cstart = rstart; 2567 cend = rend; 2568 n = cend - cstart; 2569 } 2570 2571 /* loop over local rows, determining number of off diagonal entries */ 2572 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2573 jj = 0; 2574 for (i=0; i<m; i++) { 2575 for (j=0; j<ourlens[i]; j++) { 2576 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2577 jj++; 2578 } 2579 } 2580 2581 /* create our matrix */ 2582 for (i=0; i<m; i++) { 2583 ourlens[i] -= offlens[i]; 2584 } 2585 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2586 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2587 ierr = MatSetType(A,type);CHKERRQ(ierr); 2588 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2589 2590 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2591 for (i=0; i<m; i++) { 2592 ourlens[i] += offlens[i]; 2593 } 2594 2595 if (!rank) { 2596 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2597 2598 /* read in my part of the matrix numerical values */ 2599 nz = procsnz[0]; 2600 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2601 2602 /* insert into matrix */ 2603 jj = rstart; 2604 smycols = mycols; 2605 svals = vals; 2606 for (i=0; i<m; i++) { 2607 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2608 smycols += ourlens[i]; 2609 svals += ourlens[i]; 2610 jj++; 2611 } 2612 2613 /* read in other processors and ship out */ 2614 for (i=1; i<size; i++) { 2615 nz = procsnz[i]; 2616 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2617 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2618 } 2619 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2620 } else { 2621 /* receive numeric values */ 2622 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2623 2624 /* receive message of values*/ 2625 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2626 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2627 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2628 2629 /* insert into matrix */ 2630 jj = rstart; 2631 smycols = mycols; 2632 svals = vals; 2633 for (i=0; i<m; i++) { 2634 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2635 smycols += ourlens[i]; 2636 svals += ourlens[i]; 2637 jj++; 2638 } 2639 } 2640 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2641 ierr = PetscFree(vals);CHKERRQ(ierr); 2642 ierr = PetscFree(mycols);CHKERRQ(ierr); 2643 ierr = PetscFree(rowners);CHKERRQ(ierr); 2644 2645 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2646 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 *newmat = A; 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2653 /* 2654 Not great since it makes two copies of the submatrix, first an SeqAIJ 2655 in local and then by concatenating the local matrices the end result. 2656 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2657 */ 2658 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2659 { 2660 PetscErrorCode ierr; 2661 PetscMPIInt rank,size; 2662 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2663 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2664 Mat *local,M,Mreuse; 2665 PetscScalar *vwork,*aa; 2666 MPI_Comm comm = mat->comm; 2667 Mat_SeqAIJ *aij; 2668 2669 2670 PetscFunctionBegin; 2671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2672 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2673 2674 if (call == MAT_REUSE_MATRIX) { 2675 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2676 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2677 local = &Mreuse; 2678 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2679 } else { 2680 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2681 Mreuse = *local; 2682 ierr = PetscFree(local);CHKERRQ(ierr); 2683 } 2684 2685 /* 2686 m - number of local rows 2687 n - number of columns (same on all processors) 2688 rstart - first row in new global matrix generated 2689 */ 2690 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2691 if (call == MAT_INITIAL_MATRIX) { 2692 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2693 ii = aij->i; 2694 jj = aij->j; 2695 2696 /* 2697 Determine the number of non-zeros in the diagonal and off-diagonal 2698 portions of the matrix in order to do correct preallocation 2699 */ 2700 2701 /* first get start and end of "diagonal" columns */ 2702 if (csize == PETSC_DECIDE) { 2703 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2704 if (mglobal == n) { /* square matrix */ 2705 nlocal = m; 2706 } else { 2707 nlocal = n/size + ((n % size) > rank); 2708 } 2709 } else { 2710 nlocal = csize; 2711 } 2712 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2713 rstart = rend - nlocal; 2714 if (rank == size - 1 && rend != n) { 2715 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2716 } 2717 2718 /* next, compute all the lengths */ 2719 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2720 olens = dlens + m; 2721 for (i=0; i<m; i++) { 2722 jend = ii[i+1] - ii[i]; 2723 olen = 0; 2724 dlen = 0; 2725 for (j=0; j<jend; j++) { 2726 if (*jj < rstart || *jj >= rend) olen++; 2727 else dlen++; 2728 jj++; 2729 } 2730 olens[i] = olen; 2731 dlens[i] = dlen; 2732 } 2733 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2734 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2735 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2736 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2737 ierr = PetscFree(dlens);CHKERRQ(ierr); 2738 } else { 2739 PetscInt ml,nl; 2740 2741 M = *newmat; 2742 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2743 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2744 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2745 /* 2746 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2747 rather than the slower MatSetValues(). 2748 */ 2749 M->was_assembled = PETSC_TRUE; 2750 M->assembled = PETSC_FALSE; 2751 } 2752 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2753 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2754 ii = aij->i; 2755 jj = aij->j; 2756 aa = aij->a; 2757 for (i=0; i<m; i++) { 2758 row = rstart + i; 2759 nz = ii[i+1] - ii[i]; 2760 cwork = jj; jj += nz; 2761 vwork = aa; aa += nz; 2762 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2763 } 2764 2765 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 *newmat = M; 2768 2769 /* save submatrix used in processor for next request */ 2770 if (call == MAT_INITIAL_MATRIX) { 2771 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2772 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2773 } 2774 2775 PetscFunctionReturn(0); 2776 } 2777 2778 EXTERN_C_BEGIN 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2782 { 2783 PetscInt m,cstart, cend,j,nnz,i,d; 2784 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2785 const PetscInt *JJ; 2786 PetscScalar *values; 2787 PetscErrorCode ierr; 2788 2789 PetscFunctionBegin; 2790 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2791 2792 B->rmap.bs = B->cmap.bs = 1; 2793 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2794 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2795 m = B->rmap.n; 2796 cstart = B->cmap.rstart; 2797 cend = B->cmap.rend; 2798 rstart = B->rmap.rstart; 2799 2800 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2801 o_nnz = d_nnz + m; 2802 2803 for (i=0; i<m; i++) { 2804 nnz = Ii[i+1]- Ii[i]; 2805 JJ = J + Ii[i]; 2806 nnz_max = PetscMax(nnz_max,nnz); 2807 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2808 for (j=0; j<nnz; j++) { 2809 if (*JJ >= cstart) break; 2810 JJ++; 2811 } 2812 d = 0; 2813 for (; j<nnz; j++) { 2814 if (*JJ++ >= cend) break; 2815 d++; 2816 } 2817 d_nnz[i] = d; 2818 o_nnz[i] = nnz - d; 2819 } 2820 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2821 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2822 2823 if (v) values = (PetscScalar*)v; 2824 else { 2825 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2826 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2827 } 2828 2829 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2830 for (i=0; i<m; i++) { 2831 ii = i + rstart; 2832 nnz = Ii[i+1]- Ii[i]; 2833 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2834 } 2835 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2836 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2838 2839 if (!v) { 2840 ierr = PetscFree(values);CHKERRQ(ierr); 2841 } 2842 PetscFunctionReturn(0); 2843 } 2844 EXTERN_C_END 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2848 /*@ 2849 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2850 (the default parallel PETSc format). 2851 2852 Collective on MPI_Comm 2853 2854 Input Parameters: 2855 + B - the matrix 2856 . i - the indices into j for the start of each local row (starts with zero) 2857 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2858 - v - optional values in the matrix 2859 2860 Level: developer 2861 2862 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2863 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2864 2865 .keywords: matrix, aij, compressed row, sparse, parallel 2866 2867 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2868 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2869 @*/ 2870 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2871 { 2872 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2873 2874 PetscFunctionBegin; 2875 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2876 if (f) { 2877 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2878 } 2879 PetscFunctionReturn(0); 2880 } 2881 2882 #undef __FUNCT__ 2883 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2884 /*@C 2885 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2886 (the default parallel PETSc format). For good matrix assembly performance 2887 the user should preallocate the matrix storage by setting the parameters 2888 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2889 performance can be increased by more than a factor of 50. 2890 2891 Collective on MPI_Comm 2892 2893 Input Parameters: 2894 + A - the matrix 2895 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2896 (same value is used for all local rows) 2897 . d_nnz - array containing the number of nonzeros in the various rows of the 2898 DIAGONAL portion of the local submatrix (possibly different for each row) 2899 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2900 The size of this array is equal to the number of local rows, i.e 'm'. 2901 You must leave room for the diagonal entry even if it is zero. 2902 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2903 submatrix (same value is used for all local rows). 2904 - o_nnz - array containing the number of nonzeros in the various rows of the 2905 OFF-DIAGONAL portion of the local submatrix (possibly different for 2906 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2907 structure. The size of this array is equal to the number 2908 of local rows, i.e 'm'. 2909 2910 If the *_nnz parameter is given then the *_nz parameter is ignored 2911 2912 The AIJ format (also called the Yale sparse matrix format or 2913 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2914 storage. The stored row and column indices begin with zero. See the users manual for details. 2915 2916 The parallel matrix is partitioned such that the first m0 rows belong to 2917 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2918 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2919 2920 The DIAGONAL portion of the local submatrix of a processor can be defined 2921 as the submatrix which is obtained by extraction the part corresponding 2922 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2923 first row that belongs to the processor, and r2 is the last row belonging 2924 to the this processor. This is a square mxm matrix. The remaining portion 2925 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2926 2927 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2928 2929 Example usage: 2930 2931 Consider the following 8x8 matrix with 34 non-zero values, that is 2932 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2933 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2934 as follows: 2935 2936 .vb 2937 1 2 0 | 0 3 0 | 0 4 2938 Proc0 0 5 6 | 7 0 0 | 8 0 2939 9 0 10 | 11 0 0 | 12 0 2940 ------------------------------------- 2941 13 0 14 | 15 16 17 | 0 0 2942 Proc1 0 18 0 | 19 20 21 | 0 0 2943 0 0 0 | 22 23 0 | 24 0 2944 ------------------------------------- 2945 Proc2 25 26 27 | 0 0 28 | 29 0 2946 30 0 0 | 31 32 33 | 0 34 2947 .ve 2948 2949 This can be represented as a collection of submatrices as: 2950 2951 .vb 2952 A B C 2953 D E F 2954 G H I 2955 .ve 2956 2957 Where the submatrices A,B,C are owned by proc0, D,E,F are 2958 owned by proc1, G,H,I are owned by proc2. 2959 2960 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2961 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2962 The 'M','N' parameters are 8,8, and have the same values on all procs. 2963 2964 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2965 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2966 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2967 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2968 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2969 matrix, ans [DF] as another SeqAIJ matrix. 2970 2971 When d_nz, o_nz parameters are specified, d_nz storage elements are 2972 allocated for every row of the local diagonal submatrix, and o_nz 2973 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2974 One way to choose d_nz and o_nz is to use the max nonzerors per local 2975 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2976 In this case, the values of d_nz,o_nz are: 2977 .vb 2978 proc0 : dnz = 2, o_nz = 2 2979 proc1 : dnz = 3, o_nz = 2 2980 proc2 : dnz = 1, o_nz = 4 2981 .ve 2982 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2983 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2984 for proc3. i.e we are using 12+15+10=37 storage locations to store 2985 34 values. 2986 2987 When d_nnz, o_nnz parameters are specified, the storage is specified 2988 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2989 In the above case the values for d_nnz,o_nnz are: 2990 .vb 2991 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2992 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2993 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2994 .ve 2995 Here the space allocated is sum of all the above values i.e 34, and 2996 hence pre-allocation is perfect. 2997 2998 Level: intermediate 2999 3000 .keywords: matrix, aij, compressed row, sparse, parallel 3001 3002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3003 MPIAIJ 3004 @*/ 3005 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3006 { 3007 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3008 3009 PetscFunctionBegin; 3010 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3011 if (f) { 3012 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3013 } 3014 PetscFunctionReturn(0); 3015 } 3016 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3019 /*@C 3020 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3021 CSR format the local rows. 3022 3023 Collective on MPI_Comm 3024 3025 Input Parameters: 3026 + comm - MPI communicator 3027 . m - number of local rows (Cannot be PETSC_DECIDE) 3028 . n - This value should be the same as the local size used in creating the 3029 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3030 calculated if N is given) For square matrices n is almost always m. 3031 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3032 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3033 . i - row indices 3034 . j - column indices 3035 - a - matrix values 3036 3037 Output Parameter: 3038 . mat - the matrix 3039 3040 Level: intermediate 3041 3042 Notes: 3043 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3044 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3045 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3046 3047 The i and j indices are 0 based 3048 3049 .keywords: matrix, aij, compressed row, sparse, parallel 3050 3051 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3052 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3053 @*/ 3054 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3055 { 3056 PetscErrorCode ierr; 3057 3058 PetscFunctionBegin; 3059 if (i[0]) { 3060 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3061 } 3062 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3063 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3064 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3065 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatCreateMPIAIJ" 3071 /*@C 3072 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3073 (the default parallel PETSc format). For good matrix assembly performance 3074 the user should preallocate the matrix storage by setting the parameters 3075 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3076 performance can be increased by more than a factor of 50. 3077 3078 Collective on MPI_Comm 3079 3080 Input Parameters: 3081 + comm - MPI communicator 3082 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3083 This value should be the same as the local size used in creating the 3084 y vector for the matrix-vector product y = Ax. 3085 . n - This value should be the same as the local size used in creating the 3086 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3087 calculated if N is given) For square matrices n is almost always m. 3088 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3089 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3090 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3091 (same value is used for all local rows) 3092 . d_nnz - array containing the number of nonzeros in the various rows of the 3093 DIAGONAL portion of the local submatrix (possibly different for each row) 3094 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3095 The size of this array is equal to the number of local rows, i.e 'm'. 3096 You must leave room for the diagonal entry even if it is zero. 3097 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3098 submatrix (same value is used for all local rows). 3099 - o_nnz - array containing the number of nonzeros in the various rows of the 3100 OFF-DIAGONAL portion of the local submatrix (possibly different for 3101 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3102 structure. The size of this array is equal to the number 3103 of local rows, i.e 'm'. 3104 3105 Output Parameter: 3106 . A - the matrix 3107 3108 Notes: 3109 If the *_nnz parameter is given then the *_nz parameter is ignored 3110 3111 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3112 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3113 storage requirements for this matrix. 3114 3115 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3116 processor than it must be used on all processors that share the object for 3117 that argument. 3118 3119 The user MUST specify either the local or global matrix dimensions 3120 (possibly both). 3121 3122 The parallel matrix is partitioned such that the first m0 rows belong to 3123 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3124 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3125 3126 The DIAGONAL portion of the local submatrix of a processor can be defined 3127 as the submatrix which is obtained by extraction the part corresponding 3128 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3129 first row that belongs to the processor, and r2 is the last row belonging 3130 to the this processor. This is a square mxm matrix. The remaining portion 3131 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3132 3133 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3134 3135 When calling this routine with a single process communicator, a matrix of 3136 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3137 type of communicator, use the construction mechanism: 3138 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3139 3140 By default, this format uses inodes (identical nodes) when possible. 3141 We search for consecutive rows with the same nonzero structure, thereby 3142 reusing matrix information to achieve increased efficiency. 3143 3144 Options Database Keys: 3145 + -mat_no_inode - Do not use inodes 3146 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3147 - -mat_aij_oneindex - Internally use indexing starting at 1 3148 rather than 0. Note that when calling MatSetValues(), 3149 the user still MUST index entries starting at 0! 3150 3151 3152 Example usage: 3153 3154 Consider the following 8x8 matrix with 34 non-zero values, that is 3155 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3156 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3157 as follows: 3158 3159 .vb 3160 1 2 0 | 0 3 0 | 0 4 3161 Proc0 0 5 6 | 7 0 0 | 8 0 3162 9 0 10 | 11 0 0 | 12 0 3163 ------------------------------------- 3164 13 0 14 | 15 16 17 | 0 0 3165 Proc1 0 18 0 | 19 20 21 | 0 0 3166 0 0 0 | 22 23 0 | 24 0 3167 ------------------------------------- 3168 Proc2 25 26 27 | 0 0 28 | 29 0 3169 30 0 0 | 31 32 33 | 0 34 3170 .ve 3171 3172 This can be represented as a collection of submatrices as: 3173 3174 .vb 3175 A B C 3176 D E F 3177 G H I 3178 .ve 3179 3180 Where the submatrices A,B,C are owned by proc0, D,E,F are 3181 owned by proc1, G,H,I are owned by proc2. 3182 3183 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3184 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3185 The 'M','N' parameters are 8,8, and have the same values on all procs. 3186 3187 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3188 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3189 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3190 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3191 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3192 matrix, ans [DF] as another SeqAIJ matrix. 3193 3194 When d_nz, o_nz parameters are specified, d_nz storage elements are 3195 allocated for every row of the local diagonal submatrix, and o_nz 3196 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3197 One way to choose d_nz and o_nz is to use the max nonzerors per local 3198 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3199 In this case, the values of d_nz,o_nz are: 3200 .vb 3201 proc0 : dnz = 2, o_nz = 2 3202 proc1 : dnz = 3, o_nz = 2 3203 proc2 : dnz = 1, o_nz = 4 3204 .ve 3205 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3206 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3207 for proc3. i.e we are using 12+15+10=37 storage locations to store 3208 34 values. 3209 3210 When d_nnz, o_nnz parameters are specified, the storage is specified 3211 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3212 In the above case the values for d_nnz,o_nnz are: 3213 .vb 3214 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3215 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3216 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3217 .ve 3218 Here the space allocated is sum of all the above values i.e 34, and 3219 hence pre-allocation is perfect. 3220 3221 Level: intermediate 3222 3223 .keywords: matrix, aij, compressed row, sparse, parallel 3224 3225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3226 MPIAIJ, MatCreateMPIAIJWithArrays() 3227 @*/ 3228 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3229 { 3230 PetscErrorCode ierr; 3231 PetscMPIInt size; 3232 3233 PetscFunctionBegin; 3234 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3235 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3236 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3237 if (size > 1) { 3238 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3239 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3240 } else { 3241 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3242 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3243 } 3244 PetscFunctionReturn(0); 3245 } 3246 3247 #undef __FUNCT__ 3248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3249 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3250 { 3251 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3252 3253 PetscFunctionBegin; 3254 *Ad = a->A; 3255 *Ao = a->B; 3256 *colmap = a->garray; 3257 PetscFunctionReturn(0); 3258 } 3259 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3263 { 3264 PetscErrorCode ierr; 3265 PetscInt i; 3266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3267 3268 PetscFunctionBegin; 3269 if (coloring->ctype == IS_COLORING_GLOBAL) { 3270 ISColoringValue *allcolors,*colors; 3271 ISColoring ocoloring; 3272 3273 /* set coloring for diagonal portion */ 3274 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3275 3276 /* set coloring for off-diagonal portion */ 3277 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3278 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3279 for (i=0; i<a->B->cmap.n; i++) { 3280 colors[i] = allcolors[a->garray[i]]; 3281 } 3282 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3283 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3284 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3285 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3286 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3287 ISColoringValue *colors; 3288 PetscInt *larray; 3289 ISColoring ocoloring; 3290 3291 /* set coloring for diagonal portion */ 3292 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3293 for (i=0; i<a->A->cmap.n; i++) { 3294 larray[i] = i + A->cmap.rstart; 3295 } 3296 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3297 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3298 for (i=0; i<a->A->cmap.n; i++) { 3299 colors[i] = coloring->colors[larray[i]]; 3300 } 3301 ierr = PetscFree(larray);CHKERRQ(ierr); 3302 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3303 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3304 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3305 3306 /* set coloring for off-diagonal portion */ 3307 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3308 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3309 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3310 for (i=0; i<a->B->cmap.n; i++) { 3311 colors[i] = coloring->colors[larray[i]]; 3312 } 3313 ierr = PetscFree(larray);CHKERRQ(ierr); 3314 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3315 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3316 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3317 } else { 3318 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3319 } 3320 3321 PetscFunctionReturn(0); 3322 } 3323 3324 #if defined(PETSC_HAVE_ADIC) 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3327 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3328 { 3329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3330 PetscErrorCode ierr; 3331 3332 PetscFunctionBegin; 3333 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3334 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 #endif 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3341 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3342 { 3343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3344 PetscErrorCode ierr; 3345 3346 PetscFunctionBegin; 3347 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3348 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 #undef __FUNCT__ 3353 #define __FUNCT__ "MatMerge" 3354 /*@C 3355 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3356 matrices from each processor 3357 3358 Collective on MPI_Comm 3359 3360 Input Parameters: 3361 + comm - the communicators the parallel matrix will live on 3362 . inmat - the input sequential matrices 3363 . n - number of local columns (or PETSC_DECIDE) 3364 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3365 3366 Output Parameter: 3367 . outmat - the parallel matrix generated 3368 3369 Level: advanced 3370 3371 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3372 3373 @*/ 3374 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3375 { 3376 PetscErrorCode ierr; 3377 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3378 PetscInt *indx; 3379 PetscScalar *values; 3380 3381 PetscFunctionBegin; 3382 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3383 if (scall == MAT_INITIAL_MATRIX){ 3384 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3385 if (n == PETSC_DECIDE){ 3386 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3387 } 3388 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3389 rstart -= m; 3390 3391 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3392 for (i=0;i<m;i++) { 3393 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3394 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3395 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3396 } 3397 /* This routine will ONLY return MPIAIJ type matrix */ 3398 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3399 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3400 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3401 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3402 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3403 3404 } else if (scall == MAT_REUSE_MATRIX){ 3405 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3406 } else { 3407 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3408 } 3409 3410 for (i=0;i<m;i++) { 3411 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3412 Ii = i + rstart; 3413 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3414 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3415 } 3416 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3417 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 3420 PetscFunctionReturn(0); 3421 } 3422 3423 #undef __FUNCT__ 3424 #define __FUNCT__ "MatFileSplit" 3425 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3426 { 3427 PetscErrorCode ierr; 3428 PetscMPIInt rank; 3429 PetscInt m,N,i,rstart,nnz; 3430 size_t len; 3431 const PetscInt *indx; 3432 PetscViewer out; 3433 char *name; 3434 Mat B; 3435 const PetscScalar *values; 3436 3437 PetscFunctionBegin; 3438 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3439 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3440 /* Should this be the type of the diagonal block of A? */ 3441 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3442 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3443 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3444 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3445 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3446 for (i=0;i<m;i++) { 3447 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3448 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3449 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3450 } 3451 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3452 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3453 3454 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3455 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3456 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3457 sprintf(name,"%s.%d",outfile,rank); 3458 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3459 ierr = PetscFree(name); 3460 ierr = MatView(B,out);CHKERRQ(ierr); 3461 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3462 ierr = MatDestroy(B);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3467 #undef __FUNCT__ 3468 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3469 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3470 { 3471 PetscErrorCode ierr; 3472 Mat_Merge_SeqsToMPI *merge; 3473 PetscContainer container; 3474 3475 PetscFunctionBegin; 3476 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3477 if (container) { 3478 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3482 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3483 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3484 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3485 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3486 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3487 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3488 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3489 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3490 3491 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3492 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3493 } 3494 ierr = PetscFree(merge);CHKERRQ(ierr); 3495 3496 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } 3499 3500 #include "src/mat/utils/freespace.h" 3501 #include "petscbt.h" 3502 static PetscEvent logkey_seqstompinum = 0; 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3505 /*@C 3506 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3507 matrices from each processor 3508 3509 Collective on MPI_Comm 3510 3511 Input Parameters: 3512 + comm - the communicators the parallel matrix will live on 3513 . seqmat - the input sequential matrices 3514 . m - number of local rows (or PETSC_DECIDE) 3515 . n - number of local columns (or PETSC_DECIDE) 3516 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3517 3518 Output Parameter: 3519 . mpimat - the parallel matrix generated 3520 3521 Level: advanced 3522 3523 Notes: 3524 The dimensions of the sequential matrix in each processor MUST be the same. 3525 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3526 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3527 @*/ 3528 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3529 { 3530 PetscErrorCode ierr; 3531 MPI_Comm comm=mpimat->comm; 3532 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3533 PetscMPIInt size,rank,taga,*len_s; 3534 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3535 PetscInt proc,m; 3536 PetscInt **buf_ri,**buf_rj; 3537 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3538 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3539 MPI_Request *s_waits,*r_waits; 3540 MPI_Status *status; 3541 MatScalar *aa=a->a,**abuf_r,*ba_i; 3542 Mat_Merge_SeqsToMPI *merge; 3543 PetscContainer container; 3544 3545 PetscFunctionBegin; 3546 if (!logkey_seqstompinum) { 3547 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3548 } 3549 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3550 3551 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3552 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3553 3554 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3555 if (container) { 3556 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3557 } 3558 bi = merge->bi; 3559 bj = merge->bj; 3560 buf_ri = merge->buf_ri; 3561 buf_rj = merge->buf_rj; 3562 3563 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3564 owners = merge->rowmap.range; 3565 len_s = merge->len_s; 3566 3567 /* send and recv matrix values */ 3568 /*-----------------------------*/ 3569 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3570 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3571 3572 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3573 for (proc=0,k=0; proc<size; proc++){ 3574 if (!len_s[proc]) continue; 3575 i = owners[proc]; 3576 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3577 k++; 3578 } 3579 3580 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3581 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3582 ierr = PetscFree(status);CHKERRQ(ierr); 3583 3584 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3585 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3586 3587 /* insert mat values of mpimat */ 3588 /*----------------------------*/ 3589 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3590 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3591 nextrow = buf_ri_k + merge->nrecv; 3592 nextai = nextrow + merge->nrecv; 3593 3594 for (k=0; k<merge->nrecv; k++){ 3595 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3596 nrows = *(buf_ri_k[k]); 3597 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3598 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3599 } 3600 3601 /* set values of ba */ 3602 m = merge->rowmap.n; 3603 for (i=0; i<m; i++) { 3604 arow = owners[rank] + i; 3605 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3606 bnzi = bi[i+1] - bi[i]; 3607 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3608 3609 /* add local non-zero vals of this proc's seqmat into ba */ 3610 anzi = ai[arow+1] - ai[arow]; 3611 aj = a->j + ai[arow]; 3612 aa = a->a + ai[arow]; 3613 nextaj = 0; 3614 for (j=0; nextaj<anzi; j++){ 3615 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3616 ba_i[j] += aa[nextaj++]; 3617 } 3618 } 3619 3620 /* add received vals into ba */ 3621 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3622 /* i-th row */ 3623 if (i == *nextrow[k]) { 3624 anzi = *(nextai[k]+1) - *nextai[k]; 3625 aj = buf_rj[k] + *(nextai[k]); 3626 aa = abuf_r[k] + *(nextai[k]); 3627 nextaj = 0; 3628 for (j=0; nextaj<anzi; j++){ 3629 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3630 ba_i[j] += aa[nextaj++]; 3631 } 3632 } 3633 nextrow[k]++; nextai[k]++; 3634 } 3635 } 3636 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3637 } 3638 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3639 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3640 3641 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3642 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3643 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3644 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3645 PetscFunctionReturn(0); 3646 } 3647 3648 static PetscEvent logkey_seqstompisym = 0; 3649 #undef __FUNCT__ 3650 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3651 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3652 { 3653 PetscErrorCode ierr; 3654 Mat B_mpi; 3655 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3656 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3657 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3658 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3659 PetscInt len,proc,*dnz,*onz; 3660 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3661 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3662 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3663 MPI_Status *status; 3664 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3665 PetscBT lnkbt; 3666 Mat_Merge_SeqsToMPI *merge; 3667 PetscContainer container; 3668 3669 PetscFunctionBegin; 3670 if (!logkey_seqstompisym) { 3671 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3672 } 3673 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3674 3675 /* make sure it is a PETSc comm */ 3676 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3677 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3678 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3679 3680 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3681 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3682 3683 /* determine row ownership */ 3684 /*---------------------------------------------------------*/ 3685 merge->rowmap.n = m; 3686 merge->rowmap.N = M; 3687 merge->rowmap.bs = 1; 3688 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3689 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3690 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3691 3692 m = merge->rowmap.n; 3693 M = merge->rowmap.N; 3694 owners = merge->rowmap.range; 3695 3696 /* determine the number of messages to send, their lengths */ 3697 /*---------------------------------------------------------*/ 3698 len_s = merge->len_s; 3699 3700 len = 0; /* length of buf_si[] */ 3701 merge->nsend = 0; 3702 for (proc=0; proc<size; proc++){ 3703 len_si[proc] = 0; 3704 if (proc == rank){ 3705 len_s[proc] = 0; 3706 } else { 3707 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3708 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3709 } 3710 if (len_s[proc]) { 3711 merge->nsend++; 3712 nrows = 0; 3713 for (i=owners[proc]; i<owners[proc+1]; i++){ 3714 if (ai[i+1] > ai[i]) nrows++; 3715 } 3716 len_si[proc] = 2*(nrows+1); 3717 len += len_si[proc]; 3718 } 3719 } 3720 3721 /* determine the number and length of messages to receive for ij-structure */ 3722 /*-------------------------------------------------------------------------*/ 3723 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3724 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3725 3726 /* post the Irecv of j-structure */ 3727 /*-------------------------------*/ 3728 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3729 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3730 3731 /* post the Isend of j-structure */ 3732 /*--------------------------------*/ 3733 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3734 sj_waits = si_waits + merge->nsend; 3735 3736 for (proc=0, k=0; proc<size; proc++){ 3737 if (!len_s[proc]) continue; 3738 i = owners[proc]; 3739 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3740 k++; 3741 } 3742 3743 /* receives and sends of j-structure are complete */ 3744 /*------------------------------------------------*/ 3745 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3746 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3747 3748 /* send and recv i-structure */ 3749 /*---------------------------*/ 3750 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3751 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3752 3753 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3754 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3755 for (proc=0,k=0; proc<size; proc++){ 3756 if (!len_s[proc]) continue; 3757 /* form outgoing message for i-structure: 3758 buf_si[0]: nrows to be sent 3759 [1:nrows]: row index (global) 3760 [nrows+1:2*nrows+1]: i-structure index 3761 */ 3762 /*-------------------------------------------*/ 3763 nrows = len_si[proc]/2 - 1; 3764 buf_si_i = buf_si + nrows+1; 3765 buf_si[0] = nrows; 3766 buf_si_i[0] = 0; 3767 nrows = 0; 3768 for (i=owners[proc]; i<owners[proc+1]; i++){ 3769 anzi = ai[i+1] - ai[i]; 3770 if (anzi) { 3771 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3772 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3773 nrows++; 3774 } 3775 } 3776 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3777 k++; 3778 buf_si += len_si[proc]; 3779 } 3780 3781 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3782 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3783 3784 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3785 for (i=0; i<merge->nrecv; i++){ 3786 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3787 } 3788 3789 ierr = PetscFree(len_si);CHKERRQ(ierr); 3790 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3791 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3792 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3793 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3794 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3795 ierr = PetscFree(status);CHKERRQ(ierr); 3796 3797 /* compute a local seq matrix in each processor */ 3798 /*----------------------------------------------*/ 3799 /* allocate bi array and free space for accumulating nonzero column info */ 3800 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3801 bi[0] = 0; 3802 3803 /* create and initialize a linked list */ 3804 nlnk = N+1; 3805 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3806 3807 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3808 len = 0; 3809 len = ai[owners[rank+1]] - ai[owners[rank]]; 3810 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3811 current_space = free_space; 3812 3813 /* determine symbolic info for each local row */ 3814 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3815 nextrow = buf_ri_k + merge->nrecv; 3816 nextai = nextrow + merge->nrecv; 3817 for (k=0; k<merge->nrecv; k++){ 3818 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3819 nrows = *buf_ri_k[k]; 3820 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3821 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3822 } 3823 3824 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3825 len = 0; 3826 for (i=0;i<m;i++) { 3827 bnzi = 0; 3828 /* add local non-zero cols of this proc's seqmat into lnk */ 3829 arow = owners[rank] + i; 3830 anzi = ai[arow+1] - ai[arow]; 3831 aj = a->j + ai[arow]; 3832 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3833 bnzi += nlnk; 3834 /* add received col data into lnk */ 3835 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3836 if (i == *nextrow[k]) { /* i-th row */ 3837 anzi = *(nextai[k]+1) - *nextai[k]; 3838 aj = buf_rj[k] + *nextai[k]; 3839 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3840 bnzi += nlnk; 3841 nextrow[k]++; nextai[k]++; 3842 } 3843 } 3844 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3845 3846 /* if free space is not available, make more free space */ 3847 if (current_space->local_remaining<bnzi) { 3848 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3849 nspacedouble++; 3850 } 3851 /* copy data into free space, then initialize lnk */ 3852 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3853 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3854 3855 current_space->array += bnzi; 3856 current_space->local_used += bnzi; 3857 current_space->local_remaining -= bnzi; 3858 3859 bi[i+1] = bi[i] + bnzi; 3860 } 3861 3862 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3863 3864 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3865 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3866 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3867 3868 /* create symbolic parallel matrix B_mpi */ 3869 /*---------------------------------------*/ 3870 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3871 if (n==PETSC_DECIDE) { 3872 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3873 } else { 3874 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3875 } 3876 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3877 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3878 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3879 3880 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3881 B_mpi->assembled = PETSC_FALSE; 3882 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3883 merge->bi = bi; 3884 merge->bj = bj; 3885 merge->buf_ri = buf_ri; 3886 merge->buf_rj = buf_rj; 3887 merge->coi = PETSC_NULL; 3888 merge->coj = PETSC_NULL; 3889 merge->owners_co = PETSC_NULL; 3890 3891 /* attach the supporting struct to B_mpi for reuse */ 3892 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3893 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3894 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3895 *mpimat = B_mpi; 3896 3897 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3898 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3899 PetscFunctionReturn(0); 3900 } 3901 3902 static PetscEvent logkey_seqstompi = 0; 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "MatMerge_SeqsToMPI" 3905 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3906 { 3907 PetscErrorCode ierr; 3908 3909 PetscFunctionBegin; 3910 if (!logkey_seqstompi) { 3911 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3912 } 3913 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3914 if (scall == MAT_INITIAL_MATRIX){ 3915 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3916 } 3917 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3918 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3919 PetscFunctionReturn(0); 3920 } 3921 static PetscEvent logkey_getlocalmat = 0; 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatGetLocalMat" 3924 /*@C 3925 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3926 3927 Not Collective 3928 3929 Input Parameters: 3930 + A - the matrix 3931 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3932 3933 Output Parameter: 3934 . A_loc - the local sequential matrix generated 3935 3936 Level: developer 3937 3938 @*/ 3939 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3940 { 3941 PetscErrorCode ierr; 3942 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3943 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3944 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3945 PetscScalar *aa=a->a,*ba=b->a,*ca; 3946 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3947 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3948 3949 PetscFunctionBegin; 3950 if (!logkey_getlocalmat) { 3951 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3952 } 3953 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3954 if (scall == MAT_INITIAL_MATRIX){ 3955 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3956 ci[0] = 0; 3957 for (i=0; i<am; i++){ 3958 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3959 } 3960 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3961 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3962 k = 0; 3963 for (i=0; i<am; i++) { 3964 ncols_o = bi[i+1] - bi[i]; 3965 ncols_d = ai[i+1] - ai[i]; 3966 /* off-diagonal portion of A */ 3967 for (jo=0; jo<ncols_o; jo++) { 3968 col = cmap[*bj]; 3969 if (col >= cstart) break; 3970 cj[k] = col; bj++; 3971 ca[k++] = *ba++; 3972 } 3973 /* diagonal portion of A */ 3974 for (j=0; j<ncols_d; j++) { 3975 cj[k] = cstart + *aj++; 3976 ca[k++] = *aa++; 3977 } 3978 /* off-diagonal portion of A */ 3979 for (j=jo; j<ncols_o; j++) { 3980 cj[k] = cmap[*bj++]; 3981 ca[k++] = *ba++; 3982 } 3983 } 3984 /* put together the new matrix */ 3985 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3986 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3987 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3988 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3989 mat->free_a = PETSC_TRUE; 3990 mat->free_ij = PETSC_TRUE; 3991 mat->nonew = 0; 3992 } else if (scall == MAT_REUSE_MATRIX){ 3993 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3994 ci = mat->i; cj = mat->j; ca = mat->a; 3995 for (i=0; i<am; i++) { 3996 /* off-diagonal portion of A */ 3997 ncols_o = bi[i+1] - bi[i]; 3998 for (jo=0; jo<ncols_o; jo++) { 3999 col = cmap[*bj]; 4000 if (col >= cstart) break; 4001 *ca++ = *ba++; bj++; 4002 } 4003 /* diagonal portion of A */ 4004 ncols_d = ai[i+1] - ai[i]; 4005 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4006 /* off-diagonal portion of A */ 4007 for (j=jo; j<ncols_o; j++) { 4008 *ca++ = *ba++; bj++; 4009 } 4010 } 4011 } else { 4012 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4013 } 4014 4015 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 static PetscEvent logkey_getlocalmatcondensed = 0; 4020 #undef __FUNCT__ 4021 #define __FUNCT__ "MatGetLocalMatCondensed" 4022 /*@C 4023 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4024 4025 Not Collective 4026 4027 Input Parameters: 4028 + A - the matrix 4029 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4030 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4031 4032 Output Parameter: 4033 . A_loc - the local sequential matrix generated 4034 4035 Level: developer 4036 4037 @*/ 4038 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4039 { 4040 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4041 PetscErrorCode ierr; 4042 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4043 IS isrowa,iscola; 4044 Mat *aloc; 4045 4046 PetscFunctionBegin; 4047 if (!logkey_getlocalmatcondensed) { 4048 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4049 } 4050 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4051 if (!row){ 4052 start = A->rmap.rstart; end = A->rmap.rend; 4053 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4054 } else { 4055 isrowa = *row; 4056 } 4057 if (!col){ 4058 start = A->cmap.rstart; 4059 cmap = a->garray; 4060 nzA = a->A->cmap.n; 4061 nzB = a->B->cmap.n; 4062 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4063 ncols = 0; 4064 for (i=0; i<nzB; i++) { 4065 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4066 else break; 4067 } 4068 imark = i; 4069 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4070 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4071 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4072 ierr = PetscFree(idx);CHKERRQ(ierr); 4073 } else { 4074 iscola = *col; 4075 } 4076 if (scall != MAT_INITIAL_MATRIX){ 4077 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4078 aloc[0] = *A_loc; 4079 } 4080 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4081 *A_loc = aloc[0]; 4082 ierr = PetscFree(aloc);CHKERRQ(ierr); 4083 if (!row){ 4084 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4085 } 4086 if (!col){ 4087 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4088 } 4089 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4090 PetscFunctionReturn(0); 4091 } 4092 4093 static PetscEvent logkey_GetBrowsOfAcols = 0; 4094 #undef __FUNCT__ 4095 #define __FUNCT__ "MatGetBrowsOfAcols" 4096 /*@C 4097 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4098 4099 Collective on Mat 4100 4101 Input Parameters: 4102 + A,B - the matrices in mpiaij format 4103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4104 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4105 4106 Output Parameter: 4107 + rowb, colb - index sets of rows and columns of B to extract 4108 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4109 - B_seq - the sequential matrix generated 4110 4111 Level: developer 4112 4113 @*/ 4114 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4115 { 4116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4117 PetscErrorCode ierr; 4118 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4119 IS isrowb,iscolb; 4120 Mat *bseq; 4121 4122 PetscFunctionBegin; 4123 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4124 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4125 } 4126 if (!logkey_GetBrowsOfAcols) { 4127 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4128 } 4129 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4130 4131 if (scall == MAT_INITIAL_MATRIX){ 4132 start = A->cmap.rstart; 4133 cmap = a->garray; 4134 nzA = a->A->cmap.n; 4135 nzB = a->B->cmap.n; 4136 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4137 ncols = 0; 4138 for (i=0; i<nzB; i++) { /* row < local row index */ 4139 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4140 else break; 4141 } 4142 imark = i; 4143 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4144 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4145 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4146 ierr = PetscFree(idx);CHKERRQ(ierr); 4147 *brstart = imark; 4148 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4149 } else { 4150 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4151 isrowb = *rowb; iscolb = *colb; 4152 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4153 bseq[0] = *B_seq; 4154 } 4155 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4156 *B_seq = bseq[0]; 4157 ierr = PetscFree(bseq);CHKERRQ(ierr); 4158 if (!rowb){ 4159 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4160 } else { 4161 *rowb = isrowb; 4162 } 4163 if (!colb){ 4164 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4165 } else { 4166 *colb = iscolb; 4167 } 4168 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4169 PetscFunctionReturn(0); 4170 } 4171 4172 static PetscEvent logkey_GetBrowsOfAocols = 0; 4173 #undef __FUNCT__ 4174 #define __FUNCT__ "MatGetBrowsOfAoCols" 4175 /*@C 4176 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4177 of the OFF-DIAGONAL portion of local A 4178 4179 Collective on Mat 4180 4181 Input Parameters: 4182 + A,B - the matrices in mpiaij format 4183 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4184 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4185 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4186 4187 Output Parameter: 4188 + B_oth - the sequential matrix generated 4189 4190 Level: developer 4191 4192 @*/ 4193 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4194 { 4195 VecScatter_MPI_General *gen_to,*gen_from; 4196 PetscErrorCode ierr; 4197 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4198 Mat_SeqAIJ *b_oth; 4199 VecScatter ctx=a->Mvctx; 4200 MPI_Comm comm=ctx->comm; 4201 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4202 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4203 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4204 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4205 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4206 MPI_Status *sstatus,rstatus; 4207 PetscInt *cols,sbs,rbs; 4208 PetscScalar *vals; 4209 4210 PetscFunctionBegin; 4211 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4212 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4213 } 4214 if (!logkey_GetBrowsOfAocols) { 4215 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4216 } 4217 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4218 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4219 4220 gen_to = (VecScatter_MPI_General*)ctx->todata; 4221 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4222 rvalues = gen_from->values; /* holds the length of receiving row */ 4223 svalues = gen_to->values; /* holds the length of sending row */ 4224 nrecvs = gen_from->n; 4225 nsends = gen_to->n; 4226 4227 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4228 srow = gen_to->indices; /* local row index to be sent */ 4229 sstarts = gen_to->starts; 4230 sprocs = gen_to->procs; 4231 sstatus = gen_to->sstatus; 4232 sbs = gen_to->bs; 4233 rstarts = gen_from->starts; 4234 rprocs = gen_from->procs; 4235 rbs = gen_from->bs; 4236 4237 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4238 if (scall == MAT_INITIAL_MATRIX){ 4239 /* i-array */ 4240 /*---------*/ 4241 /* post receives */ 4242 for (i=0; i<nrecvs; i++){ 4243 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4244 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4245 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4246 } 4247 4248 /* pack the outgoing message */ 4249 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4250 rstartsj = sstartsj + nsends +1; 4251 sstartsj[0] = 0; rstartsj[0] = 0; 4252 len = 0; /* total length of j or a array to be sent */ 4253 k = 0; 4254 for (i=0; i<nsends; i++){ 4255 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4256 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4257 for (j=0; j<nrows; j++) { 4258 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4259 for (l=0; l<sbs; l++){ 4260 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4261 rowlen[j*sbs+l] = ncols; 4262 len += ncols; 4263 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4264 } 4265 k++; 4266 } 4267 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4268 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4269 } 4270 /* recvs and sends of i-array are completed */ 4271 i = nrecvs; 4272 while (i--) { 4273 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4274 } 4275 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4276 4277 /* allocate buffers for sending j and a arrays */ 4278 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4279 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4280 4281 /* create i-array of B_oth */ 4282 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4283 b_othi[0] = 0; 4284 len = 0; /* total length of j or a array to be received */ 4285 k = 0; 4286 for (i=0; i<nrecvs; i++){ 4287 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4288 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4289 for (j=0; j<nrows; j++) { 4290 b_othi[k+1] = b_othi[k] + rowlen[j]; 4291 len += rowlen[j]; k++; 4292 } 4293 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4294 } 4295 4296 /* allocate space for j and a arrrays of B_oth */ 4297 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4298 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4299 4300 /* j-array */ 4301 /*---------*/ 4302 /* post receives of j-array */ 4303 for (i=0; i<nrecvs; i++){ 4304 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4305 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4306 } 4307 4308 /* pack the outgoing message j-array */ 4309 k = 0; 4310 for (i=0; i<nsends; i++){ 4311 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4312 bufJ = bufj+sstartsj[i]; 4313 for (j=0; j<nrows; j++) { 4314 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4315 for (ll=0; ll<sbs; ll++){ 4316 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4317 for (l=0; l<ncols; l++){ 4318 *bufJ++ = cols[l]; 4319 } 4320 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4321 } 4322 } 4323 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4324 } 4325 4326 /* recvs and sends of j-array are completed */ 4327 i = nrecvs; 4328 while (i--) { 4329 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4330 } 4331 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4332 } else if (scall == MAT_REUSE_MATRIX){ 4333 sstartsj = *startsj; 4334 rstartsj = sstartsj + nsends +1; 4335 bufa = *bufa_ptr; 4336 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4337 b_otha = b_oth->a; 4338 } else { 4339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4340 } 4341 4342 /* a-array */ 4343 /*---------*/ 4344 /* post receives of a-array */ 4345 for (i=0; i<nrecvs; i++){ 4346 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4347 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4348 } 4349 4350 /* pack the outgoing message a-array */ 4351 k = 0; 4352 for (i=0; i<nsends; i++){ 4353 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4354 bufA = bufa+sstartsj[i]; 4355 for (j=0; j<nrows; j++) { 4356 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4357 for (ll=0; ll<sbs; ll++){ 4358 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4359 for (l=0; l<ncols; l++){ 4360 *bufA++ = vals[l]; 4361 } 4362 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4363 } 4364 } 4365 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4366 } 4367 /* recvs and sends of a-array are completed */ 4368 i = nrecvs; 4369 while (i--) { 4370 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4371 } 4372 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4373 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4374 4375 if (scall == MAT_INITIAL_MATRIX){ 4376 /* put together the new matrix */ 4377 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4378 4379 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4380 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4381 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4382 b_oth->free_a = PETSC_TRUE; 4383 b_oth->free_ij = PETSC_TRUE; 4384 b_oth->nonew = 0; 4385 4386 ierr = PetscFree(bufj);CHKERRQ(ierr); 4387 if (!startsj || !bufa_ptr){ 4388 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4389 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4390 } else { 4391 *startsj = sstartsj; 4392 *bufa_ptr = bufa; 4393 } 4394 } 4395 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "MatGetCommunicationStructs" 4401 /*@C 4402 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4403 4404 Not Collective 4405 4406 Input Parameters: 4407 . A - The matrix in mpiaij format 4408 4409 Output Parameter: 4410 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4411 . colmap - A map from global column index to local index into lvec 4412 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4413 4414 Level: developer 4415 4416 @*/ 4417 #if defined (PETSC_USE_CTABLE) 4418 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4419 #else 4420 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4421 #endif 4422 { 4423 Mat_MPIAIJ *a; 4424 4425 PetscFunctionBegin; 4426 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4427 PetscValidPointer(lvec, 2) 4428 PetscValidPointer(colmap, 3) 4429 PetscValidPointer(multScatter, 4) 4430 a = (Mat_MPIAIJ *) A->data; 4431 if (lvec) *lvec = a->lvec; 4432 if (colmap) *colmap = a->colmap; 4433 if (multScatter) *multScatter = a->Mvctx; 4434 PetscFunctionReturn(0); 4435 } 4436 4437 EXTERN_C_BEGIN 4438 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4439 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4440 EXTERN_C_END 4441 4442 /*MC 4443 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4444 4445 Options Database Keys: 4446 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4447 4448 Level: beginner 4449 4450 .seealso: MatCreateMPIAIJ 4451 M*/ 4452 4453 EXTERN_C_BEGIN 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatCreate_MPIAIJ" 4456 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4457 { 4458 Mat_MPIAIJ *b; 4459 PetscErrorCode ierr; 4460 PetscMPIInt size; 4461 4462 PetscFunctionBegin; 4463 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4464 4465 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4466 B->data = (void*)b; 4467 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4468 B->factor = 0; 4469 B->rmap.bs = 1; 4470 B->assembled = PETSC_FALSE; 4471 B->mapping = 0; 4472 4473 B->insertmode = NOT_SET_VALUES; 4474 b->size = size; 4475 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4476 4477 /* build cache for off array entries formed */ 4478 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4479 b->donotstash = PETSC_FALSE; 4480 b->colmap = 0; 4481 b->garray = 0; 4482 b->roworiented = PETSC_TRUE; 4483 4484 /* stuff used for matrix vector multiply */ 4485 b->lvec = PETSC_NULL; 4486 b->Mvctx = PETSC_NULL; 4487 4488 /* stuff for MatGetRow() */ 4489 b->rowindices = 0; 4490 b->rowvalues = 0; 4491 b->getrowactive = PETSC_FALSE; 4492 4493 4494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4495 "MatStoreValues_MPIAIJ", 4496 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4498 "MatRetrieveValues_MPIAIJ", 4499 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4501 "MatGetDiagonalBlock_MPIAIJ", 4502 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4504 "MatIsTranspose_MPIAIJ", 4505 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4507 "MatMPIAIJSetPreallocation_MPIAIJ", 4508 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4510 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4511 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4513 "MatDiagonalScaleLocal_MPIAIJ", 4514 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4516 "MatConvert_MPIAIJ_MPICSRPERM", 4517 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4519 "MatConvert_MPIAIJ_MPICRL", 4520 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4521 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 EXTERN_C_END 4525 4526 #undef __FUNCT__ 4527 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4528 /*@C 4529 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4530 and "off-diagonal" part of the matrix in CSR format. 4531 4532 Collective on MPI_Comm 4533 4534 Input Parameters: 4535 + comm - MPI communicator 4536 . m - number of local rows (Cannot be PETSC_DECIDE) 4537 . n - This value should be the same as the local size used in creating the 4538 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4539 calculated if N is given) For square matrices n is almost always m. 4540 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4541 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4542 . i - row indices for "diagonal" portion of matrix 4543 . j - column indices 4544 . a - matrix values 4545 . oi - row indices for "off-diagonal" portion of matrix 4546 . oj - column indices 4547 - oa - matrix values 4548 4549 Output Parameter: 4550 . mat - the matrix 4551 4552 Level: advanced 4553 4554 Notes: 4555 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4556 4557 The i and j indices are 0 based 4558 4559 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4560 4561 4562 .keywords: matrix, aij, compressed row, sparse, parallel 4563 4564 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4565 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4566 @*/ 4567 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4568 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4569 { 4570 PetscErrorCode ierr; 4571 Mat_MPIAIJ *maij; 4572 4573 PetscFunctionBegin; 4574 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4575 if (i[0]) { 4576 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4577 } 4578 if (oi[0]) { 4579 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4580 } 4581 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4582 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4583 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4584 maij = (Mat_MPIAIJ*) (*mat)->data; 4585 maij->donotstash = PETSC_TRUE; 4586 (*mat)->preallocated = PETSC_TRUE; 4587 4588 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4589 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4590 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4591 4592 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4593 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4594 4595 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 4600 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4601 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 PetscFunctionReturn(0); 4603 } 4604 4605 /* 4606 Special version for direct calls from Fortran 4607 */ 4608 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4609 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4610 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4611 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4612 #endif 4613 4614 /* Change these macros so can be used in void function */ 4615 #undef CHKERRQ 4616 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4617 #undef SETERRQ2 4618 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4619 #undef SETERRQ 4620 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4621 4622 EXTERN_C_BEGIN 4623 #undef __FUNCT__ 4624 #define __FUNCT__ "matsetvaluesmpiaij_" 4625 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4626 { 4627 Mat mat = *mmat; 4628 PetscInt m = *mm, n = *mn; 4629 InsertMode addv = *maddv; 4630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4631 PetscScalar value; 4632 PetscErrorCode ierr; 4633 4634 MatPreallocated(mat); 4635 if (mat->insertmode == NOT_SET_VALUES) { 4636 mat->insertmode = addv; 4637 } 4638 #if defined(PETSC_USE_DEBUG) 4639 else if (mat->insertmode != addv) { 4640 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4641 } 4642 #endif 4643 { 4644 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4645 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4646 PetscTruth roworiented = aij->roworiented; 4647 4648 /* Some Variables required in the macro */ 4649 Mat A = aij->A; 4650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4651 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4652 PetscScalar *aa = a->a; 4653 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4654 Mat B = aij->B; 4655 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4656 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4657 PetscScalar *ba = b->a; 4658 4659 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4660 PetscInt nonew = a->nonew; 4661 PetscScalar *ap1,*ap2; 4662 4663 PetscFunctionBegin; 4664 for (i=0; i<m; i++) { 4665 if (im[i] < 0) continue; 4666 #if defined(PETSC_USE_DEBUG) 4667 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4668 #endif 4669 if (im[i] >= rstart && im[i] < rend) { 4670 row = im[i] - rstart; 4671 lastcol1 = -1; 4672 rp1 = aj + ai[row]; 4673 ap1 = aa + ai[row]; 4674 rmax1 = aimax[row]; 4675 nrow1 = ailen[row]; 4676 low1 = 0; 4677 high1 = nrow1; 4678 lastcol2 = -1; 4679 rp2 = bj + bi[row]; 4680 ap2 = ba + bi[row]; 4681 rmax2 = bimax[row]; 4682 nrow2 = bilen[row]; 4683 low2 = 0; 4684 high2 = nrow2; 4685 4686 for (j=0; j<n; j++) { 4687 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4688 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4689 if (in[j] >= cstart && in[j] < cend){ 4690 col = in[j] - cstart; 4691 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4692 } else if (in[j] < 0) continue; 4693 #if defined(PETSC_USE_DEBUG) 4694 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4695 #endif 4696 else { 4697 if (mat->was_assembled) { 4698 if (!aij->colmap) { 4699 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4700 } 4701 #if defined (PETSC_USE_CTABLE) 4702 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4703 col--; 4704 #else 4705 col = aij->colmap[in[j]] - 1; 4706 #endif 4707 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4708 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4709 col = in[j]; 4710 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4711 B = aij->B; 4712 b = (Mat_SeqAIJ*)B->data; 4713 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4714 rp2 = bj + bi[row]; 4715 ap2 = ba + bi[row]; 4716 rmax2 = bimax[row]; 4717 nrow2 = bilen[row]; 4718 low2 = 0; 4719 high2 = nrow2; 4720 bm = aij->B->rmap.n; 4721 ba = b->a; 4722 } 4723 } else col = in[j]; 4724 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4725 } 4726 } 4727 } else { 4728 if (!aij->donotstash) { 4729 if (roworiented) { 4730 if (ignorezeroentries && v[i*n] == 0.0) continue; 4731 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4732 } else { 4733 if (ignorezeroentries && v[i] == 0.0) continue; 4734 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4735 } 4736 } 4737 } 4738 }} 4739 PetscFunctionReturnVoid(); 4740 } 4741 EXTERN_C_END 4742 4743