#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt 
nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero 
the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file
*/ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == 
PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS 
colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = 
irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 case MAT_DO_NOT_IGNORE_ZERO_ENTRIES: 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROW_ORIENTED: 1221 a->roworiented = PETSC_TRUE; 1222 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1223 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1224 break; 1225 case MAT_ROWS_SORTED: 1226 case MAT_ROWS_UNSORTED: 1227 case MAT_YES_NEW_DIAGONALS: 1228 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1229 break; 1230 case MAT_COLUMN_ORIENTED: 1231 a->roworiented = PETSC_FALSE; 1232 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1233 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1234 break; 1235 case MAT_IGNORE_OFF_PROC_ENTRIES: 1236 a->donotstash = PETSC_TRUE; 1237 break; 1238 case MAT_NO_NEW_DIAGONALS: 1239 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 break; 1248 case MAT_NOT_SYMMETRIC: 1249 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1250 case MAT_NOT_HERMITIAN: 1251 case MAT_NOT_SYMMETRY_ETERNAL: 1252 break; 1253 default: 1254 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRow_MPIAIJ" 1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1264 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1265 PetscErrorCode ierr; 1266 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1267 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1268 PetscInt *cmap,*idx_p; 1269 1270 PetscFunctionBegin; 1271 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1272 mat->getrowactive = PETSC_TRUE; 1273 1274 if (!mat->rowvalues && (idx || v)) { 1275 /* 1276 allocate enough space to hold information from the longest row. 
1277 */ 1278 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1279 PetscInt max = 1,tmp; 1280 for (i=0; i<matin->rmap.n; i++) { 1281 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1282 if (max < tmp) { max = tmp; } 1283 } 1284 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1285 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1286 } 1287 1288 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1289 lrow = row - rstart; 1290 1291 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1292 if (!v) {pvA = 0; pvB = 0;} 1293 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1294 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1295 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1296 nztot = nzA + nzB; 1297 1298 cmap = mat->garray; 1299 if (v || idx) { 1300 if (nztot) { 1301 /* Sort by increasing column numbers, assuming A and B already sorted */ 1302 PetscInt imark = -1; 1303 if (v) { 1304 *v = v_p = mat->rowvalues; 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1307 else break; 1308 } 1309 imark = i; 1310 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1311 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1312 } 1313 if (idx) { 1314 *idx = idx_p = mat->rowindices; 1315 if (imark > -1) { 1316 for (i=0; i<imark; i++) { 1317 idx_p[i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!aij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1349 } 1350 aij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatNorm_MPIAIJ" 1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,j,cstart = mat->cmap.rstart; 1362 PetscReal sum = 0.0; 1363 PetscScalar *v; 1364 1365 PetscFunctionBegin; 1366 if (aij->size == 1) { 1367 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1368 } else { 1369 if (type == NORM_FROBENIUS) { 1370 v = amat->a; 1371 for (i=0; i<amat->nz; i++) { 1372 #if defined(PETSC_USE_COMPLEX) 1373 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 v = bmat->a; 1379 for (i=0; i<bmat->nz; i++) { 1380 #if defined(PETSC_USE_COMPLEX) 1381 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1382 #else 1383 sum += (*v)*(*v); v++; 1384 #endif 1385 } 1386 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 *norm = sqrt(*norm); 
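      /* Frobenius norm: each process sums |a_ij|^2 over its diagonal (amat) and
         off-diagonal (bmat) blocks; MPI_Allreduce with MPI_SUM gives the global
         sum, and the square root of that is ||A||_F. */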
1388 } else if (type == NORM_1) { /* max column norm */ 1389 PetscReal *tmp,*tmp2; 1390 PetscInt *jj,*garray = aij->garray; 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1393 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1394 *norm = 0.0; 1395 v = amat->a; jj = amat->j; 1396 for (j=0; j<amat->nz; j++) { 1397 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a; jj = bmat->j; 1400 for (j=0; j<bmat->nz; j++) { 1401 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1402 } 1403 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1404 for (j=0; j<mat->cmap.N; j++) { 1405 if (tmp2[j] > *norm) *norm = tmp2[j]; 1406 } 1407 ierr = PetscFree(tmp);CHKERRQ(ierr); 1408 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1409 } else if (type == NORM_INFINITY) { /* max row norm */ 1410 PetscReal ntemp = 0.0; 1411 for (j=0; j<aij->A->rmap.n; j++) { 1412 v = amat->a + amat->i[j]; 1413 sum = 0.0; 1414 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1415 sum += PetscAbsScalar(*v); v++; 1416 } 1417 v = bmat->a + bmat->i[j]; 1418 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1419 sum += PetscAbsScalar(*v); v++; 1420 } 1421 if (sum > ntemp) ntemp = sum; 1422 } 1423 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1424 } else { 1425 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1426 } 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatTranspose_MPIAIJ" 1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1434 { 1435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1436 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1437 PetscErrorCode ierr; 1438 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1439 PetscInt cstart=A->cmap.rstart,ncol; 1440 Mat B; 1441 PetscScalar *array; 1442 1443 PetscFunctionBegin; 1444 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1445 1446 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1447 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1448 ai = Aloc->i; aj = Aloc->j; 1449 bi = Bloc->i; bj = Bloc->j; 1450 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1451 cols = d_nnz + na + 1; /* work space to be used by B part */ 1452 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1453 for (i=0; i<ai[ma]; i++){ 1454 d_nnz[aj[i]] ++; 1455 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1456 } 1457 1458 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1459 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1460 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1461 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1462 1463 /* copy over the A part */ 1464 array = Aloc->a; 1465 row = A->rmap.rstart; 1466 for (i=0; i<ma; i++) { 1467 ncol = ai[i+1]-ai[i]; 1468 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1469 row++; array += ncol; aj += ncol; 1470 } 1471 aj = Aloc->j; 1472 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1473 1474 /* copy over the B part */ 1475 array = Bloc->a; 1476 row = A->rmap.rstart; 1477 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1478 for (i=0; i<mb; i++) { 1479 ncol = bi[i+1]-bi[i]; 1480 ierr = 
MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1481 row++; array += ncol; cols += ncol; 1482 } 1483 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1484 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1485 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1486 if (matout) { 1487 *matout = B; 1488 } else { 1489 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1496 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1497 { 1498 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1499 Mat a = aij->A,b = aij->B; 1500 PetscErrorCode ierr; 1501 PetscInt s1,s2,s3; 1502 1503 PetscFunctionBegin; 1504 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1505 if (rr) { 1506 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1507 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1508 /* Overlap communication with computation. */ 1509 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 } 1511 if (ll) { 1512 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1513 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1514 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1515 } 1516 /* scale the diagonal block */ 1517 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1518 1519 if (rr) { 1520 /* Do a scatter end and then right scale the off-diagonal block */ 1521 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1523 } 1524 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1530 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1531 { 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1533 PetscErrorCode ierr; 1534 1535 PetscFunctionBegin; 1536 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1537 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1542 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1543 { 1544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1545 PetscErrorCode ierr; 1546 1547 PetscFunctionBegin; 1548 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatEqual_MPIAIJ" 1554 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1555 { 1556 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1557 Mat a,b,c,d; 1558 PetscTruth flg; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 a = matA->A; b = matA->B; 1563 c = matB->A; d = matB->B; 1564 1565 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1566 if (flg) { 1567 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1568 } 1569 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatCopy_MPIAIJ" 1575 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1576 { 1577 PetscErrorCode ierr; 1578 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1579 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1580 1581 PetscFunctionBegin; 1582 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1583 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1584 /* because of the column compression in the off-processor part of the matrix a->B, 1585 the number of columns in a->B and b->B may be different, hence we cannot call 1586 the MatCopy() directly on the two parts. If need be, we can provide a more 1587 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1588 then copying the submatrices */ 1589 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1590 } else { 1591 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1592 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1599 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1600 { 1601 PetscErrorCode ierr; 1602 1603 PetscFunctionBegin; 1604 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #include "petscblaslapack.h" 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatAXPY_MPIAIJ" 1611 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1612 { 1613 PetscErrorCode ierr; 1614 PetscInt i; 1615 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1616 PetscBLASInt bnz,one=1; 1617 Mat_SeqAIJ *x,*y; 1618 1619 PetscFunctionBegin; 1620 if (str == SAME_NONZERO_PATTERN) { 1621 PetscScalar alpha = a; 1622 x = (Mat_SeqAIJ *)xx->A->data; 1623 y = (Mat_SeqAIJ *)yy->A->data; 1624 bnz = (PetscBLASInt)x->nz; 1625 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1626 x = (Mat_SeqAIJ *)xx->B->data; 1627 y = (Mat_SeqAIJ *)yy->B->data; 1628 bnz = (PetscBLASInt)x->nz; 1629 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1630 } else if (str == SUBSET_NONZERO_PATTERN) { 1631 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1632 1633 x = (Mat_SeqAIJ *)xx->B->data; 1634 y = (Mat_SeqAIJ *)yy->B->data; 1635 if (y->xtoy && y->XtoY != xx->B) { 1636 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1637 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1638 } 1639 if (!y->xtoy) { /* get xtoy */ 1640 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1641 y->XtoY = xx->B; 1642 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1643 } 1644 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1645 } else { 1646 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatConjugate_MPIAIJ" 1655 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1656 { 1657 #if defined(PETSC_USE_COMPLEX) 1658 PetscErrorCode ierr; 1659 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1660 1661 PetscFunctionBegin; 1662 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1663 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1664 #else 1665 PetscFunctionBegin; 1666 #endif 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatRealPart_MPIAIJ" 1672 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1673 { 1674 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1679 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1685 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1686 { 1687 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1688 PetscErrorCode ierr; 1689 1690 PetscFunctionBegin; 1691 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1692 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #ifdef PETSC_HAVE_PBGL 1697 1698 #include <boost/parallel/mpi/bsp_process_group.hpp> 1699 #include <boost/graph/distributed/ilu_default_graph.hpp> 1700 #include <boost/graph/distributed/ilu_0_block.hpp> 1701 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1702 #include <boost/graph/distributed/petsc/interface.hpp> 1703 #include <boost/multi_array.hpp> 1704 #include <boost/parallel/distributed_property_map.hpp> 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1708 /* 1709 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1710 */ 1711 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1712 { 1713 namespace petsc = boost::distributed::petsc; 1714 1715 namespace graph_dist = boost::graph::distributed; 1716 using boost::graph::distributed::ilu_default::process_group_type; 1717 using boost::graph::ilu_permuted; 1718 1719 PetscTruth row_identity, col_identity; 1720 PetscContainer c; 1721 PetscInt m, n, M, N; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1726 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1727 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1728 if (!row_identity || !col_identity) { 1729 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1730 } 1731 1732 process_group_type pg; 1733 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1734 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1735 lgraph_type& level_graph = *lgraph_p; 1736 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1737 1738 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1739 ilu_permuted(level_graph); 1740 1741 /* put together the new matrix */ 1742 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1743 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1744 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1745 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1746 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1747 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1748 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1749 (*fact)->factor = FACTOR_LU; 1750 1751 ierr = PetscContainerCreate(A->comm, &c); 1752 ierr = PetscContainerSetPointer(c, lgraph_p); 1753 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1759 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1760 { 1761 PetscFunctionBegin; 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatSolve_MPIAIJ" 1767 /* 1768 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1769 */ 1770 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1771 { 1772 namespace graph_dist = boost::graph::distributed; 1773 1774 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1775 lgraph_type* lgraph_p; 1776 PetscContainer c; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = 
PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1781 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1782 ierr = VecCopy(b, x); CHKERRQ(ierr); 1783 1784 PetscScalar* array_x; 1785 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1786 PetscInt sx; 1787 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1788 1789 PetscScalar* array_b; 1790 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1791 PetscInt sb; 1792 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1793 1794 lgraph_type& level_graph = *lgraph_p; 1795 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1796 1797 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1798 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1799 ref_x(array_x, boost::extents[num_vertices(graph)]); 1800 1801 typedef boost::iterator_property_map<array_ref_type::iterator, 1802 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1803 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1804 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1805 1806 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1807 1808 PetscFunctionReturn(0); 1809 } 1810 #endif 1811 1812 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1813 PetscInt nzlocal,nsends,nrecvs; 1814 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1815 PetscScalar *sbuf_a,**rbuf_a; 1816 PetscErrorCode (*MatDestroy)(Mat); 1817 } Mat_Redundant; 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1821 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1822 { 1823 PetscErrorCode ierr; 1824 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1825 PetscInt i; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1829 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1830 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1831 for (i=0; i<redund->nrecvs; i++){ 1832 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1833 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1834 } 1835 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1836 ierr = PetscFree(redund);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatDestroy_MatRedundant" 1842 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1843 { 1844 PetscErrorCode ierr; 1845 PetscContainer container; 1846 Mat_Redundant *redund=PETSC_NULL; 1847 1848 PetscFunctionBegin; 1849 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1850 if (container) { 1851 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1852 } else { 1853 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1854 } 1855 A->ops->destroy = redund->MatDestroy; 1856 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1857 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1858 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1864 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1865 { 1866 PetscMPIInt rank,size; 1867 MPI_Comm comm=mat->comm; 1868 PetscErrorCode ierr; 1869 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1870 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1871 PetscInt 
*rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 1907 1908 nsends = redund->nsends; 1909 nrecvs = redund->nrecvs; 1910 send_rank = redund->send_rank; recv_rank = send_rank + size; 1911 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1912 sbuf_j = redund->sbuf_j; 1913 sbuf_a = redund->sbuf_a; 1914 rbuf_j = redund->rbuf_j; 1915 rbuf_a = redund->rbuf_a; 1916 } 1917 1918 if (reuse == MAT_INITIAL_MATRIX){ 1919 PetscMPIInt subrank,subsize; 1920 PetscInt nleftover,np_subcomm; 1921 /* get the destination processors' id send_rank, nsends and nrecvs */ 1922 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1923 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1924 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1925 recv_rank = send_rank + size; 1926 np_subcomm = size/nsubcomm; 1927 nleftover = size - nsubcomm*np_subcomm; 1928 nsends = 0; nrecvs = 0; 1929 for (i=0; i<size; i++){ /* i=rank*/ 1930 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1931 send_rank[nsends] = i; nsends++; 1932 recv_rank[nrecvs++] = i; 1933 } 1934 } 1935 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1936 i = size-nleftover-1; 1937 j = 0; 1938 while (j < nsubcomm - nleftover){ 1939 send_rank[nsends++] = i; 1940 i--; j++; 1941 } 1942 } 1943 1944 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1945 for (i=0; i<nleftover; i++){ 1946 recv_rank[nrecvs++] = size-nleftover+i; 1947 } 1948 } 1949 1950 /* allocate sbuf_j, sbuf_a */ 1951 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1952 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1953 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1954 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1955 1956 /* copy mat's local entries into the buffers */ 1957 if (reuse == MAT_INITIAL_MATRIX){ 1958 rownz_max = 0; 1959 rptr = sbuf_j; 1960 cols = sbuf_j + rend-rstart + 1; 1961 vals = sbuf_a; 1962 rptr[0] = 0; 1963 for (i=0; i<rend-rstart; i++){ 1964 row = i + rstart; 1965 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1966 ncols = nzA + nzB; 1967 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1968 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1969 /* load the column indices for this row into cols */ 1970 lwrite = 0; 1971 for (l=0; l<nzB; l++) { 1972 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1973 vals[lwrite] = aworkB[l]; 1974 cols[lwrite++] = ctmp; 1975 } 1976 } 1977 for (l=0; l<nzA; l++){ 1978 vals[lwrite] = aworkA[l]; 1979 cols[lwrite++] = cstart + cworkA[l]; 1980 } 1981 for (l=0; l<nzB; l++) { 1982 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1983 vals[lwrite] = aworkB[l]; 1984 cols[lwrite++] = ctmp; 1985 } 1986 } 1987 vals += ncols; 1988 cols += ncols; 1989 rptr[i+1] = rptr[i] + ncols; 1990 if (rownz_max < ncols) rownz_max = ncols; 1991 } 1992 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1993 } else { /* only copy matrix values into sbuf_a */ 1994 rptr = sbuf_j; 1995 vals = sbuf_a; 1996 rptr[0] = 0; 1997 for (i=0; i<rend-rstart; i++){ 1998 row = i + rstart; 1999 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2000 ncols = nzA + nzB; 2001 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2002 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2003 lwrite = 0; 2004 for (l=0; l<nzB; l++) { 2005 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2006 } 2007 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2008 for (l=0; l<nzB; l++) { 
2009 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2010 } 2011 vals += ncols; 2012 rptr[i+1] = rptr[i] + ncols; 2013 } 2014 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2015 2016 /* send nzlocal to others, and recv other's nzlocal */ 2017 /*--------------------------------------------------*/ 2018 if (reuse == MAT_INITIAL_MATRIX){ 2019 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2020 s_waits2 = s_waits3 + nsends; 2021 s_waits1 = s_waits2 + nsends; 2022 r_waits1 = s_waits1 + nsends; 2023 r_waits2 = r_waits1 + nrecvs; 2024 r_waits3 = r_waits2 + nrecvs; 2025 } else { 2026 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2027 r_waits3 = s_waits3 + nsends; 2028 } 2029 2030 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2031 if (reuse == MAT_INITIAL_MATRIX){ 2032 /* get new tags to keep the communication clean */ 2033 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2034 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2035 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2036 rbuf_nz = sbuf_nz + nsends; 2037 2038 /* post receives of other's nzlocal */ 2039 for (i=0; i<nrecvs; i++){ 2040 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2041 } 2042 /* send nzlocal to others */ 2043 for (i=0; i<nsends; i++){ 2044 sbuf_nz[i] = nzlocal; 2045 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2046 } 2047 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2048 count = nrecvs; 2049 while (count) { 2050 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2051 recv_rank[imdex] = recv_status.MPI_SOURCE; 2052 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2053 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2054 2055 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2056 rbuf_nz[imdex] += i + 2; 2057 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2058 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2059 count--; 2060 } 2061 /* wait on sends of nzlocal */ 2062 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2063 /* send mat->i,j to others, and recv from other's */ 2064 /*------------------------------------------------*/ 2065 for (i=0; i<nsends; i++){ 2066 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2067 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2068 } 2069 /* wait on receives of mat->i,j */ 2070 /*------------------------------*/ 2071 count = nrecvs; 2072 while (count) { 2073 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2074 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2075 count--; 2076 } 2077 /* wait on sends of mat->i,j */ 2078 /*---------------------------*/ 2079 if (nsends) { 2080 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2081 } 2082 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2083 2084 /* post receives, send and receive mat->a */ 2085 /*----------------------------------------*/ 
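  /*
     Note: this is the only communication performed in the MAT_REUSE_MATRIX case.  The
     destination/source ranks and the row/column structure (sbuf_j, rbuf_j) were exchanged
     once during the MAT_INITIAL_MATRIX call and cached in the Mat_Redundant container
     attached to the matrix, so a reuse only needs to resend the numerical values in
     sbuf_a using tag3.
  */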
2086 for (imdex=0; imdex<nrecvs; imdex++) { 2087 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2088 } 2089 for (i=0; i<nsends; i++){ 2090 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2091 } 2092 count = nrecvs; 2093 while (count) { 2094 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2095 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2096 count--; 2097 } 2098 if (nsends) { 2099 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2100 } 2101 2102 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2103 2104 /* create redundant matrix */ 2105 /*-------------------------*/ 2106 if (reuse == MAT_INITIAL_MATRIX){ 2107 /* compute rownz_max for preallocation */ 2108 for (imdex=0; imdex<nrecvs; imdex++){ 2109 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2110 rptr = rbuf_j[imdex]; 2111 for (i=0; i<j; i++){ 2112 ncols = rptr[i+1] - rptr[i]; 2113 if (rownz_max < ncols) rownz_max = ncols; 2114 } 2115 } 2116 2117 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2118 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2119 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2120 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2122 } else { 2123 C = *matredundant; 2124 } 2125 2126 /* insert local matrix entries */ 2127 rptr = sbuf_j; 2128 cols = sbuf_j + rend-rstart + 1; 2129 vals = sbuf_a; 2130 for (i=0; i<rend-rstart; i++){ 2131 row = i + rstart; 2132 ncols = rptr[i+1] - rptr[i]; 2133 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2134 vals += ncols; 2135 cols += ncols; 2136 } 2137 /* insert received matrix entries */ 2138 for (imdex=0; imdex<nrecvs; imdex++){ 2139 rstart = rowrange[recv_rank[imdex]]; 2140 rend = rowrange[recv_rank[imdex]+1]; 2141 rptr = rbuf_j[imdex]; 2142 cols = rbuf_j[imdex] + rend-rstart + 1; 2143 vals = rbuf_a[imdex]; 2144 for (i=0; i<rend-rstart; i++){ 2145 row = i + rstart; 2146 ncols = rptr[i+1] - rptr[i]; 2147 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2148 vals += ncols; 2149 cols += ncols; 2150 } 2151 } 2152 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2155 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2156 if (reuse == MAT_INITIAL_MATRIX){ 2157 PetscContainer container; 2158 *matredundant = C; 2159 /* create a supporting struct and attach it to C for reuse */ 2160 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2161 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2162 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2163 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2164 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2165 2166 redund->nzlocal = nzlocal; 2167 redund->nsends = nsends; 2168 redund->nrecvs = nrecvs; 2169 redund->send_rank = send_rank; 2170 redund->sbuf_nz = sbuf_nz; 2171 redund->sbuf_j = sbuf_j; 2172 redund->sbuf_a = sbuf_a; 2173 redund->rbuf_j = 
rbuf_j; 2174 redund->rbuf_a = rbuf_a; 2175 2176 redund->MatDestroy = C->ops->destroy; 2177 C->ops->destroy = MatDestroy_MatRedundant; 2178 } 2179 PetscFunctionReturn(0); 2180 } 2181 2182 /* -------------------------------------------------------------------*/ 2183 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2184 MatGetRow_MPIAIJ, 2185 MatRestoreRow_MPIAIJ, 2186 MatMult_MPIAIJ, 2187 /* 4*/ MatMultAdd_MPIAIJ, 2188 MatMultTranspose_MPIAIJ, 2189 MatMultTransposeAdd_MPIAIJ, 2190 #ifdef PETSC_HAVE_PBGL 2191 MatSolve_MPIAIJ, 2192 #else 2193 0, 2194 #endif 2195 0, 2196 0, 2197 /*10*/ 0, 2198 0, 2199 0, 2200 MatRelax_MPIAIJ, 2201 MatTranspose_MPIAIJ, 2202 /*15*/ MatGetInfo_MPIAIJ, 2203 MatEqual_MPIAIJ, 2204 MatGetDiagonal_MPIAIJ, 2205 MatDiagonalScale_MPIAIJ, 2206 MatNorm_MPIAIJ, 2207 /*20*/ MatAssemblyBegin_MPIAIJ, 2208 MatAssemblyEnd_MPIAIJ, 2209 0, 2210 MatSetOption_MPIAIJ, 2211 MatZeroEntries_MPIAIJ, 2212 /*25*/ MatZeroRows_MPIAIJ, 2213 0, 2214 #ifdef PETSC_HAVE_PBGL 2215 MatLUFactorNumeric_MPIAIJ, 2216 #else 2217 0, 2218 #endif 2219 0, 2220 0, 2221 /*30*/ MatSetUpPreallocation_MPIAIJ, 2222 #ifdef PETSC_HAVE_PBGL 2223 MatILUFactorSymbolic_MPIAIJ, 2224 #else 2225 0, 2226 #endif 2227 0, 2228 0, 2229 0, 2230 /*35*/ MatDuplicate_MPIAIJ, 2231 0, 2232 0, 2233 0, 2234 0, 2235 /*40*/ MatAXPY_MPIAIJ, 2236 MatGetSubMatrices_MPIAIJ, 2237 MatIncreaseOverlap_MPIAIJ, 2238 MatGetValues_MPIAIJ, 2239 MatCopy_MPIAIJ, 2240 /*45*/ 0, 2241 MatScale_MPIAIJ, 2242 0, 2243 0, 2244 0, 2245 /*50*/ MatSetBlockSize_MPIAIJ, 2246 0, 2247 0, 2248 0, 2249 0, 2250 /*55*/ MatFDColoringCreate_MPIAIJ, 2251 0, 2252 MatSetUnfactored_MPIAIJ, 2253 MatPermute_MPIAIJ, 2254 0, 2255 /*60*/ MatGetSubMatrix_MPIAIJ, 2256 MatDestroy_MPIAIJ, 2257 MatView_MPIAIJ, 2258 0, 2259 0, 2260 /*65*/ 0, 2261 0, 2262 0, 2263 0, 2264 0, 2265 /*70*/ 0, 2266 0, 2267 MatSetColoring_MPIAIJ, 2268 #if defined(PETSC_HAVE_ADIC) 2269 MatSetValuesAdic_MPIAIJ, 2270 #else 2271 0, 2272 #endif 2273 MatSetValuesAdifor_MPIAIJ, 2274 /*75*/ 0, 2275 0, 2276 0, 2277 0, 2278 0, 2279 /*80*/ 0, 2280 0, 2281 0, 2282 0, 2283 /*84*/ MatLoad_MPIAIJ, 2284 0, 2285 0, 2286 0, 2287 0, 2288 0, 2289 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2290 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2291 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2292 MatPtAP_Basic, 2293 MatPtAPSymbolic_MPIAIJ, 2294 /*95*/ MatPtAPNumeric_MPIAIJ, 2295 0, 2296 0, 2297 0, 2298 0, 2299 /*100*/0, 2300 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2301 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2302 MatConjugate_MPIAIJ, 2303 0, 2304 /*105*/MatSetValuesRow_MPIAIJ, 2305 MatRealPart_MPIAIJ, 2306 MatImaginaryPart_MPIAIJ, 2307 0, 2308 0, 2309 /*110*/0, 2310 MatGetRedundantMatrix_MPIAIJ}; 2311 2312 /* ----------------------------------------------------------------------------------------*/ 2313 2314 EXTERN_C_BEGIN 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2317 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2318 { 2319 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2324 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 EXTERN_C_END 2328 2329 EXTERN_C_BEGIN 2330 #undef __FUNCT__ 2331 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2332 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2333 { 2334 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2335 PetscErrorCode ierr; 2336 2337 PetscFunctionBegin; 2338 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2339 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 EXTERN_C_END 2343 2344 #include "petscpc.h" 2345 EXTERN_C_BEGIN 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2348 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2349 { 2350 Mat_MPIAIJ *b; 2351 PetscErrorCode ierr; 2352 PetscInt i; 2353 2354 PetscFunctionBegin; 2355 B->preallocated = PETSC_TRUE; 2356 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2357 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2358 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2359 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2360 2361 B->rmap.bs = B->cmap.bs = 1; 2362 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2363 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2364 if (d_nnz) { 2365 for (i=0; i<B->rmap.n; i++) { 2366 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2367 } 2368 } 2369 if (o_nnz) { 2370 for (i=0; i<B->rmap.n; i++) { 2371 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2372 } 2373 } 2374 b = (Mat_MPIAIJ*)B->data; 2375 2376 /* Explicitly create 2 MATSEQAIJ matrices. */ 2377 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2378 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2379 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2380 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2381 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2382 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2383 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2384 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2385 2386 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2387 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2388 2389 PetscFunctionReturn(0); 2390 } 2391 EXTERN_C_END 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2395 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2396 { 2397 Mat mat; 2398 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 *newmat = 0; 2403 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2404 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2405 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2406 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2407 a = (Mat_MPIAIJ*)mat->data; 2408 2409 mat->factor = matin->factor; 2410 mat->rmap.bs = matin->rmap.bs; 2411 mat->assembled = PETSC_TRUE; 2412 mat->insertmode = NOT_SET_VALUES; 2413 mat->preallocated = PETSC_TRUE; 2414 2415 a->size = oldmat->size; 2416 a->rank = oldmat->rank; 2417 a->donotstash = oldmat->donotstash; 2418 a->roworiented = oldmat->roworiented; 2419 a->rowindices = 0; 2420 a->rowvalues = 0; 2421 a->getrowactive = PETSC_FALSE; 2422 2423 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2424 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2425 2426 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2427 if (oldmat->colmap) { 2428 #if defined (PETSC_USE_CTABLE) 2429 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2430 #else 2431 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2432 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2433 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2434 #endif 2435 } else a->colmap = 0; 2436 if (oldmat->garray) { 2437 PetscInt len; 2438 len = oldmat->B->cmap.n; 2439 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2440 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2441 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2442 } else a->garray = 0; 2443 2444 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2445 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2446 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2447 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2448 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2450 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2452 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2453 *newmat = mat; 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #include "petscsys.h" 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatLoad_MPIAIJ" 2461 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2462 { 2463 Mat A; 2464 PetscScalar *vals,*svals; 2465 MPI_Comm comm = ((PetscObject)viewer)->comm; 2466 MPI_Status status; 2467 PetscErrorCode ierr; 2468 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2469 PetscInt i,nz,j,rstart,rend,mmax; 2470 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2471 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2472 PetscInt cend,cstart,n,*rowners; 2473 int fd; 2474 2475 PetscFunctionBegin; 2476 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2477 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2478 if (!rank) { 2479 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2480 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2481 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2482 } 2483 2484 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2485 M = header[1]; N = header[2]; 2486 /* determine ownership of all rows */ 2487 m = M/size + ((M % size) > rank); 2488 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2489 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2490 2491 /* First process needs enough room for process with most rows */ 2492 if (!rank) { 2493 mmax = rowners[1]; 2494 for (i=2; i<size; i++) { 2495 mmax = PetscMax(mmax,rowners[i]); 2496 } 2497 } else mmax = m; 2498 2499 rowners[0] = 0; 2500 for (i=2; i<=size; i++) { 2501 rowners[i] += rowners[i-1]; 2502 } 2503 rstart = rowners[rank]; 2504 rend = rowners[rank+1]; 2505 2506 /* distribute row lengths to all processors */ 2507 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2508 if (!rank) { 2509 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2510 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2511 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2512 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2513 for (j=0; j<m; j++) { 2514 procsnz[0] += ourlens[j]; 2515 } 2516 for (i=1; i<size; i++) { 2517 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2518 /* calculate the number of nonzeros on each processor */ 2519 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2520 procsnz[i] += rowlengths[j]; 2521 } 2522 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2523 } 2524 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2525 } else { 2526 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2527 } 2528 2529 if (!rank) { 2530 /* determine max buffer needed and allocate it */ 2531 maxnz = 0; 2532 for (i=0; i<size; i++) { 2533 maxnz = PetscMax(maxnz,procsnz[i]); 2534 } 2535 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2536 2537 /* read in my part of the matrix column indices */ 2538 nz = procsnz[0]; 2539 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2540 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2541 2542 /* read in every one elses and ship off */ 2543 for (i=1; i<size; i++) { 2544 nz = procsnz[i]; 2545 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2546 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2547 } 2548 ierr = PetscFree(cols);CHKERRQ(ierr); 2549 } else { 2550 /* determine buffer space needed for message */ 2551 nz = 0; 2552 for (i=0; i<m; i++) { 2553 nz += ourlens[i]; 2554 } 2555 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2556 2557 /* receive message of column indices*/ 2558 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2559 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2560 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2561 } 2562 2563 /* determine column ownership if matrix is not square */ 2564 if (N != M) { 2565 n = N/size + ((N % size) > rank); 2566 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2567 cstart = cend - n; 2568 } else { 2569 cstart = rstart; 2570 cend = rend; 2571 n = cend - cstart; 2572 } 2573 2574 /* loop over local rows, determining number of off diagonal entries */ 2575 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2576 jj = 0; 2577 for (i=0; i<m; i++) { 2578 for (j=0; j<ourlens[i]; j++) { 2579 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2580 jj++; 2581 } 2582 } 2583 2584 /* create our matrix */ 2585 for (i=0; i<m; i++) { 2586 ourlens[i] -= offlens[i]; 2587 } 2588 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2589 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2590 ierr = MatSetType(A,type);CHKERRQ(ierr); 2591 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2592 2593 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2594 for (i=0; i<m; i++) { 2595 ourlens[i] += offlens[i]; 2596 } 2597 2598 if (!rank) { 2599 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2600 2601 /* read in my part of the matrix numerical values */ 2602 nz = procsnz[0]; 2603 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2604 2605 /* insert into matrix */ 2606 jj = rstart; 2607 smycols = mycols; 2608 svals = vals; 2609 for (i=0; i<m; i++) { 2610 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2611 smycols += ourlens[i]; 2612 svals += ourlens[i]; 2613 jj++; 2614 } 2615 2616 /* read in other processors and ship out */ 2617 for 
(i=1; i<size; i++) { 2618 nz = procsnz[i]; 2619 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2620 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2621 } 2622 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2623 } else { 2624 /* receive numeric values */ 2625 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2626 2627 /* receive message of values*/ 2628 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2629 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2630 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2631 2632 /* insert into matrix */ 2633 jj = rstart; 2634 smycols = mycols; 2635 svals = vals; 2636 for (i=0; i<m; i++) { 2637 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2638 smycols += ourlens[i]; 2639 svals += ourlens[i]; 2640 jj++; 2641 } 2642 } 2643 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2644 ierr = PetscFree(vals);CHKERRQ(ierr); 2645 ierr = PetscFree(mycols);CHKERRQ(ierr); 2646 ierr = PetscFree(rowners);CHKERRQ(ierr); 2647 2648 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 *newmat = A; 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2656 /* 2657 Not great since it makes two copies of the submatrix, first an SeqAIJ 2658 in local and then by concatenating the local matrices the end result. 2659 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2660 */ 2661 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2662 { 2663 PetscErrorCode ierr; 2664 PetscMPIInt rank,size; 2665 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2666 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2667 Mat *local,M,Mreuse; 2668 PetscScalar *vwork,*aa; 2669 MPI_Comm comm = mat->comm; 2670 Mat_SeqAIJ *aij; 2671 2672 2673 PetscFunctionBegin; 2674 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2676 2677 if (call == MAT_REUSE_MATRIX) { 2678 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2679 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2680 local = &Mreuse; 2681 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2682 } else { 2683 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2684 Mreuse = *local; 2685 ierr = PetscFree(local);CHKERRQ(ierr); 2686 } 2687 2688 /* 2689 m - number of local rows 2690 n - number of columns (same on all processors) 2691 rstart - first row in new global matrix generated 2692 */ 2693 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2694 if (call == MAT_INITIAL_MATRIX) { 2695 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2696 ii = aij->i; 2697 jj = aij->j; 2698 2699 /* 2700 Determine the number of non-zeros in the diagonal and off-diagonal 2701 portions of the matrix in order to do correct preallocation 2702 */ 2703 2704 /* first get start and end of "diagonal" columns */ 2705 if (csize == PETSC_DECIDE) { 2706 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2707 if (mglobal == n) { /* square matrix */ 2708 nlocal = m; 2709 } else { 2710 nlocal = n/size + ((n % size) > rank); 2711 } 2712 } else { 2713 nlocal = csize; 2714 } 2715 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2716 rstart = rend - nlocal; 2717 if (rank == size - 1 && rend != n) { 2718 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2719 } 2720 2721 /* next, compute all the lengths */ 2722 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2723 olens = dlens + m; 2724 for (i=0; i<m; i++) { 2725 jend = ii[i+1] - ii[i]; 2726 olen = 0; 2727 dlen = 0; 2728 for (j=0; j<jend; j++) { 2729 if (*jj < rstart || *jj >= rend) olen++; 2730 else dlen++; 2731 jj++; 2732 } 2733 olens[i] = olen; 2734 dlens[i] = dlen; 2735 } 2736 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2737 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2738 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2739 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2740 ierr = PetscFree(dlens);CHKERRQ(ierr); 2741 } else { 2742 PetscInt ml,nl; 2743 2744 M = *newmat; 2745 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2746 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2747 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2748 /* 2749 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2750 rather than the slower MatSetValues(). 2751 */ 2752 M->was_assembled = PETSC_TRUE; 2753 M->assembled = PETSC_FALSE; 2754 } 2755 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2756 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2757 ii = aij->i; 2758 jj = aij->j; 2759 aa = aij->a; 2760 for (i=0; i<m; i++) { 2761 row = rstart + i; 2762 nz = ii[i+1] - ii[i]; 2763 cwork = jj; jj += nz; 2764 vwork = aa; aa += nz; 2765 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2766 } 2767 2768 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2770 *newmat = M; 2771 2772 /* save submatrix used in processor for next request */ 2773 if (call == MAT_INITIAL_MATRIX) { 2774 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2775 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2776 } 2777 2778 PetscFunctionReturn(0); 2779 } 2780 2781 EXTERN_C_BEGIN 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2784 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2785 { 2786 PetscInt m,cstart, cend,j,nnz,i,d; 2787 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2788 const PetscInt *JJ; 2789 PetscScalar *values; 2790 PetscErrorCode ierr; 2791 2792 PetscFunctionBegin; 2793 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2794 2795 B->rmap.bs = B->cmap.bs = 1; 2796 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2797 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2798 m = B->rmap.n; 2799 cstart = B->cmap.rstart; 2800 cend = B->cmap.rend; 2801 rstart = B->rmap.rstart; 2802 2803 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2804 o_nnz = d_nnz + m; 2805 2806 for (i=0; i<m; i++) { 2807 nnz = Ii[i+1]- Ii[i]; 2808 JJ = J + Ii[i]; 2809 nnz_max = PetscMax(nnz_max,nnz); 2810 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2811 for (j=0; j<nnz; j++) { 2812 if (*JJ >= cstart) break; 2813 JJ++; 2814 } 2815 d = 0; 2816 for (; j<nnz; j++) { 2817 if (*JJ++ >= cend) break; 2818 d++; 2819 } 2820 
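      /* The column indices of each row are required to be sorted (see the manual page
         for MatMPIAIJSetPreallocationCSR() below): the first scan skipped the columns
         smaller than cstart and d now counts the columns that fall in [cstart,cend),
         so the remaining nnz-d columns of this row belong to the off-diagonal block. */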
d_nnz[i] = d; 2821 o_nnz[i] = nnz - d; 2822 } 2823 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2824 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2825 2826 if (v) values = (PetscScalar*)v; 2827 else { 2828 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2829 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2830 } 2831 2832 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2833 for (i=0; i<m; i++) { 2834 ii = i + rstart; 2835 nnz = Ii[i+1]- Ii[i]; 2836 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2837 } 2838 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2841 2842 if (!v) { 2843 ierr = PetscFree(values);CHKERRQ(ierr); 2844 } 2845 PetscFunctionReturn(0); 2846 } 2847 EXTERN_C_END 2848 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2851 /*@ 2852 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2853 (the default parallel PETSc format). 2854 2855 Collective on MPI_Comm 2856 2857 Input Parameters: 2858 + B - the matrix 2859 . i - the indices into j for the start of each local row (starts with zero) 2860 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2861 - v - optional values in the matrix 2862 2863 Level: developer 2864 2865 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2866 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2867 2868 .keywords: matrix, aij, compressed row, sparse, parallel 2869 2870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2871 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2872 @*/ 2873 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2874 { 2875 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2876 2877 PetscFunctionBegin; 2878 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2879 if (f) { 2880 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2881 } 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2887 /*@C 2888 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2889 (the default parallel PETSc format). For good matrix assembly performance 2890 the user should preallocate the matrix storage by setting the parameters 2891 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2892 performance can be increased by more than a factor of 50. 2893 2894 Collective on MPI_Comm 2895 2896 Input Parameters: 2897 + A - the matrix 2898 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2899 (same value is used for all local rows) 2900 . d_nnz - array containing the number of nonzeros in the various rows of the 2901 DIAGONAL portion of the local submatrix (possibly different for each row) 2902 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2903 The size of this array is equal to the number of local rows, i.e 'm'. 2904 You must leave room for the diagonal entry even if it is zero. 2905 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or PETSC_NULL, if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@C
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
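
   For example, a sketch of a typical call (mlocal, nglobal, ii, jj, and vv are placeholder
   names for the caller's own local CSR arrays; no assembly calls are needed afterwards
   because this routine assembles the matrix itself):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,PETSC_DETERMINE,nglobal,ii,jj,vv,&A);CHKERRQ(ierr);
   /* ... use A ... */
   ierr = MatDestroy(A);CHKERRQ(ierr);
.ve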
   The i and j indices are 0 based

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.
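
   A typical call sequence is sketched below (m, n, M, N, d_nz, and o_nz stand for values
   the caller has chosen; see the example further down for how to pick the preallocation
   counts, and a square matrix is assumed here so that a diagonal entry is always legal):
.vb
   Mat         A;
   PetscInt    rstart,rend,row;
   PetscScalar one = 1.0;
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
   for (row=rstart; row<rend; row++) {
     ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
   }
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve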
3117 3118 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3119 processor then it must be used on all processors that share the object for 3120 that argument. 3121 3122 The user MUST specify either the local or global matrix dimensions 3123 (possibly both). 3124 3125 The parallel matrix is partitioned across processors such that the 3126 first m0 rows belong to process 0, the next m1 rows belong to 3127 process 1, the next m2 rows belong to process 2, etc., where 3128 m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores 3129 values corresponding to an [m x N] submatrix. 3130 3131 The columns are logically partitioned with the n0 columns belonging 3132 to the 0th partition, the next n1 columns belonging to the next 3133 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'. 3134 3135 The DIAGONAL portion of the local submatrix on any given processor 3136 is the submatrix corresponding to the rows and columns m,n owned by 3137 that processor, i.e. the diagonal submatrix on 3138 process 0 is [m0 x n0], on process 1 is [m1 x n1], 3139 etc. The remaining portion of the local submatrix [m x (N-n)] 3140 constitutes the OFF-DIAGONAL portion. The example below 3141 illustrates this concept. 3142 3143 For a square global matrix we define each processor's diagonal portion 3144 to be its local rows and the corresponding columns (a square submatrix); 3145 each processor's off-diagonal portion encompasses the remainder of the 3146 local matrix (a rectangular submatrix). 3147 3148 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3149 3150 When calling this routine with a single process communicator, a matrix of 3151 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3152 type of communicator, use the construction mechanism: 3153 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3154 3155 By default, this format uses inodes (identical nodes) when possible. 3156 We search for consecutive rows with the same nonzero structure, thereby 3157 reusing matrix information to achieve increased efficiency. 3158 3159 Options Database Keys: 3160 + -mat_no_inode - Do not use inodes 3161 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3162 - -mat_aij_oneindex - Internally use indexing starting at 1 3163 rather than 0. Note that when calling MatSetValues(), 3164 the user still MUST index entries starting at 0! 3165 3166 3167 Example usage: 3168 3169 Consider the following 8x8 matrix with 34 nonzero values, that is 3170 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3171 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 3172 as follows: 3173 3174 .vb 3175 1 2 0 | 0 3 0 | 0 4 3176 Proc0 0 5 6 | 7 0 0 | 8 0 3177 9 0 10 | 11 0 0 | 12 0 3178 ------------------------------------- 3179 13 0 14 | 15 16 17 | 0 0 3180 Proc1 0 18 0 | 19 20 21 | 0 0 3181 0 0 0 | 22 23 0 | 24 0 3182 ------------------------------------- 3183 Proc2 25 26 27 | 0 0 28 | 29 0 3184 30 0 0 | 31 32 33 | 0 34 3185 .ve 3186 3187 This can be represented as a collection of submatrices as: 3188 3189 .vb 3190 A B C 3191 D E F 3192 G H I 3193 .ve 3194 3195 Where the submatrices A,B,C are owned by proc0, D,E,F are 3196 owned by proc1, and G,H,I are owned by proc2. 3197 3198 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3199 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3200 The 'M','N' parameters are 8,8, and have the same values on all procs.
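   With the sizes above, each process would make the same call (a sketch; the preallocation
   arguments d_nz,d_nnz,o_nz,o_nnz are discussed next):
.vb
      /* on proc0 and proc1: m = n = 3;   on proc2: m = n = 2 */
      ierr = MatCreateMPIAIJ(comm,m,n,8,8,d_nz,d_nnz,o_nz,o_nnz,&A);CHKERRQ(ierr);
.ve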
3201 3202 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3203 the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3204 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3205 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3206 part as SeqAIJ matrices. For example, proc1 stores [E] as one SeqAIJ 3207 matrix and [DF] as another SeqAIJ matrix. 3208 3209 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3210 allocated for every row of the local DIAGONAL submatrix, and o_nz 3211 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3212 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3213 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3214 In this case, the values of d_nz,o_nz are: 3215 .vb 3216 proc0 : d_nz = 2, o_nz = 2 3217 proc1 : d_nz = 3, o_nz = 2 3218 proc2 : d_nz = 1, o_nz = 4 3219 .ve 3220 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3221 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3222 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3223 34 values. 3224 3225 When the d_nnz, o_nnz parameters are specified, the storage is specified 3226 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3227 In the above case the values for d_nnz,o_nnz are: 3228 .vb 3229 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3230 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3231 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3232 .ve 3233 Here the space allocated is the sum of all the above values, i.e. 34, and 3234 hence the preallocation is exact. 3235 3236 Level: intermediate 3237 3238 .keywords: matrix, aij, compressed row, sparse, parallel 3239 3240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3241 MPIAIJ, MatCreateMPIAIJWithArrays() 3242 @*/ 3243 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3244 { 3245 PetscErrorCode ierr; 3246 PetscMPIInt size; 3247 3248 PetscFunctionBegin; 3249 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3250 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3251 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3252 if (size > 1) { 3253 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3254 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3255 } else { 3256 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3257 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3258 } 3259 PetscFunctionReturn(0); 3260 } 3261 3262 #undef __FUNCT__ 3263 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3264 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3265 { 3266 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3267 3268 PetscFunctionBegin; 3269 *Ad = a->A; 3270 *Ao = a->B; 3271 *colmap = a->garray; 3272 PetscFunctionReturn(0); 3273 } 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3277 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3278 { 3279 PetscErrorCode ierr; 3280 PetscInt i; 3281 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3282 3283 PetscFunctionBegin; 3284 if (coloring->ctype == IS_COLORING_GLOBAL) { 3285 ISColoringValue *allcolors,*colors; 3286 ISColoring ocoloring; 3287 3288 /* set coloring for diagonal portion */ 3289 ierr =
MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3290 3291 /* set coloring for off-diagonal portion */ 3292 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3293 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3294 for (i=0; i<a->B->cmap.n; i++) { 3295 colors[i] = allcolors[a->garray[i]]; 3296 } 3297 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3298 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3299 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3300 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3301 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3302 ISColoringValue *colors; 3303 PetscInt *larray; 3304 ISColoring ocoloring; 3305 3306 /* set coloring for diagonal portion */ 3307 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3308 for (i=0; i<a->A->cmap.n; i++) { 3309 larray[i] = i + A->cmap.rstart; 3310 } 3311 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3312 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3313 for (i=0; i<a->A->cmap.n; i++) { 3314 colors[i] = coloring->colors[larray[i]]; 3315 } 3316 ierr = PetscFree(larray);CHKERRQ(ierr); 3317 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3318 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3319 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3320 3321 /* set coloring for off-diagonal portion */ 3322 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3323 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3324 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3325 for (i=0; i<a->B->cmap.n; i++) { 3326 colors[i] = coloring->colors[larray[i]]; 3327 } 3328 ierr = PetscFree(larray);CHKERRQ(ierr); 3329 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3330 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3331 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3332 } else { 3333 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3334 } 3335 3336 PetscFunctionReturn(0); 3337 } 3338 3339 #if defined(PETSC_HAVE_ADIC) 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3342 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3343 { 3344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3345 PetscErrorCode ierr; 3346 3347 PetscFunctionBegin; 3348 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3349 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 #endif 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3356 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3357 { 3358 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3359 PetscErrorCode ierr; 3360 3361 PetscFunctionBegin; 3362 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3363 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3364 PetscFunctionReturn(0); 3365 } 3366 3367 #undef __FUNCT__ 3368 #define __FUNCT__ "MatMerge" 3369 /*@C 3370 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3371 matrices from each processor 3372 3373 Collective on MPI_Comm 
3374 3375 Input Parameters: 3376 + comm - the communicators the parallel matrix will live on 3377 . inmat - the input sequential matrices 3378 . n - number of local columns (or PETSC_DECIDE) 3379 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3380 3381 Output Parameter: 3382 . outmat - the parallel matrix generated 3383 3384 Level: advanced 3385 3386 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3387 3388 @*/ 3389 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3390 { 3391 PetscErrorCode ierr; 3392 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3393 PetscInt *indx; 3394 PetscScalar *values; 3395 3396 PetscFunctionBegin; 3397 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3398 if (scall == MAT_INITIAL_MATRIX){ 3399 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3400 if (n == PETSC_DECIDE){ 3401 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3402 } 3403 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3404 rstart -= m; 3405 3406 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3407 for (i=0;i<m;i++) { 3408 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3409 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3410 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3411 } 3412 /* This routine will ONLY return MPIAIJ type matrix */ 3413 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3414 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3415 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3416 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3417 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3418 3419 } else if (scall == MAT_REUSE_MATRIX){ 3420 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3421 } else { 3422 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3423 } 3424 3425 for (i=0;i<m;i++) { 3426 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3427 Ii = i + rstart; 3428 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3429 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3430 } 3431 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3432 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3433 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3434 3435 PetscFunctionReturn(0); 3436 } 3437 3438 #undef __FUNCT__ 3439 #define __FUNCT__ "MatFileSplit" 3440 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3441 { 3442 PetscErrorCode ierr; 3443 PetscMPIInt rank; 3444 PetscInt m,N,i,rstart,nnz; 3445 size_t len; 3446 const PetscInt *indx; 3447 PetscViewer out; 3448 char *name; 3449 Mat B; 3450 const PetscScalar *values; 3451 3452 PetscFunctionBegin; 3453 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3454 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3455 /* Should this be the type of the diagonal block of A? 
*/ 3456 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3457 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3458 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3459 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3460 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3461 for (i=0;i<m;i++) { 3462 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3463 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3464 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3465 } 3466 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 3469 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3470 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3471 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3472 sprintf(name,"%s.%d",outfile,rank); 3473 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3474 ierr = PetscFree(name); 3475 ierr = MatView(B,out);CHKERRQ(ierr); 3476 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3477 ierr = MatDestroy(B);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 3481 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3484 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3485 { 3486 PetscErrorCode ierr; 3487 Mat_Merge_SeqsToMPI *merge; 3488 PetscContainer container; 3489 3490 PetscFunctionBegin; 3491 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3492 if (container) { 3493 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3494 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3495 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3496 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3497 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3498 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3499 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3500 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3501 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3502 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3503 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3504 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3505 3506 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3507 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(merge);CHKERRQ(ierr); 3510 3511 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3512 PetscFunctionReturn(0); 3513 } 3514 3515 #include "src/mat/utils/freespace.h" 3516 #include "petscbt.h" 3517 static PetscEvent logkey_seqstompinum = 0; 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3520 /*@C 3521 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3522 matrices from each processor 3523 3524 Collective on MPI_Comm 3525 3526 Input Parameters: 3527 + comm - the communicators the parallel matrix will live on 3528 . seqmat - the input sequential matrices 3529 . m - number of local rows (or PETSC_DECIDE) 3530 . n - number of local columns (or PETSC_DECIDE) 3531 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3532 3533 Output Parameter: 3534 . mpimat - the parallel matrix generated 3535 3536 Level: advanced 3537 3538 Notes: 3539 The dimensions of the sequential matrix in each processor MUST be the same. 3540 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3541 destroyed when mpimat is destroyed. 
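      For example, the composed container can be retrieved from the assembled parallel matrix
      as sketched below (its contents are the internal merge data and should be treated as read-only):
.vb
      PetscContainer container;
      ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
.ve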
Call PetscObjectQuery() to access seqmat. 3542 @*/ 3543 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3544 { 3545 PetscErrorCode ierr; 3546 MPI_Comm comm=mpimat->comm; 3547 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3548 PetscMPIInt size,rank,taga,*len_s; 3549 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3550 PetscInt proc,m; 3551 PetscInt **buf_ri,**buf_rj; 3552 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3553 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3554 MPI_Request *s_waits,*r_waits; 3555 MPI_Status *status; 3556 MatScalar *aa=a->a,**abuf_r,*ba_i; 3557 Mat_Merge_SeqsToMPI *merge; 3558 PetscContainer container; 3559 3560 PetscFunctionBegin; 3561 if (!logkey_seqstompinum) { 3562 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3563 } 3564 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3565 3566 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3567 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3568 3569 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3570 if (container) { 3571 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3572 } 3573 bi = merge->bi; 3574 bj = merge->bj; 3575 buf_ri = merge->buf_ri; 3576 buf_rj = merge->buf_rj; 3577 3578 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3579 owners = merge->rowmap.range; 3580 len_s = merge->len_s; 3581 3582 /* send and recv matrix values */ 3583 /*-----------------------------*/ 3584 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3585 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3586 3587 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3588 for (proc=0,k=0; proc<size; proc++){ 3589 if (!len_s[proc]) continue; 3590 i = owners[proc]; 3591 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3592 k++; 3593 } 3594 3595 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3596 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3597 ierr = PetscFree(status);CHKERRQ(ierr); 3598 3599 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3600 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3601 3602 /* insert mat values of mpimat */ 3603 /*----------------------------*/ 3604 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3605 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3606 nextrow = buf_ri_k + merge->nrecv; 3607 nextai = nextrow + merge->nrecv; 3608 3609 for (k=0; k<merge->nrecv; k++){ 3610 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3611 nrows = *(buf_ri_k[k]); 3612 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3613 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3614 } 3615 3616 /* set values of ba */ 3617 m = merge->rowmap.n; 3618 for (i=0; i<m; i++) { 3619 arow = owners[rank] + i; 3620 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3621 bnzi = bi[i+1] - bi[i]; 3622 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3623 3624 /* add local non-zero vals of this proc's seqmat into ba */ 3625 anzi = ai[arow+1] - ai[arow]; 3626 aj = a->j + ai[arow]; 3627 aa = a->a + ai[arow]; 3628 nextaj = 0; 3629 for (j=0; nextaj<anzi; j++){ 3630 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3631 ba_i[j] += aa[nextaj++]; 3632 } 3633 } 3634 3635 /* add received vals into ba */ 3636 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3637 /* i-th row */ 3638 if (i == *nextrow[k]) { 3639 anzi = *(nextai[k]+1) - *nextai[k]; 3640 aj = buf_rj[k] + *(nextai[k]); 3641 aa = abuf_r[k] + *(nextai[k]); 3642 nextaj = 0; 3643 for (j=0; nextaj<anzi; j++){ 3644 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3645 ba_i[j] += aa[nextaj++]; 3646 } 3647 } 3648 nextrow[k]++; nextai[k]++; 3649 } 3650 } 3651 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3652 } 3653 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 3656 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3657 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3658 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3659 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3660 PetscFunctionReturn(0); 3661 } 3662 3663 static PetscEvent logkey_seqstompisym = 0; 3664 #undef __FUNCT__ 3665 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3666 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3667 { 3668 PetscErrorCode ierr; 3669 Mat B_mpi; 3670 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3671 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3672 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3673 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3674 PetscInt len,proc,*dnz,*onz; 3675 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3676 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3677 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3678 MPI_Status *status; 3679 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3680 PetscBT lnkbt; 3681 Mat_Merge_SeqsToMPI *merge; 3682 PetscContainer container; 3683 3684 PetscFunctionBegin; 3685 if (!logkey_seqstompisym) { 3686 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3687 } 3688 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3689 3690 /* make sure it is a PETSc comm */ 3691 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3693 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3694 3695 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3696 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3697 3698 /* determine row ownership */ 3699 /*---------------------------------------------------------*/ 3700 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3701 merge->rowmap.n = m; 3702 merge->rowmap.N = M; 3703 merge->rowmap.bs = 1; 3704 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3705 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3706 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3707 3708 m = merge->rowmap.n; 3709 M = merge->rowmap.N; 3710 owners = merge->rowmap.range; 3711 3712 /* determine the number of messages to send, their lengths */ 3713 /*---------------------------------------------------------*/ 3714 len_s = merge->len_s; 3715 3716 len = 0; /* length of buf_si[] */ 3717 merge->nsend = 0; 3718 for (proc=0; proc<size; proc++){ 3719 len_si[proc] = 0; 3720 if (proc == rank){ 3721 len_s[proc] = 0; 3722 } else { 3723 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3724 
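        /* Note: len_si[proc] set above is provisional; once the number of nonempty local rows
           destined for [proc] is known (below) it is reset to 2*(nrows+1), the length of the
           i-structure message.  len_s[proc], computed next, counts the nonzero entries
           (column indices and values) of seqmat that will be shipped to [proc]. */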
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3725 } 3726 if (len_s[proc]) { 3727 merge->nsend++; 3728 nrows = 0; 3729 for (i=owners[proc]; i<owners[proc+1]; i++){ 3730 if (ai[i+1] > ai[i]) nrows++; 3731 } 3732 len_si[proc] = 2*(nrows+1); 3733 len += len_si[proc]; 3734 } 3735 } 3736 3737 /* determine the number and length of messages to receive for ij-structure */ 3738 /*-------------------------------------------------------------------------*/ 3739 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3740 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3741 3742 /* post the Irecv of j-structure */ 3743 /*-------------------------------*/ 3744 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3745 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3746 3747 /* post the Isend of j-structure */ 3748 /*--------------------------------*/ 3749 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3750 sj_waits = si_waits + merge->nsend; 3751 3752 for (proc=0, k=0; proc<size; proc++){ 3753 if (!len_s[proc]) continue; 3754 i = owners[proc]; 3755 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3756 k++; 3757 } 3758 3759 /* receives and sends of j-structure are complete */ 3760 /*------------------------------------------------*/ 3761 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3762 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3763 3764 /* send and recv i-structure */ 3765 /*---------------------------*/ 3766 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3767 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3768 3769 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3770 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3771 for (proc=0,k=0; proc<size; proc++){ 3772 if (!len_s[proc]) continue; 3773 /* form outgoing message for i-structure: 3774 buf_si[0]: nrows to be sent 3775 [1:nrows]: row index (global) 3776 [nrows+1:2*nrows+1]: i-structure index 3777 */ 3778 /*-------------------------------------------*/ 3779 nrows = len_si[proc]/2 - 1; 3780 buf_si_i = buf_si + nrows+1; 3781 buf_si[0] = nrows; 3782 buf_si_i[0] = 0; 3783 nrows = 0; 3784 for (i=owners[proc]; i<owners[proc+1]; i++){ 3785 anzi = ai[i+1] - ai[i]; 3786 if (anzi) { 3787 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3788 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3789 nrows++; 3790 } 3791 } 3792 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3793 k++; 3794 buf_si += len_si[proc]; 3795 } 3796 3797 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3798 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3799 3800 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3801 for (i=0; i<merge->nrecv; i++){ 3802 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3803 } 3804 3805 ierr = PetscFree(len_si);CHKERRQ(ierr); 3806 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3807 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3808 ierr = 
PetscFree(si_waits);CHKERRQ(ierr); 3809 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3810 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3811 ierr = PetscFree(status);CHKERRQ(ierr); 3812 3813 /* compute a local seq matrix in each processor */ 3814 /*----------------------------------------------*/ 3815 /* allocate bi array and free space for accumulating nonzero column info */ 3816 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3817 bi[0] = 0; 3818 3819 /* create and initialize a linked list */ 3820 nlnk = N+1; 3821 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3822 3823 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3824 len = 0; 3825 len = ai[owners[rank+1]] - ai[owners[rank]]; 3826 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3827 current_space = free_space; 3828 3829 /* determine symbolic info for each local row */ 3830 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3831 nextrow = buf_ri_k + merge->nrecv; 3832 nextai = nextrow + merge->nrecv; 3833 for (k=0; k<merge->nrecv; k++){ 3834 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3835 nrows = *buf_ri_k[k]; 3836 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3837 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3838 } 3839 3840 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3841 len = 0; 3842 for (i=0;i<m;i++) { 3843 bnzi = 0; 3844 /* add local non-zero cols of this proc's seqmat into lnk */ 3845 arow = owners[rank] + i; 3846 anzi = ai[arow+1] - ai[arow]; 3847 aj = a->j + ai[arow]; 3848 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3849 bnzi += nlnk; 3850 /* add received col data into lnk */ 3851 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3852 if (i == *nextrow[k]) { /* i-th row */ 3853 anzi = *(nextai[k]+1) - *nextai[k]; 3854 aj = buf_rj[k] + *nextai[k]; 3855 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3856 bnzi += nlnk; 3857 nextrow[k]++; nextai[k]++; 3858 } 3859 } 3860 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3861 3862 /* if free space is not available, make more free space */ 3863 if (current_space->local_remaining<bnzi) { 3864 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3865 nspacedouble++; 3866 } 3867 /* copy data into free space, then initialize lnk */ 3868 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3869 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3870 3871 current_space->array += bnzi; 3872 current_space->local_used += bnzi; 3873 current_space->local_remaining -= bnzi; 3874 3875 bi[i+1] = bi[i] + bnzi; 3876 } 3877 3878 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3879 3880 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3881 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3882 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3883 3884 /* create symbolic parallel matrix B_mpi */ 3885 /*---------------------------------------*/ 3886 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3887 if (n==PETSC_DECIDE) { 3888 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3889 } else { 3890 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3891 } 3892 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3893 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3894 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3895 3896 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3897 B_mpi->assembled = PETSC_FALSE; 3898 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3899 merge->bi = bi; 3900 merge->bj = bj; 3901 merge->buf_ri = buf_ri; 3902 merge->buf_rj = buf_rj; 3903 merge->coi = PETSC_NULL; 3904 merge->coj = PETSC_NULL; 3905 merge->owners_co = PETSC_NULL; 3906 3907 /* attach the supporting struct to B_mpi for reuse */ 3908 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3909 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3910 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3911 *mpimat = B_mpi; 3912 3913 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3914 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3915 PetscFunctionReturn(0); 3916 } 3917 3918 static PetscEvent logkey_seqstompi = 0; 3919 #undef __FUNCT__ 3920 #define __FUNCT__ "MatMerge_SeqsToMPI" 3921 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3922 { 3923 PetscErrorCode ierr; 3924 3925 PetscFunctionBegin; 3926 if (!logkey_seqstompi) { 3927 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3928 } 3929 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3930 if (scall == MAT_INITIAL_MATRIX){ 3931 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3932 } 3933 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3934 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3935 PetscFunctionReturn(0); 3936 } 3937 static PetscEvent logkey_getlocalmat = 0; 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatGetLocalMat" 3940 /*@C 3941 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3942 3943 Not Collective 3944 3945 Input Parameters: 3946 + A - the matrix 3947 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3948 3949 Output Parameter: 3950 . 
A_loc - the local sequential matrix generated 3951 3952 Level: developer 3953 3954 @*/ 3955 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3956 { 3957 PetscErrorCode ierr; 3958 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3959 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3960 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3961 PetscScalar *aa=a->a,*ba=b->a,*ca; 3962 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3963 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3964 3965 PetscFunctionBegin; 3966 if (!logkey_getlocalmat) { 3967 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3968 } 3969 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3970 if (scall == MAT_INITIAL_MATRIX){ 3971 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3972 ci[0] = 0; 3973 for (i=0; i<am; i++){ 3974 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3975 } 3976 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3977 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3978 k = 0; 3979 for (i=0; i<am; i++) { 3980 ncols_o = bi[i+1] - bi[i]; 3981 ncols_d = ai[i+1] - ai[i]; 3982 /* off-diagonal portion of A */ 3983 for (jo=0; jo<ncols_o; jo++) { 3984 col = cmap[*bj]; 3985 if (col >= cstart) break; 3986 cj[k] = col; bj++; 3987 ca[k++] = *ba++; 3988 } 3989 /* diagonal portion of A */ 3990 for (j=0; j<ncols_d; j++) { 3991 cj[k] = cstart + *aj++; 3992 ca[k++] = *aa++; 3993 } 3994 /* off-diagonal portion of A */ 3995 for (j=jo; j<ncols_o; j++) { 3996 cj[k] = cmap[*bj++]; 3997 ca[k++] = *ba++; 3998 } 3999 } 4000 /* put together the new matrix */ 4001 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4002 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4003 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4004 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4005 mat->free_a = PETSC_TRUE; 4006 mat->free_ij = PETSC_TRUE; 4007 mat->nonew = 0; 4008 } else if (scall == MAT_REUSE_MATRIX){ 4009 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4010 ci = mat->i; cj = mat->j; ca = mat->a; 4011 for (i=0; i<am; i++) { 4012 /* off-diagonal portion of A */ 4013 ncols_o = bi[i+1] - bi[i]; 4014 for (jo=0; jo<ncols_o; jo++) { 4015 col = cmap[*bj]; 4016 if (col >= cstart) break; 4017 *ca++ = *ba++; bj++; 4018 } 4019 /* diagonal portion of A */ 4020 ncols_d = ai[i+1] - ai[i]; 4021 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4022 /* off-diagonal portion of A */ 4023 for (j=jo; j<ncols_o; j++) { 4024 *ca++ = *ba++; bj++; 4025 } 4026 } 4027 } else { 4028 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4029 } 4030 4031 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4032 PetscFunctionReturn(0); 4033 } 4034 4035 static PetscEvent logkey_getlocalmatcondensed = 0; 4036 #undef __FUNCT__ 4037 #define __FUNCT__ "MatGetLocalMatCondensed" 4038 /*@C 4039 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4040 4041 Not Collective 4042 4043 Input Parameters: 4044 + A - the matrix 4045 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4046 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4047 4048 Output Parameter: 4049 . 
A_loc - the local sequential matrix generated 4050 4051 Level: developer 4052 4053 @*/ 4054 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4055 { 4056 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4057 PetscErrorCode ierr; 4058 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4059 IS isrowa,iscola; 4060 Mat *aloc; 4061 4062 PetscFunctionBegin; 4063 if (!logkey_getlocalmatcondensed) { 4064 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4065 } 4066 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4067 if (!row){ 4068 start = A->rmap.rstart; end = A->rmap.rend; 4069 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4070 } else { 4071 isrowa = *row; 4072 } 4073 if (!col){ 4074 start = A->cmap.rstart; 4075 cmap = a->garray; 4076 nzA = a->A->cmap.n; 4077 nzB = a->B->cmap.n; 4078 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4079 ncols = 0; 4080 for (i=0; i<nzB; i++) { 4081 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4082 else break; 4083 } 4084 imark = i; 4085 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4086 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4087 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4088 ierr = PetscFree(idx);CHKERRQ(ierr); 4089 } else { 4090 iscola = *col; 4091 } 4092 if (scall != MAT_INITIAL_MATRIX){ 4093 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4094 aloc[0] = *A_loc; 4095 } 4096 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4097 *A_loc = aloc[0]; 4098 ierr = PetscFree(aloc);CHKERRQ(ierr); 4099 if (!row){ 4100 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4101 } 4102 if (!col){ 4103 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4104 } 4105 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4106 PetscFunctionReturn(0); 4107 } 4108 4109 static PetscEvent logkey_GetBrowsOfAcols = 0; 4110 #undef __FUNCT__ 4111 #define __FUNCT__ "MatGetBrowsOfAcols" 4112 /*@C 4113 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4114 4115 Collective on Mat 4116 4117 Input Parameters: 4118 + A,B - the matrices in mpiaij format 4119 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4120 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4121 4122 Output Parameter: 4123 + rowb, colb - index sets of rows and columns of B to extract 4124 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4125 - B_seq - the sequential matrix generated 4126 4127 Level: developer 4128 4129 @*/ 4130 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4131 { 4132 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4133 PetscErrorCode ierr; 4134 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4135 IS isrowb,iscolb; 4136 Mat *bseq; 4137 4138 PetscFunctionBegin; 4139 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4140 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4141 } 4142 if (!logkey_GetBrowsOfAcols) { 4143 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4144 } 4145 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4146 4147 if (scall == MAT_INITIAL_MATRIX){ 4148 start = A->cmap.rstart; 4149 cmap = a->garray; 4150 nzA = a->A->cmap.n; 4151 nzB = a->B->cmap.n; 4152 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4153 ncols = 0; 4154 for (i=0; i<nzB; i++) { /* row < local row index */ 4155 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4156 else break; 4157 } 4158 imark = i; 4159 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4160 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4161 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4162 ierr = PetscFree(idx);CHKERRQ(ierr); 4163 *brstart = imark; 4164 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4165 } else { 4166 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4167 isrowb = *rowb; iscolb = *colb; 4168 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4169 bseq[0] = *B_seq; 4170 } 4171 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4172 *B_seq = bseq[0]; 4173 ierr = PetscFree(bseq);CHKERRQ(ierr); 4174 if (!rowb){ 4175 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4176 } else { 4177 *rowb = isrowb; 4178 } 4179 if (!colb){ 4180 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4181 } else { 4182 *colb = iscolb; 4183 } 4184 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4185 PetscFunctionReturn(0); 4186 } 4187 4188 static PetscEvent logkey_GetBrowsOfAocols = 0; 4189 #undef __FUNCT__ 4190 #define __FUNCT__ "MatGetBrowsOfAoCols" 4191 /*@C 4192 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4193 of the OFF-DIAGONAL portion of local A 4194 4195 Collective on Mat 4196 4197 Input Parameters: 4198 + A,B - the matrices in mpiaij format 4199 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4200 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4201 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4202 4203 Output Parameter: 4204 + B_oth - the sequential matrix generated 4205 4206 Level: developer 4207 4208 @*/ 4209 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4210 { 4211 VecScatter_MPI_General *gen_to,*gen_from; 4212 PetscErrorCode ierr; 4213 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4214 Mat_SeqAIJ *b_oth; 4215 VecScatter ctx=a->Mvctx; 4216 MPI_Comm comm=ctx->comm; 4217 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4218 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4219 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4220 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4221 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4222 MPI_Status *sstatus,rstatus; 4223 PetscInt *cols,sbs,rbs; 4224 PetscScalar *vals; 4225 4226 PetscFunctionBegin; 4227 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4228 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4229 } 4230 if (!logkey_GetBrowsOfAocols) { 4231 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4232 } 4233 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4234 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4235 4236 gen_to = (VecScatter_MPI_General*)ctx->todata; 4237 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4238 rvalues = gen_from->values; /* holds the length of receiving row */ 4239 svalues = gen_to->values; /* holds the length of sending row */ 4240 nrecvs = gen_from->n; 4241 nsends = gen_to->n; 4242 4243 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4244 srow = gen_to->indices; /* local row index to be sent */ 4245 sstarts = gen_to->starts; 4246 sprocs = gen_to->procs; 4247 sstatus = gen_to->sstatus; 4248 sbs = gen_to->bs; 4249 rstarts = gen_from->starts; 4250 rprocs = gen_from->procs; 4251 rbs = gen_from->bs; 4252 4253 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4254 if (scall == MAT_INITIAL_MATRIX){ 4255 /* i-array */ 4256 /*---------*/ 4257 /* post receives */ 4258 for (i=0; i<nrecvs; i++){ 4259 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4260 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4261 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4262 } 4263 4264 /* pack the outgoing message */ 4265 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4266 rstartsj = sstartsj + nsends +1; 4267 sstartsj[0] = 0; rstartsj[0] = 0; 4268 len = 0; /* total length of j or a array to be sent */ 4269 k = 0; 4270 for (i=0; i<nsends; i++){ 4271 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4273 for (j=0; j<nrows; j++) { 4274 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4275 for (l=0; l<sbs; l++){ 4276 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4277 rowlen[j*sbs+l] = ncols; 4278 len += ncols; 4279 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4280 } 4281 k++; 4282 } 4283 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4284 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4285 } 4286 /* recvs and sends of i-array are completed */ 4287 i = nrecvs; 4288 while (i--) { 4289 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4290 } 4291 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4292 4293 /* allocate buffers for sending j and a arrays */ 4294 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4295 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4296 4297 /* create i-array of B_oth */ 4298 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4299 b_othi[0] = 0; 4300 len = 0; /* total length of j or a array to be received */ 4301 k = 0; 4302 for (i=0; i<nrecvs; i++){ 4303 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4304 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4305 for (j=0; j<nrows; j++) { 4306 b_othi[k+1] = b_othi[k] + rowlen[j]; 4307 len += rowlen[j]; k++; 4308 } 4309 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4310 } 4311 4312 /* allocate space for j and a arrrays of B_oth */ 4313 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4314 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4315 4316 /* j-array */ 4317 /*---------*/ 4318 /* post receives of j-array */ 4319 for (i=0; i<nrecvs; i++){ 4320 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4321 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4322 } 4323 4324 /* pack the outgoing message j-array */ 4325 k = 0; 4326 for (i=0; i<nsends; i++){ 4327 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4328 bufJ = bufj+sstartsj[i]; 4329 for (j=0; j<nrows; j++) { 4330 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4331 for (ll=0; ll<sbs; ll++){ 4332 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4333 for (l=0; l<ncols; l++){ 4334 *bufJ++ = cols[l]; 4335 } 4336 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4337 } 4338 } 4339 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4340 } 4341 4342 /* recvs and sends of j-array are completed */ 4343 i = nrecvs; 4344 while (i--) { 4345 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4346 } 4347 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4348 } else if (scall == MAT_REUSE_MATRIX){ 4349 sstartsj = *startsj; 4350 rstartsj = sstartsj + nsends +1; 4351 bufa = *bufa_ptr; 4352 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4353 b_otha = b_oth->a; 4354 } else { 4355 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4356 } 4357 4358 /* a-array */ 4359 /*---------*/ 4360 /* post receives of a-array */ 4361 for (i=0; i<nrecvs; i++){ 4362 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4363 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4364 } 4365 4366 /* pack the outgoing message a-array */ 4367 k = 0; 4368 for (i=0; i<nsends; i++){ 4369 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4370 bufA = bufa+sstartsj[i]; 4371 for (j=0; j<nrows; j++) { 4372 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4373 for (ll=0; ll<sbs; ll++){ 4374 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4375 for (l=0; l<ncols; l++){ 4376 *bufA++ = vals[l]; 4377 } 4378 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4379 } 4380 } 4381 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4382 } 4383 /* recvs and sends of a-array are completed */ 4384 i = nrecvs; 4385 while (i--) { 4386 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4387 } 4388 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4389 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4390 4391 if (scall == MAT_INITIAL_MATRIX){ 4392 /* put together the new matrix */ 4393 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4394 4395 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4396 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4397 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4398 b_oth->free_a = PETSC_TRUE; 4399 b_oth->free_ij = PETSC_TRUE; 4400 b_oth->nonew = 0; 4401 4402 ierr = PetscFree(bufj);CHKERRQ(ierr); 4403 if (!startsj || !bufa_ptr){ 4404 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4405 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4406 } else { 4407 *startsj = sstartsj; 4408 *bufa_ptr = bufa; 4409 } 4410 } 4411 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4412 PetscFunctionReturn(0); 4413 } 4414 4415 #undef __FUNCT__ 4416 #define __FUNCT__ "MatGetCommunicationStructs" 4417 /*@C 4418 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4419 4420 Not Collective 4421 4422 Input Parameters: 4423 . A - The matrix in mpiaij format 4424 4425 Output Parameter: 4426 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4427 . colmap - A map from global column index to local index into lvec 4428 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4429 4430 Level: developer 4431 4432 @*/ 4433 #if defined (PETSC_USE_CTABLE) 4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4435 #else 4436 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4437 #endif 4438 { 4439 Mat_MPIAIJ *a; 4440 4441 PetscFunctionBegin; 4442 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4443 PetscValidPointer(lvec, 2) 4444 PetscValidPointer(colmap, 3) 4445 PetscValidPointer(multScatter, 4) 4446 a = (Mat_MPIAIJ *) A->data; 4447 if (lvec) *lvec = a->lvec; 4448 if (colmap) *colmap = a->colmap; 4449 if (multScatter) *multScatter = a->Mvctx; 4450 PetscFunctionReturn(0); 4451 } 4452 4453 EXTERN_C_BEGIN 4454 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4455 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4456 EXTERN_C_END 4457 4458 /*MC 4459 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4460 4461 Options Database Keys: 4462 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4463 4464 Level: beginner 4465 4466 .seealso: MatCreateMPIAIJ 4467 M*/ 4468 4469 EXTERN_C_BEGIN 4470 #undef __FUNCT__ 4471 #define __FUNCT__ "MatCreate_MPIAIJ" 4472 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4473 { 4474 Mat_MPIAIJ *b; 4475 PetscErrorCode ierr; 4476 PetscMPIInt size; 4477 4478 PetscFunctionBegin; 4479 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4480 4481 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4482 B->data = (void*)b; 4483 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4484 B->factor = 0; 4485 B->rmap.bs = 1; 4486 B->assembled = PETSC_FALSE; 4487 B->mapping = 0; 4488 4489 B->insertmode = NOT_SET_VALUES; 4490 b->size = size; 4491 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4492 4493 /* build cache for off array entries formed */ 4494 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4495 b->donotstash = PETSC_FALSE; 4496 b->colmap = 0; 4497 b->garray = 0; 4498 b->roworiented = PETSC_TRUE; 4499 4500 /* stuff used for matrix vector multiply */ 4501 b->lvec = PETSC_NULL; 4502 b->Mvctx = PETSC_NULL; 4503 4504 /* stuff for MatGetRow() */ 4505 b->rowindices = 0; 4506 b->rowvalues = 0; 4507 b->getrowactive = PETSC_FALSE; 4508 4509 4510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4511 "MatStoreValues_MPIAIJ", 4512 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4514 "MatRetrieveValues_MPIAIJ", 4515 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4517 "MatGetDiagonalBlock_MPIAIJ", 4518 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4520 "MatIsTranspose_MPIAIJ", 4521 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4523 "MatMPIAIJSetPreallocation_MPIAIJ", 4524 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4526 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4527 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4529 "MatDiagonalScaleLocal_MPIAIJ", 4530 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4532 "MatConvert_MPIAIJ_MPICSRPERM", 4533 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4534 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4535 "MatConvert_MPIAIJ_MPICRL", 4536 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4537 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4538 PetscFunctionReturn(0); 4539 } 4540 EXTERN_C_END 4541 4542 #undef __FUNCT__ 4543 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4544 /*@C 4545 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4546 and "off-diagonal" part of the matrix in CSR format. 4547 4548 Collective on MPI_Comm 4549 4550 Input Parameters: 4551 + comm - MPI communicator 4552 . m - number of local rows (Cannot be PETSC_DECIDE) 4553 . 
n - This value should be the same as the local size used in creating the 4554 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4555 calculated if N is given) For square matrices n is almost always m. 4556 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4557 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4558 . i - row indices for "diagonal" portion of matrix 4559 . j - column indices 4560 . a - matrix values 4561 . oi - row indices for "off-diagonal" portion of matrix 4562 . oj - column indices 4563 - oa - matrix values 4564 4565 Output Parameter: 4566 . mat - the matrix 4567 4568 Level: advanced 4569 4570 Notes: 4571 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4572 4573 The i and j indices are 0 based 4574 4575 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4576 4577 4578 .keywords: matrix, aij, compressed row, sparse, parallel 4579 4580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4581 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4582 @*/ 4583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4584 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4585 { 4586 PetscErrorCode ierr; 4587 Mat_MPIAIJ *maij; 4588 4589 PetscFunctionBegin; 4590 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4591 if (i[0]) { 4592 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4593 } 4594 if (oi[0]) { 4595 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4596 } 4597 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4598 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4599 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4600 maij = (Mat_MPIAIJ*) (*mat)->data; 4601 maij->donotstash = PETSC_TRUE; 4602 (*mat)->preallocated = PETSC_TRUE; 4603 4604 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4605 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4606 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4607 4608 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4609 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4610 4611 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4614 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4615 4616 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 PetscFunctionReturn(0); 4619 } 4620 4621 /* 4622 Special version for direct calls from Fortran 4623 */ 4624 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4625 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4626 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4627 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4628 #endif 4629 4630 /* Change these macros so can be used in void function */ 4631 #undef CHKERRQ 4632 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4633 #undef SETERRQ2 4634 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4635 #undef SETERRQ 4636 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4637 
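/* The macro redefinitions above are needed because matsetvaluesmpiaij_() below returns void:
   the standard CHKERRQ()/SETERRQ() macros expand to a return of the error code, which is not
   legal in a void function, so errors are instead turned into an abort on the matrix's
   communicator via CHKERRABORT(). */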
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt    i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt    cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note that ba must be refreshed before ap2 is recomputed from it */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END
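
/*
   A minimal usage sketch for MatCreateMPIAIJWithSplitArrays(); the routine and macro names are
   from this file, but the array values are purely illustrative and are not taken from the
   library or its examples.  Each process passes the CSR arrays of its local "diagonal" block
   (i,j,a) and of its local "off-diagonal" block (oi,oj,oa); here each of the two local rows
   holds one diagonal-block nonzero and the off-diagonal block is empty (oj and oa are
   placeholders that are never referenced because oi[m] == 0).  Since the arrays are not copied,
   they must remain valid for the lifetime of the matrix.

       static PetscInt    i[]  = {0,1,2};
       static PetscInt    j[]  = {0,1};
       static PetscScalar a[]  = {1.0,2.0};
       static PetscInt    oi[] = {0,0,0};
       static PetscInt    oj[] = {0};
       static PetscScalar oa[] = {0.0};
       Mat                mat;
       PetscErrorCode     ierr;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/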