1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = a->ignorezeroentries; 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NEW_NONZERO_LOCATIONS: 1207 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1208 case MAT_KEEP_ZEROED_ROWS: 1209 case MAT_NEW_NONZERO_LOCATION_ERR: 1210 case MAT_USE_INODES: 1211 case MAT_IGNORE_ZERO_ENTRIES: 1212 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1213 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1214 break; 1215 case MAT_ROW_ORIENTED: 1216 a->roworiented = flg; 1217 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1219 break; 1220 case MAT_NEW_DIAGONALS: 1221 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1222 break; 1223 case MAT_IGNORE_OFF_PROC_ENTRIES: 1224 a->donotstash = PETSC_TRUE; 1225 break; 1226 case MAT_SYMMETRIC: 1227 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1228 break; 1229 case MAT_STRUCTURALLY_SYMMETRIC: 1230 case MAT_HERMITIAN: 1231 case MAT_SYMMETRY_ETERNAL: 1232 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1233 break; 1234 default: 1235 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatGetRow_MPIAIJ" 1242 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1243 { 1244 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1245 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1246 PetscErrorCode ierr; 1247 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1248 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1249 PetscInt *cmap,*idx_p; 1250 1251 PetscFunctionBegin; 1252 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1253 mat->getrowactive = PETSC_TRUE; 1254 1255 if (!mat->rowvalues && (idx || v)) { 1256 /* 1257 allocate enough space to hold information from the longest row. 1258 */ 1259 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1260 PetscInt max = 1,tmp; 1261 for (i=0; i<matin->rmap.n; i++) { 1262 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1263 if (max < tmp) { max = tmp; } 1264 } 1265 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1266 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1267 } 1268 1269 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1270 lrow = row - rstart; 1271 1272 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1273 if (!v) {pvA = 0; pvB = 0;} 1274 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1275 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1276 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1277 nztot = nzA + nzB; 1278 1279 cmap = mat->garray; 1280 if (v || idx) { 1281 if (nztot) { 1282 /* Sort by increasing column numbers, assuming A and B already sorted */ 1283 PetscInt imark = -1; 1284 if (v) { 1285 *v = v_p = mat->rowvalues; 1286 for (i=0; i<nzB; i++) { 1287 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1288 else break; 1289 } 1290 imark = i; 1291 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1292 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1293 } 1294 if (idx) { 1295 *idx = idx_p = mat->rowindices; 1296 if (imark > -1) { 1297 for (i=0; i<imark; i++) { 1298 idx_p[i] = cmap[cworkB[i]]; 1299 } 1300 } else { 1301 for (i=0; i<nzB; i++) { 1302 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1303 else break; 1304 } 1305 imark = i; 1306 } 1307 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1308 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1309 } 1310 } else { 1311 if (idx) *idx = 0; 1312 if (v) *v = 0; 1313 } 1314 } 1315 *nz = nztot; 1316 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1317 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1323 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1324 { 1325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1326 1327 PetscFunctionBegin; 1328 if (!aij->getrowactive) { 1329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1330 } 1331 aij->getrowactive = PETSC_FALSE; 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatNorm_MPIAIJ" 1337 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1338 { 1339 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1340 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1341 PetscErrorCode ierr; 1342 PetscInt i,j,cstart = mat->cmap.rstart; 1343 PetscReal sum = 0.0; 1344 PetscScalar *v; 1345 1346 PetscFunctionBegin; 1347 if (aij->size == 1) { 1348 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1349 } else { 1350 if (type == NORM_FROBENIUS) { 1351 v = amat->a; 1352 for (i=0; i<amat->nz; i++) { 1353 #if defined(PETSC_USE_COMPLEX) 1354 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1355 #else 1356 sum += (*v)*(*v); v++; 1357 #endif 1358 } 1359 v = bmat->a; 1360 for (i=0; i<bmat->nz; i++) { 1361 #if defined(PETSC_USE_COMPLEX) 1362 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1363 #else 1364 sum += (*v)*(*v); v++; 1365 #endif 1366 } 1367 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1368 *norm = sqrt(*norm); 1369 } else if (type == NORM_1) { /* max column norm */ 1370 PetscReal *tmp,*tmp2; 1371 PetscInt *jj,*garray = aij->garray; 1372 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1373 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1374 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1375 *norm = 0.0; 1376 v = amat->a; jj = amat->j; 1377 for (j=0; j<amat->nz; j++) { 1378 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1379 } 1380 v = bmat->a; jj = bmat->j; 1381 for (j=0; j<bmat->nz; j++) { 1382 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1383 } 1384 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1385 for (j=0; j<mat->cmap.N; j++) { 1386 if (tmp2[j] > *norm) *norm = tmp2[j]; 1387 } 1388 ierr = PetscFree(tmp);CHKERRQ(ierr); 1389 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1390 } else if (type == NORM_INFINITY) { /* max row norm */ 1391 PetscReal ntemp = 0.0; 1392 for (j=0; j<aij->A->rmap.n; j++) { 1393 v = amat->a + amat->i[j]; 1394 sum = 0.0; 1395 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1396 sum += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a + bmat->i[j]; 1399 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1400 sum += PetscAbsScalar(*v); v++; 1401 } 1402 if (sum > ntemp) ntemp = sum; 1403 } 1404 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1405 } else { 1406 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1407 } 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatTranspose_MPIAIJ" 1414 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1415 { 1416 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1417 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1418 PetscErrorCode ierr; 1419 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1420 PetscInt cstart=A->cmap.rstart,ncol; 1421 Mat B; 1422 PetscScalar *array; 1423 1424 PetscFunctionBegin; 1425 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1426 1427 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1428 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1429 ai = Aloc->i; aj = Aloc->j; 1430 bi = Bloc->i; bj = Bloc->j; 1431 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1432 cols = d_nnz + na + 1; /* work space to be used by B part */ 1433 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1434 for (i=0; i<ai[ma]; i++){ 1435 d_nnz[aj[i]] ++; 1436 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1437 } 1438 1439 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1440 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1441 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1442 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1443 1444 /* copy over the A part */ 1445 array = Aloc->a; 1446 row = A->rmap.rstart; 1447 for (i=0; i<ma; i++) { 1448 ncol = ai[i+1]-ai[i]; 1449 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1450 row++; array += ncol; aj += ncol; 1451 } 1452 aj = Aloc->j; 1453 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1454 1455 /* copy over the B part */ 1456 array = Bloc->a; 1457 row = A->rmap.rstart; 1458 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1459 for (i=0; i<mb; i++) { 1460 ncol = bi[i+1]-bi[i]; 1461 ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1462 row++; array += ncol; cols += ncol; 1463 } 1464 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1467 if (matout) { 1468 *matout = B; 1469 } else { 1470 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1478 { 1479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1480 Mat a = aij->A,b = aij->B; 1481 PetscErrorCode ierr; 1482 PetscInt s1,s2,s3; 1483 1484 PetscFunctionBegin; 1485 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1486 if (rr) { 1487 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1488 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1489 /* Overlap communication with computation. */ 1490 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 } 1492 if (ll) { 1493 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1494 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1495 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1496 } 1497 /* scale the diagonal block */ 1498 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1499 1500 if (rr) { 1501 /* Do a scatter end and then right scale the off-diagonal block */ 1502 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1504 } 1505 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1512 { 1513 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1518 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1524 { 1525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatEqual_MPIAIJ" 1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1536 { 1537 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1538 Mat a,b,c,d; 1539 PetscTruth flg; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 a = matA->A; b = matA->B; 1544 c = matB->A; d = matB->B; 1545 1546 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1547 if (flg) { 1548 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1549 } 1550 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatCopy_MPIAIJ" 1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1557 { 1558 PetscErrorCode ierr; 1559 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1560 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1561 1562 PetscFunctionBegin; 1563 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1564 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1565 /* because of the column compression in the off-processor part of the matrix a->B, 1566 the number of columns in a->B and b->B may be different, hence we cannot call 1567 the MatCopy() directly on the two parts. If need be, we can provide a more 1568 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1569 then copying the submatrices */ 1570 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1573 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1581 { 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #include "petscblaslapack.h" 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatAXPY_MPIAIJ" 1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1593 { 1594 PetscErrorCode ierr; 1595 PetscInt i; 1596 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1597 PetscBLASInt bnz,one=1; 1598 Mat_SeqAIJ *x,*y; 1599 1600 PetscFunctionBegin; 1601 if (str == SAME_NONZERO_PATTERN) { 1602 PetscScalar alpha = a; 1603 x = (Mat_SeqAIJ *)xx->A->data; 1604 y = (Mat_SeqAIJ *)yy->A->data; 1605 bnz = (PetscBLASInt)x->nz; 1606 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1607 x = (Mat_SeqAIJ *)xx->B->data; 1608 y = (Mat_SeqAIJ *)yy->B->data; 1609 bnz = (PetscBLASInt)x->nz; 1610 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1611 } else if (str == SUBSET_NONZERO_PATTERN) { 1612 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1613 1614 x = (Mat_SeqAIJ *)xx->B->data; 1615 y = (Mat_SeqAIJ *)yy->B->data; 1616 if (y->xtoy && y->XtoY != xx->B) { 1617 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1618 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1619 } 1620 if (!y->xtoy) { /* get xtoy */ 1621 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1622 y->XtoY = xx->B; 1623 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1624 } 1625 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1626 } else { 1627 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatConjugate_MPIAIJ" 1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1637 { 1638 #if defined(PETSC_USE_COMPLEX) 1639 PetscErrorCode ierr; 1640 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1641 1642 PetscFunctionBegin; 1643 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1644 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1645 #else 1646 PetscFunctionBegin; 1647 #endif 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatRealPart_MPIAIJ" 1653 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1654 { 1655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1660 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1666 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1667 { 1668 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1669 PetscErrorCode ierr; 1670 1671 PetscFunctionBegin; 1672 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1673 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #ifdef PETSC_HAVE_PBGL 1678 1679 #include <boost/parallel/mpi/bsp_process_group.hpp> 1680 #include <boost/graph/distributed/ilu_default_graph.hpp> 1681 #include <boost/graph/distributed/ilu_0_block.hpp> 1682 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1683 #include <boost/graph/distributed/petsc/interface.hpp> 1684 #include <boost/multi_array.hpp> 1685 #include <boost/parallel/distributed_property_map.hpp> 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1689 /* 1690 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1691 */ 1692 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1693 { 1694 namespace petsc = boost::distributed::petsc; 1695 1696 namespace graph_dist = boost::graph::distributed; 1697 using boost::graph::distributed::ilu_default::process_group_type; 1698 using boost::graph::ilu_permuted; 1699 1700 PetscTruth row_identity, col_identity; 1701 PetscContainer c; 1702 PetscInt m, n, M, N; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1707 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1708 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1709 if (!row_identity || !col_identity) { 1710 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1711 } 1712 1713 process_group_type pg; 1714 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1715 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1716 lgraph_type& level_graph = *lgraph_p; 1717 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1718 1719 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1720 ilu_permuted(level_graph); 1721 1722 /* put together the new matrix */ 1723 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1724 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1725 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1726 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1727 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1728 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1729 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1730 (*fact)->factor = FACTOR_LU; 1731 1732 ierr = PetscContainerCreate(A->comm, &c); 1733 ierr = PetscContainerSetPointer(c, lgraph_p); 1734 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1740 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1741 { 1742 PetscFunctionBegin; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatSolve_MPIAIJ" 1748 /* 1749 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1750 */ 1751 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1752 { 1753 namespace graph_dist = boost::graph::distributed; 1754 1755 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1756 lgraph_type* lgraph_p; 1757 PetscContainer c; 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1762 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1763 ierr = VecCopy(b, x); CHKERRQ(ierr); 1764 1765 PetscScalar* array_x; 1766 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1767 PetscInt sx; 1768 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1769 1770 PetscScalar* array_b; 1771 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1772 PetscInt sb; 1773 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1774 1775 lgraph_type& level_graph = *lgraph_p; 1776 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1777 1778 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1779 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1780 ref_x(array_x, boost::extents[num_vertices(graph)]); 1781 1782 typedef boost::iterator_property_map<array_ref_type::iterator, 1783 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1784 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1785 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1786 1787 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1788 1789 PetscFunctionReturn(0); 1790 } 1791 #endif 1792 1793 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1794 PetscInt nzlocal,nsends,nrecvs; 1795 PetscMPIInt *send_rank; 1796 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1797 PetscScalar *sbuf_a,**rbuf_a; 1798 PetscErrorCode (*MatDestroy)(Mat); 1799 } Mat_Redundant; 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1803 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1804 { 1805 PetscErrorCode ierr; 1806 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1807 PetscInt i; 1808 1809 PetscFunctionBegin; 1810 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1811 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1812 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1813 for (i=0; i<redund->nrecvs; i++){ 1814 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1815 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1816 } 1817 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1818 ierr = PetscFree(redund);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatDestroy_MatRedundant" 1824 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1825 { 1826 PetscErrorCode ierr; 1827 PetscContainer container; 1828 Mat_Redundant *redund=PETSC_NULL; 1829 1830 PetscFunctionBegin; 1831 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1832 if (container) { 1833 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1834 } else { 1835 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1836 } 1837 A->ops->destroy = redund->MatDestroy; 1838 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1839 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1840 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1846 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1847 { 1848 PetscMPIInt rank,size; 1849 MPI_Comm comm=mat->comm; 1850 PetscErrorCode ierr; 1851 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1852 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1853 PetscInt *rowrange=mat->rmap.range; 1854 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1855 Mat A=aij->A,B=aij->B,C=*matredundant; 1856 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1857 PetscScalar *sbuf_a; 1858 PetscInt nzlocal=a->nz+b->nz; 1859 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1860 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1861 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1862 PetscScalar *vals,*aworkA,*aworkB; 1863 PetscMPIInt tag1,tag2,tag3,imdex; 1864 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1865 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1866 MPI_Status recv_status,*send_status; 1867 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1868 PetscInt **rbuf_j=PETSC_NULL; 1869 PetscScalar **rbuf_a=PETSC_NULL; 1870 Mat_Redundant *redund=PETSC_NULL; 1871 PetscContainer container; 1872 1873 PetscFunctionBegin; 1874 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1875 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1876 1877 if (reuse == MAT_REUSE_MATRIX) { 1878 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1879 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1880 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1881 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1882 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1883 if (container) { 1884 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1885 } else { 1886 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1887 } 1888 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1889 1890 nsends = redund->nsends; 1891 nrecvs = redund->nrecvs; 1892 send_rank = redund->send_rank; recv_rank = send_rank + size; 1893 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1894 sbuf_j = redund->sbuf_j; 1895 sbuf_a = redund->sbuf_a; 1896 rbuf_j = redund->rbuf_j; 1897 rbuf_a = redund->rbuf_a; 1898 } 1899 1900 if (reuse == MAT_INITIAL_MATRIX){ 1901 PetscMPIInt subrank,subsize; 1902 PetscInt nleftover,np_subcomm; 1903 /* get the destination processors' id send_rank, nsends and nrecvs */ 1904 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1905 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1906 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1907 recv_rank = send_rank + size; 1908 np_subcomm = size/nsubcomm; 1909 nleftover = size - nsubcomm*np_subcomm; 1910 nsends = 0; nrecvs = 0; 1911 for (i=0; i<size; i++){ /* i=rank*/ 1912 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1913 send_rank[nsends] = i; nsends++; 1914 recv_rank[nrecvs++] = i; 1915 } 1916 } 1917 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1918 i = size-nleftover-1; 1919 j = 0; 1920 while (j < nsubcomm - nleftover){ 1921 send_rank[nsends++] = i; 1922 i--; j++; 1923 } 1924 } 1925 1926 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1927 for (i=0; i<nleftover; i++){ 1928 recv_rank[nrecvs++] = size-nleftover+i; 1929 } 1930 } 1931 1932 /* allocate sbuf_j, sbuf_a */ 1933 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1934 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1935 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1936 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1937 1938 /* copy mat's local entries into the buffers */ 1939 if (reuse == MAT_INITIAL_MATRIX){ 1940 rownz_max = 0; 1941 rptr = sbuf_j; 1942 cols = sbuf_j + rend-rstart + 1; 1943 vals = sbuf_a; 1944 rptr[0] = 0; 1945 for (i=0; i<rend-rstart; i++){ 1946 row = i + rstart; 1947 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1948 ncols = nzA + nzB; 1949 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1950 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1951 /* load the column indices for this row into cols */ 1952 lwrite = 0; 1953 for (l=0; l<nzB; l++) { 1954 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1955 vals[lwrite] = aworkB[l]; 1956 cols[lwrite++] = ctmp; 1957 } 1958 } 1959 for (l=0; l<nzA; l++){ 1960 vals[lwrite] = aworkA[l]; 1961 cols[lwrite++] = cstart + cworkA[l]; 1962 } 1963 for (l=0; l<nzB; l++) { 1964 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1965 vals[lwrite] = aworkB[l]; 1966 cols[lwrite++] = ctmp; 1967 } 1968 } 1969 vals += ncols; 1970 cols += ncols; 1971 rptr[i+1] = rptr[i] + ncols; 1972 if (rownz_max < ncols) rownz_max = ncols; 1973 } 1974 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1975 } else { /* only copy matrix values into sbuf_a */ 1976 rptr = sbuf_j; 1977 vals = sbuf_a; 1978 rptr[0] = 0; 1979 for (i=0; i<rend-rstart; i++){ 1980 row = i + rstart; 1981 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1982 ncols = nzA + nzB; 1983 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1984 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1985 lwrite = 0; 1986 for (l=0; l<nzB; l++) { 1987 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1988 } 1989 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1990 for (l=0; l<nzB; l++) { 1991 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1992 } 1993 vals += ncols; 1994 rptr[i+1] = rptr[i] + ncols; 1995 } 1996 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1997 1998 /* send nzlocal to others, and recv other's nzlocal */ 1999 /*--------------------------------------------------*/ 2000 if (reuse == MAT_INITIAL_MATRIX){ 2001 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2002 s_waits2 = s_waits3 + nsends; 2003 s_waits1 = s_waits2 + nsends; 2004 r_waits1 = s_waits1 + nsends; 2005 r_waits2 = r_waits1 + nrecvs; 2006 r_waits3 = r_waits2 + nrecvs; 2007 } else { 2008 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2009 r_waits3 = s_waits3 + nsends; 2010 } 2011 2012 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2013 if (reuse == MAT_INITIAL_MATRIX){ 2014 /* get new tags to keep the communication clean */ 2015 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2016 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2017 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2018 rbuf_nz = sbuf_nz + nsends; 2019 2020 /* post receives of other's nzlocal */ 2021 for (i=0; i<nrecvs; i++){ 2022 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2023 } 2024 /* send nzlocal to others */ 2025 for (i=0; i<nsends; i++){ 2026 sbuf_nz[i] = nzlocal; 2027 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2028 } 2029 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2030 count = nrecvs; 2031 while (count) { 2032 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2033 recv_rank[imdex] = recv_status.MPI_SOURCE; 2034 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2035 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2036 2037 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2038 rbuf_nz[imdex] += i + 2; 2039 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2040 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2041 count--; 2042 } 2043 /* wait on sends of nzlocal */ 2044 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2045 /* send mat->i,j to others, and recv from other's */ 2046 /*------------------------------------------------*/ 2047 for (i=0; i<nsends; i++){ 2048 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2049 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2050 } 2051 /* wait on receives of mat->i,j */ 2052 /*------------------------------*/ 2053 count = nrecvs; 2054 while (count) { 2055 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2056 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2057 count--; 2058 } 2059 /* wait on sends of mat->i,j */ 2060 /*---------------------------*/ 2061 if (nsends) { 2062 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2063 } 2064 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2065 2066 /* post receives, send and receive mat->a */ 2067 /*----------------------------------------*/ 2068 for (imdex=0; imdex<nrecvs; imdex++) { 2069 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2070 } 2071 for (i=0; i<nsends; i++){ 2072 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2073 } 2074 count = nrecvs; 2075 while (count) { 2076 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2077 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2078 count--; 2079 } 2080 if (nsends) { 2081 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2082 } 2083 2084 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2085 2086 /* create redundant matrix */ 2087 /*-------------------------*/ 2088 if (reuse == MAT_INITIAL_MATRIX){ 2089 /* compute rownz_max for preallocation */ 2090 for (imdex=0; imdex<nrecvs; imdex++){ 2091 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2092 rptr = rbuf_j[imdex]; 2093 for (i=0; i<j; i++){ 2094 ncols = rptr[i+1] - rptr[i]; 2095 if (rownz_max < ncols) rownz_max = ncols; 2096 } 2097 } 2098 2099 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2100 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2101 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2102 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2103 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2104 } else { 2105 C = *matredundant; 2106 } 2107 2108 /* insert local matrix entries */ 2109 rptr = sbuf_j; 2110 cols = sbuf_j + rend-rstart + 1; 2111 vals = sbuf_a; 2112 for (i=0; i<rend-rstart; i++){ 2113 row = i + rstart; 2114 ncols = rptr[i+1] - rptr[i]; 2115 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2116 vals += ncols; 2117 cols += ncols; 2118 } 2119 /* insert received matrix entries */ 2120 for (imdex=0; imdex<nrecvs; imdex++){ 2121 rstart = rowrange[recv_rank[imdex]]; 2122 rend = rowrange[recv_rank[imdex]+1]; 2123 rptr = rbuf_j[imdex]; 2124 cols = rbuf_j[imdex] + rend-rstart + 1; 2125 vals = rbuf_a[imdex]; 2126 for (i=0; i<rend-rstart; i++){ 2127 row = i + rstart; 2128 ncols = rptr[i+1] - rptr[i]; 2129 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2130 vals += ncols; 2131 cols += ncols; 2132 } 2133 } 2134 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2137 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2138 if (reuse == MAT_INITIAL_MATRIX){ 2139 PetscContainer container; 2140 *matredundant = C; 2141 /* create a supporting struct and attach it to C for reuse */ 2142 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2143 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2144 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2145 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2146 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2147 2148 redund->nzlocal = nzlocal; 2149 redund->nsends = nsends; 2150 redund->nrecvs = nrecvs; 2151 redund->send_rank = send_rank; 2152 redund->sbuf_nz = sbuf_nz; 2153 redund->sbuf_j = sbuf_j; 2154 redund->sbuf_a = sbuf_a; 2155 redund->rbuf_j = rbuf_j; 2156 redund->rbuf_a = rbuf_a; 2157 2158 redund->MatDestroy = C->ops->destroy; 2159 C->ops->destroy = MatDestroy_MatRedundant; 2160 } 2161 PetscFunctionReturn(0); 2162 } 2163 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2166 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2167 { 2168 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2169 PetscInt n = A->rmap.n; 2170 PetscInt cstart = A->cmap.rstart; 2171 PetscInt *cmap = mat->garray; 2172 PetscInt *diagIdx, *offdiagIdx; 2173 Vec diagV, offdiagV; 2174 PetscScalar *a, *diagA, *offdiagA; 2175 PetscInt r; 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2180 ierr = VecCreateSeq(A->comm, n, &diagV);CHKERRQ(ierr); 2181 ierr = VecCreateSeq(A->comm, n, &offdiagV);CHKERRQ(ierr); 2182 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2183 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2184 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2185 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2186 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2187 for(r = 0; r < n; ++r) { 2188 if (diagA[r] <= offdiagA[r]) { 2189 a[r] = diagA[r]; 2190 idx[r] = cstart + diagIdx[r]; 2191 } else { 2192 a[r] = offdiagA[r]; 2193 idx[r] = cmap[offdiagIdx[r]]; 2194 } 2195 } 2196 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2197 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2198 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2199 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2200 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2201 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 /* -------------------------------------------------------------------*/ 2206 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2207 MatGetRow_MPIAIJ, 2208 MatRestoreRow_MPIAIJ, 2209 MatMult_MPIAIJ, 2210 /* 4*/ MatMultAdd_MPIAIJ, 2211 MatMultTranspose_MPIAIJ, 2212 MatMultTransposeAdd_MPIAIJ, 2213 #ifdef PETSC_HAVE_PBGL 2214 MatSolve_MPIAIJ, 2215 #else 2216 0, 2217 #endif 2218 0, 2219 0, 2220 /*10*/ 0, 2221 0, 2222 0, 2223 MatRelax_MPIAIJ, 2224 MatTranspose_MPIAIJ, 2225 /*15*/ MatGetInfo_MPIAIJ, 2226 MatEqual_MPIAIJ, 2227 MatGetDiagonal_MPIAIJ, 2228 MatDiagonalScale_MPIAIJ, 2229 MatNorm_MPIAIJ, 2230 /*20*/ MatAssemblyBegin_MPIAIJ, 2231 MatAssemblyEnd_MPIAIJ, 2232 0, 2233 MatSetOption_MPIAIJ, 2234 MatZeroEntries_MPIAIJ, 2235 /*25*/ MatZeroRows_MPIAIJ, 2236 0, 2237 #ifdef PETSC_HAVE_PBGL 2238 MatLUFactorNumeric_MPIAIJ, 2239 #else 2240 0, 2241 #endif 2242 0, 2243 0, 2244 /*30*/ MatSetUpPreallocation_MPIAIJ, 2245 #ifdef PETSC_HAVE_PBGL 2246 MatILUFactorSymbolic_MPIAIJ, 2247 #else 2248 0, 2249 #endif 2250 0, 2251 0, 2252 0, 2253 /*35*/ MatDuplicate_MPIAIJ, 2254 0, 2255 0, 2256 0, 2257 0, 2258 /*40*/ MatAXPY_MPIAIJ, 2259 MatGetSubMatrices_MPIAIJ, 2260 MatIncreaseOverlap_MPIAIJ, 2261 MatGetValues_MPIAIJ, 2262 MatCopy_MPIAIJ, 2263 /*45*/ 0, 2264 MatScale_MPIAIJ, 2265 0, 2266 0, 2267 0, 2268 /*50*/ MatSetBlockSize_MPIAIJ, 2269 0, 2270 0, 2271 0, 2272 0, 2273 /*55*/ MatFDColoringCreate_MPIAIJ, 2274 0, 2275 MatSetUnfactored_MPIAIJ, 2276 MatPermute_MPIAIJ, 2277 0, 2278 /*60*/ MatGetSubMatrix_MPIAIJ, 2279 MatDestroy_MPIAIJ, 2280 MatView_MPIAIJ, 2281 0, 2282 0, 2283 /*65*/ 0, 2284 0, 2285 0, 2286 0, 2287 0, 2288 /*70*/ 0, 2289 0, 2290 MatSetColoring_MPIAIJ, 2291 #if defined(PETSC_HAVE_ADIC) 2292 MatSetValuesAdic_MPIAIJ, 2293 #else 2294 0, 2295 #endif 2296 MatSetValuesAdifor_MPIAIJ, 2297 /*75*/ 0, 2298 0, 2299 0, 2300 0, 2301 0, 2302 /*80*/ 0, 2303 0, 2304 0, 2305 0, 2306 /*84*/ MatLoad_MPIAIJ, 2307 0, 2308 0, 2309 0, 2310 0, 2311 0, 2312 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2313 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2314 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2315 MatPtAP_Basic, 2316 MatPtAPSymbolic_MPIAIJ, 2317 /*95*/ MatPtAPNumeric_MPIAIJ, 2318 0, 2319 0, 2320 0, 2321 0, 2322 /*100*/0, 2323 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2324 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2325 MatConjugate_MPIAIJ, 2326 0, 2327 /*105*/MatSetValuesRow_MPIAIJ, 2328 MatRealPart_MPIAIJ, 2329 MatImaginaryPart_MPIAIJ, 2330 0, 2331 0, 2332 /*110*/0, 2333 MatGetRedundantMatrix_MPIAIJ, 2334 MatGetRowMin_MPIAIJ}; 2335 2336 /* ----------------------------------------------------------------------------------------*/ 2337 2338 EXTERN_C_BEGIN 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2341 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2342 { 2343 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2344 PetscErrorCode ierr; 2345 2346 PetscFunctionBegin; 2347 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2348 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 EXTERN_C_BEGIN 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2356 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2357 { 2358 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2363 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 EXTERN_C_END 2367 2368 #include "petscpc.h" 2369 EXTERN_C_BEGIN 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2372 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2373 { 2374 Mat_MPIAIJ *b; 2375 PetscErrorCode ierr; 2376 PetscInt i; 2377 2378 PetscFunctionBegin; 2379 B->preallocated = PETSC_TRUE; 2380 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2381 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2382 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2383 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2384 2385 B->rmap.bs = B->cmap.bs = 1; 2386 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2387 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2388 if (d_nnz) { 2389 for (i=0; i<B->rmap.n; i++) { 2390 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2391 } 2392 } 2393 if (o_nnz) { 2394 for (i=0; i<B->rmap.n; i++) { 2395 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2396 } 2397 } 2398 b = (Mat_MPIAIJ*)B->data; 2399 2400 /* Explicitly create 2 MATSEQAIJ matrices. */ 2401 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2402 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2403 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2404 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2405 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2406 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2407 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2408 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2409 2410 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2411 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2412 2413 PetscFunctionReturn(0); 2414 } 2415 EXTERN_C_END 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2419 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2420 { 2421 Mat mat; 2422 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 *newmat = 0; 2427 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2428 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2429 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2430 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2431 a = (Mat_MPIAIJ*)mat->data; 2432 2433 mat->factor = matin->factor; 2434 mat->rmap.bs = matin->rmap.bs; 2435 mat->assembled = PETSC_TRUE; 2436 mat->insertmode = NOT_SET_VALUES; 2437 mat->preallocated = PETSC_TRUE; 2438 2439 a->size = oldmat->size; 2440 a->rank = oldmat->rank; 2441 a->donotstash = oldmat->donotstash; 2442 a->roworiented = oldmat->roworiented; 2443 a->rowindices = 0; 2444 a->rowvalues = 0; 2445 a->getrowactive = PETSC_FALSE; 2446 2447 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2448 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2449 2450 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2451 if (oldmat->colmap) { 2452 #if defined (PETSC_USE_CTABLE) 2453 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2454 #else 2455 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2456 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2457 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2458 #endif 2459 } else a->colmap = 0; 2460 if (oldmat->garray) { 2461 PetscInt len; 2462 len = oldmat->B->cmap.n; 2463 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2464 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2465 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2466 } else a->garray = 0; 2467 2468 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2469 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2470 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2471 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2472 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2473 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2474 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2475 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2476 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2477 *newmat = mat; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #include "petscsys.h" 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatLoad_MPIAIJ" 2485 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2486 { 2487 Mat A; 2488 PetscScalar *vals,*svals; 2489 MPI_Comm comm = ((PetscObject)viewer)->comm; 2490 MPI_Status status; 2491 PetscErrorCode ierr; 2492 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2493 PetscInt i,nz,j,rstart,rend,mmax; 2494 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2495 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2496 PetscInt cend,cstart,n,*rowners; 2497 int fd; 2498 2499 PetscFunctionBegin; 2500 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2501 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2502 if (!rank) { 2503 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2504 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2505 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2506 } 2507 2508 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2509 M = header[1]; N = header[2]; 2510 /* determine ownership of all rows */ 2511 m = M/size + ((M % size) > rank); 2512 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2513 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2514 2515 /* First process needs enough room for process with most rows */ 2516 if (!rank) { 2517 mmax = rowners[1]; 2518 for (i=2; i<size; i++) { 2519 mmax = PetscMax(mmax,rowners[i]); 2520 } 2521 } else mmax = m; 2522 2523 rowners[0] = 0; 2524 for (i=2; i<=size; i++) { 2525 rowners[i] += rowners[i-1]; 2526 } 2527 rstart = rowners[rank]; 2528 rend = rowners[rank+1]; 2529 2530 /* distribute row lengths to all processors */ 2531 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2532 if (!rank) { 2533 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2534 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2535 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2536 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2537 for (j=0; j<m; j++) { 2538 procsnz[0] += ourlens[j]; 2539 } 2540 for (i=1; i<size; i++) { 2541 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2542 /* calculate the number of nonzeros on each processor */ 2543 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2544 procsnz[i] += rowlengths[j]; 2545 } 2546 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2547 } 2548 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2549 } else { 2550 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2551 } 2552 2553 if (!rank) { 2554 /* determine max buffer needed and allocate it */ 2555 maxnz = 0; 2556 for (i=0; i<size; i++) { 2557 maxnz = PetscMax(maxnz,procsnz[i]); 2558 } 2559 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2560 2561 /* read in my part of the matrix column indices */ 2562 nz = procsnz[0]; 2563 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2564 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2565 2566 /* read in every one elses and ship off */ 2567 for (i=1; i<size; i++) { 2568 nz = procsnz[i]; 2569 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2570 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2571 } 2572 ierr = PetscFree(cols);CHKERRQ(ierr); 2573 } else { 2574 /* determine buffer space needed for message */ 2575 nz = 0; 2576 for (i=0; i<m; i++) { 2577 nz += ourlens[i]; 2578 } 2579 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2580 2581 /* receive message of column indices*/ 2582 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2583 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2584 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2585 } 2586 2587 /* determine column ownership if matrix is not square */ 2588 if (N != M) { 2589 n = N/size + ((N % size) > rank); 2590 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2591 cstart = cend - n; 2592 } else { 2593 cstart = rstart; 2594 cend = rend; 2595 n = cend - cstart; 2596 } 2597 2598 /* loop over local rows, determining number of off diagonal entries */ 2599 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2600 jj = 0; 2601 for (i=0; i<m; i++) { 2602 for (j=0; j<ourlens[i]; j++) { 2603 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2604 jj++; 2605 } 2606 } 2607 2608 /* create our matrix */ 2609 for (i=0; i<m; i++) { 2610 ourlens[i] -= offlens[i]; 2611 } 2612 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2613 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2614 ierr = MatSetType(A,type);CHKERRQ(ierr); 2615 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2616 2617 for (i=0; i<m; i++) { 2618 ourlens[i] += offlens[i]; 2619 } 2620 2621 if (!rank) { 2622 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2623 2624 /* read in my part of the matrix numerical values */ 2625 nz = procsnz[0]; 2626 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2627 2628 /* insert into matrix */ 2629 jj = rstart; 2630 smycols = mycols; 2631 svals = vals; 2632 for (i=0; i<m; i++) { 2633 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2634 smycols += ourlens[i]; 2635 svals += ourlens[i]; 2636 jj++; 2637 } 2638 2639 /* read in other processors and ship out */ 2640 for (i=1; i<size; i++) { 2641 nz = procsnz[i]; 2642 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2643 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2644 } 2645 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2646 } else { 2647 /* receive numeric values */ 2648 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2649 2650 /* receive message of values*/ 2651 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2652 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2653 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2654 2655 /* insert into matrix */ 2656 jj = rstart; 2657 smycols = mycols; 2658 svals = vals; 2659 for (i=0; i<m; i++) { 2660 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2661 smycols += ourlens[i]; 2662 svals += ourlens[i]; 2663 jj++; 2664 } 2665 } 2666 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2667 ierr = PetscFree(vals);CHKERRQ(ierr); 2668 ierr = PetscFree(mycols);CHKERRQ(ierr); 2669 ierr = PetscFree(rowners);CHKERRQ(ierr); 2670 2671 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2673 *newmat = A; 2674 PetscFunctionReturn(0); 2675 } 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2679 /* 2680 Not great since it makes two copies of the submatrix, first an SeqAIJ 2681 in local and then by concatenating the local matrices the end result. 2682 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2683 */ 2684 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2685 { 2686 PetscErrorCode ierr; 2687 PetscMPIInt rank,size; 2688 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2689 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2690 Mat *local,M,Mreuse; 2691 PetscScalar *vwork,*aa; 2692 MPI_Comm comm = mat->comm; 2693 Mat_SeqAIJ *aij; 2694 2695 2696 PetscFunctionBegin; 2697 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2698 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2699 2700 if (call == MAT_REUSE_MATRIX) { 2701 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2702 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2703 local = &Mreuse; 2704 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2705 } else { 2706 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2707 Mreuse = *local; 2708 ierr = PetscFree(local);CHKERRQ(ierr); 2709 } 2710 2711 /* 2712 m - number of local rows 2713 n - number of columns (same on all processors) 2714 rstart - first row in new global matrix generated 2715 */ 2716 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2717 if (call == MAT_INITIAL_MATRIX) { 2718 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2719 ii = aij->i; 2720 jj = aij->j; 2721 2722 /* 2723 Determine the number of non-zeros in the diagonal and off-diagonal 2724 portions of the matrix in order to do correct preallocation 2725 */ 2726 2727 /* first get start and end of "diagonal" columns */ 2728 if (csize == PETSC_DECIDE) { 2729 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2730 if (mglobal == n) { /* square matrix */ 2731 nlocal = m; 2732 } else { 2733 nlocal = n/size + ((n % size) > rank); 2734 } 2735 } else { 2736 nlocal = csize; 2737 } 2738 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2739 rstart = rend - nlocal; 2740 if (rank == size - 1 && rend != n) { 2741 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2742 } 2743 2744 /* next, compute all the lengths */ 2745 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2746 olens = dlens + m; 2747 for (i=0; i<m; i++) { 2748 jend = ii[i+1] - ii[i]; 2749 olen = 0; 2750 dlen = 0; 2751 for (j=0; j<jend; j++) { 2752 if (*jj < rstart || *jj >= rend) olen++; 2753 else dlen++; 2754 jj++; 2755 } 2756 olens[i] = olen; 2757 dlens[i] = dlen; 2758 } 2759 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2760 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2761 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2762 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2763 ierr = PetscFree(dlens);CHKERRQ(ierr); 2764 } else { 2765 PetscInt ml,nl; 2766 2767 M = *newmat; 2768 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2769 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2770 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2771 /* 2772 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2773 rather than the slower MatSetValues(). 2774 */ 2775 M->was_assembled = PETSC_TRUE; 2776 M->assembled = PETSC_FALSE; 2777 } 2778 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2779 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2780 ii = aij->i; 2781 jj = aij->j; 2782 aa = aij->a; 2783 for (i=0; i<m; i++) { 2784 row = rstart + i; 2785 nz = ii[i+1] - ii[i]; 2786 cwork = jj; jj += nz; 2787 vwork = aa; aa += nz; 2788 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2793 *newmat = M; 2794 2795 /* save submatrix used in processor for next request */ 2796 if (call == MAT_INITIAL_MATRIX) { 2797 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2798 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2799 } 2800 2801 PetscFunctionReturn(0); 2802 } 2803 2804 EXTERN_C_BEGIN 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2808 { 2809 PetscInt m,cstart, cend,j,nnz,i,d; 2810 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2811 const PetscInt *JJ; 2812 PetscScalar *values; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2817 2818 B->rmap.bs = B->cmap.bs = 1; 2819 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2820 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2821 m = B->rmap.n; 2822 cstart = B->cmap.rstart; 2823 cend = B->cmap.rend; 2824 rstart = B->rmap.rstart; 2825 2826 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2827 o_nnz = d_nnz + m; 2828 2829 for (i=0; i<m; i++) { 2830 nnz = Ii[i+1]- Ii[i]; 2831 JJ = J + Ii[i]; 2832 nnz_max = PetscMax(nnz_max,nnz); 2833 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2834 for (j=0; j<nnz; j++) { 2835 if (*JJ >= cstart) break; 2836 JJ++; 2837 } 2838 d = 0; 2839 for (; j<nnz; j++) { 2840 if (*JJ++ >= cend) break; 2841 d++; 2842 } 2843 d_nnz[i] = d; 2844 o_nnz[i] = nnz - d; 2845 } 2846 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2847 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2848 2849 if (v) values = (PetscScalar*)v; 2850 else { 2851 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2852 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2853 } 2854 2855 for (i=0; i<m; i++) { 2856 ii = i + rstart; 2857 nnz = Ii[i+1]- Ii[i]; 2858 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2859 } 2860 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 2863 if (!v) { 2864 ierr = PetscFree(values);CHKERRQ(ierr); 2865 } 2866 PetscFunctionReturn(0); 2867 } 2868 EXTERN_C_END 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2872 /*@ 2873 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2874 (the default parallel PETSc format). 2875 2876 Collective on MPI_Comm 2877 2878 Input Parameters: 2879 + B - the matrix 2880 . i - the indices into j for the start of each local row (starts with zero) 2881 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2882 - v - optional values in the matrix 2883 2884 Level: developer 2885 2886 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2887 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2888 2889 .keywords: matrix, aij, compressed row, sparse, parallel 2890 2891 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2892 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2893 @*/ 2894 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2895 { 2896 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2897 2898 PetscFunctionBegin; 2899 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2900 if (f) { 2901 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2902 } 2903 PetscFunctionReturn(0); 2904 } 2905 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2908 /*@C 2909 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2910 (the default parallel PETSc format). For good matrix assembly performance 2911 the user should preallocate the matrix storage by setting the parameters 2912 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2913 performance can be increased by more than a factor of 50. 2914 2915 Collective on MPI_Comm 2916 2917 Input Parameters: 2918 + A - the matrix 2919 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2920 (same value is used for all local rows) 2921 . d_nnz - array containing the number of nonzeros in the various rows of the 2922 DIAGONAL portion of the local submatrix (possibly different for each row) 2923 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2924 The size of this array is equal to the number of local rows, i.e 'm'. 2925 You must leave room for the diagonal entry even if it is zero. 2926 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2927 submatrix (same value is used for all local rows). 2928 - o_nnz - array containing the number of nonzeros in the various rows of the 2929 OFF-DIAGONAL portion of the local submatrix (possibly different for 2930 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2931 structure. The size of this array is equal to the number 2932 of local rows, i.e 'm'. 2933 2934 If the *_nnz parameter is given then the *_nz parameter is ignored 2935 2936 The AIJ format (also called the Yale sparse matrix format or 2937 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2938 storage. The stored row and column indices begin with zero. See the users manual for details. 2939 2940 The parallel matrix is partitioned such that the first m0 rows belong to 2941 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2942 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2943 2944 The DIAGONAL portion of the local submatrix of a processor can be defined 2945 as the submatrix which is obtained by extraction the part corresponding 2946 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2947 first row that belongs to the processor, and r2 is the last row belonging 2948 to the this processor. This is a square mxm matrix. The remaining portion 2949 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2950 2951 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2952 2953 Example usage: 2954 2955 Consider the following 8x8 matrix with 34 non-zero values, that is 2956 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2957 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2958 as follows: 2959 2960 .vb 2961 1 2 0 | 0 3 0 | 0 4 2962 Proc0 0 5 6 | 7 0 0 | 8 0 2963 9 0 10 | 11 0 0 | 12 0 2964 ------------------------------------- 2965 13 0 14 | 15 16 17 | 0 0 2966 Proc1 0 18 0 | 19 20 21 | 0 0 2967 0 0 0 | 22 23 0 | 24 0 2968 ------------------------------------- 2969 Proc2 25 26 27 | 0 0 28 | 29 0 2970 30 0 0 | 31 32 33 | 0 34 2971 .ve 2972 2973 This can be represented as a collection of submatrices as: 2974 2975 .vb 2976 A B C 2977 D E F 2978 G H I 2979 .ve 2980 2981 Where the submatrices A,B,C are owned by proc0, D,E,F are 2982 owned by proc1, G,H,I are owned by proc2. 2983 2984 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2985 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2986 The 'M','N' parameters are 8,8, and have the same values on all procs. 2987 2988 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2989 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2990 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2991 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2992 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2993 matrix, ans [DF] as another SeqAIJ matrix. 2994 2995 When d_nz, o_nz parameters are specified, d_nz storage elements are 2996 allocated for every row of the local diagonal submatrix, and o_nz 2997 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2998 One way to choose d_nz and o_nz is to use the max nonzerors per local 2999 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3000 In this case, the values of d_nz,o_nz are: 3001 .vb 3002 proc0 : dnz = 2, o_nz = 2 3003 proc1 : dnz = 3, o_nz = 2 3004 proc2 : dnz = 1, o_nz = 4 3005 .ve 3006 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3007 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3008 for proc3. i.e we are using 12+15+10=37 storage locations to store 3009 34 values. 3010 3011 When d_nnz, o_nnz parameters are specified, the storage is specified 3012 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3013 In the above case the values for d_nnz,o_nnz are: 3014 .vb 3015 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3016 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3017 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3018 .ve 3019 Here the space allocated is sum of all the above values i.e 34, and 3020 hence pre-allocation is perfect. 3021 3022 Level: intermediate 3023 3024 .keywords: matrix, aij, compressed row, sparse, parallel 3025 3026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3027 MPIAIJ 3028 @*/ 3029 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3030 { 3031 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3032 3033 PetscFunctionBegin; 3034 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3035 if (f) { 3036 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3037 } 3038 PetscFunctionReturn(0); 3039 } 3040 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3043 /*@C 3044 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3045 CSR format the local rows. 3046 3047 Collective on MPI_Comm 3048 3049 Input Parameters: 3050 + comm - MPI communicator 3051 . m - number of local rows (Cannot be PETSC_DECIDE) 3052 . n - This value should be the same as the local size used in creating the 3053 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3054 calculated if N is given) For square matrices n is almost always m. 3055 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3056 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3057 . i - row indices 3058 . j - column indices 3059 - a - matrix values 3060 3061 Output Parameter: 3062 . mat - the matrix 3063 3064 Level: intermediate 3065 3066 Notes: 3067 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3068 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3069 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3070 3071 The i and j indices are 0 based 3072 3073 .keywords: matrix, aij, compressed row, sparse, parallel 3074 3075 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3076 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3077 @*/ 3078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3079 { 3080 PetscErrorCode ierr; 3081 3082 PetscFunctionBegin; 3083 if (i[0]) { 3084 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3085 } 3086 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3087 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3088 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3089 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatCreateMPIAIJ" 3095 /*@C 3096 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3097 (the default parallel PETSc format). For good matrix assembly performance 3098 the user should preallocate the matrix storage by setting the parameters 3099 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3100 performance can be increased by more than a factor of 50. 3101 3102 Collective on MPI_Comm 3103 3104 Input Parameters: 3105 + comm - MPI communicator 3106 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3107 This value should be the same as the local size used in creating the 3108 y vector for the matrix-vector product y = Ax. 3109 . n - This value should be the same as the local size used in creating the 3110 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3111 calculated if N is given) For square matrices n is almost always m. 3112 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3113 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3114 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3115 (same value is used for all local rows) 3116 . d_nnz - array containing the number of nonzeros in the various rows of the 3117 DIAGONAL portion of the local submatrix (possibly different for each row) 3118 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3119 The size of this array is equal to the number of local rows, i.e 'm'. 3120 You must leave room for the diagonal entry even if it is zero. 3121 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3122 submatrix (same value is used for all local rows). 3123 - o_nnz - array containing the number of nonzeros in the various rows of the 3124 OFF-DIAGONAL portion of the local submatrix (possibly different for 3125 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3126 structure. The size of this array is equal to the number 3127 of local rows, i.e 'm'. 3128 3129 Output Parameter: 3130 . A - the matrix 3131 3132 Notes: 3133 If the *_nnz parameter is given then the *_nz parameter is ignored 3134 3135 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3136 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3137 storage requirements for this matrix. 3138 3139 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3140 processor than it must be used on all processors that share the object for 3141 that argument. 3142 3143 The user MUST specify either the local or global matrix dimensions 3144 (possibly both). 3145 3146 The parallel matrix is partitioned across processors such that the 3147 first m0 rows belong to process 0, the next m1 rows belong to 3148 process 1, the next m2 rows belong to process 2 etc.. where 3149 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3150 values corresponding to [m x N] submatrix. 3151 3152 The columns are logically partitioned with the n0 columns belonging 3153 to 0th partition, the next n1 columns belonging to the next 3154 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3155 3156 The DIAGONAL portion of the local submatrix on any given processor 3157 is the submatrix corresponding to the rows and columns m,n 3158 corresponding to the given processor. i.e diagonal matrix on 3159 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3160 etc. The remaining portion of the local submatrix [m x (N-n)] 3161 constitute the OFF-DIAGONAL portion. The example below better 3162 illustrates this concept. 3163 3164 For a square global matrix we define each processor's diagonal portion 3165 to be its local rows and the corresponding columns (a square submatrix); 3166 each processor's off-diagonal portion encompasses the remainder of the 3167 local matrix (a rectangular submatrix). 3168 3169 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3170 3171 When calling this routine with a single process communicator, a matrix of 3172 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3173 type of communicator, use the construction mechanism: 3174 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3175 3176 By default, this format uses inodes (identical nodes) when possible. 3177 We search for consecutive rows with the same nonzero structure, thereby 3178 reusing matrix information to achieve increased efficiency. 3179 3180 Options Database Keys: 3181 + -mat_no_inode - Do not use inodes 3182 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3183 - -mat_aij_oneindex - Internally use indexing starting at 1 3184 rather than 0. Note that when calling MatSetValues(), 3185 the user still MUST index entries starting at 0! 3186 3187 3188 Example usage: 3189 3190 Consider the following 8x8 matrix with 34 non-zero values, that is 3191 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3192 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3193 as follows: 3194 3195 .vb 3196 1 2 0 | 0 3 0 | 0 4 3197 Proc0 0 5 6 | 7 0 0 | 8 0 3198 9 0 10 | 11 0 0 | 12 0 3199 ------------------------------------- 3200 13 0 14 | 15 16 17 | 0 0 3201 Proc1 0 18 0 | 19 20 21 | 0 0 3202 0 0 0 | 22 23 0 | 24 0 3203 ------------------------------------- 3204 Proc2 25 26 27 | 0 0 28 | 29 0 3205 30 0 0 | 31 32 33 | 0 34 3206 .ve 3207 3208 This can be represented as a collection of submatrices as: 3209 3210 .vb 3211 A B C 3212 D E F 3213 G H I 3214 .ve 3215 3216 Where the submatrices A,B,C are owned by proc0, D,E,F are 3217 owned by proc1, G,H,I are owned by proc2. 3218 3219 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3220 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3221 The 'M','N' parameters are 8,8, and have the same values on all procs. 3222 3223 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3224 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3225 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3226 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3227 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3228 matrix, ans [DF] as another SeqAIJ matrix. 3229 3230 When d_nz, o_nz parameters are specified, d_nz storage elements are 3231 allocated for every row of the local diagonal submatrix, and o_nz 3232 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3233 One way to choose d_nz and o_nz is to use the max nonzerors per local 3234 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3235 In this case, the values of d_nz,o_nz are: 3236 .vb 3237 proc0 : dnz = 2, o_nz = 2 3238 proc1 : dnz = 3, o_nz = 2 3239 proc2 : dnz = 1, o_nz = 4 3240 .ve 3241 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3242 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3243 for proc3. i.e we are using 12+15+10=37 storage locations to store 3244 34 values. 3245 3246 When d_nnz, o_nnz parameters are specified, the storage is specified 3247 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3248 In the above case the values for d_nnz,o_nnz are: 3249 .vb 3250 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3251 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3252 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3253 .ve 3254 Here the space allocated is sum of all the above values i.e 34, and 3255 hence pre-allocation is perfect. 3256 3257 Level: intermediate 3258 3259 .keywords: matrix, aij, compressed row, sparse, parallel 3260 3261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3262 MPIAIJ, MatCreateMPIAIJWithArrays() 3263 @*/ 3264 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3265 { 3266 PetscErrorCode ierr; 3267 PetscMPIInt size; 3268 3269 PetscFunctionBegin; 3270 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3271 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3273 if (size > 1) { 3274 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3275 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3276 } else { 3277 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3278 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3279 } 3280 PetscFunctionReturn(0); 3281 } 3282 3283 #undef __FUNCT__ 3284 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3285 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3286 { 3287 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3288 3289 PetscFunctionBegin; 3290 *Ad = a->A; 3291 *Ao = a->B; 3292 *colmap = a->garray; 3293 PetscFunctionReturn(0); 3294 } 3295 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3298 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3299 { 3300 PetscErrorCode ierr; 3301 PetscInt i; 3302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3303 3304 PetscFunctionBegin; 3305 if (coloring->ctype == IS_COLORING_GLOBAL) { 3306 ISColoringValue *allcolors,*colors; 3307 ISColoring ocoloring; 3308 3309 /* set coloring for diagonal portion */ 3310 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3311 3312 /* set coloring for off-diagonal portion */ 3313 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3314 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3315 for (i=0; i<a->B->cmap.n; i++) { 3316 colors[i] = allcolors[a->garray[i]]; 3317 } 3318 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3319 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3320 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3321 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3322 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3323 ISColoringValue *colors; 3324 PetscInt *larray; 3325 ISColoring ocoloring; 3326 3327 /* set coloring for diagonal portion */ 3328 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3329 for (i=0; i<a->A->cmap.n; i++) { 3330 larray[i] = i + A->cmap.rstart; 3331 } 3332 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3333 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3334 for (i=0; i<a->A->cmap.n; i++) { 3335 colors[i] = coloring->colors[larray[i]]; 3336 } 3337 ierr = PetscFree(larray);CHKERRQ(ierr); 3338 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3339 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3340 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3341 3342 /* set coloring for off-diagonal portion */ 3343 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3344 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3345 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3346 for (i=0; i<a->B->cmap.n; i++) { 3347 colors[i] = coloring->colors[larray[i]]; 3348 } 3349 ierr = PetscFree(larray);CHKERRQ(ierr); 3350 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3351 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3352 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3353 } else { 3354 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3355 } 3356 3357 PetscFunctionReturn(0); 3358 } 3359 3360 #if defined(PETSC_HAVE_ADIC) 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3363 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3364 { 3365 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3366 PetscErrorCode ierr; 3367 3368 PetscFunctionBegin; 3369 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3370 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3371 PetscFunctionReturn(0); 3372 } 3373 #endif 3374 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3377 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3378 { 3379 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3380 PetscErrorCode ierr; 3381 3382 PetscFunctionBegin; 3383 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3384 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "MatMerge" 3390 /*@C 3391 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3392 matrices from each processor 3393 3394 Collective on MPI_Comm 3395 3396 Input Parameters: 3397 + comm - the communicators the parallel matrix will live on 3398 . inmat - the input sequential matrices 3399 . n - number of local columns (or PETSC_DECIDE) 3400 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3401 3402 Output Parameter: 3403 . outmat - the parallel matrix generated 3404 3405 Level: advanced 3406 3407 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3408 3409 @*/ 3410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3411 { 3412 PetscErrorCode ierr; 3413 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3414 PetscInt *indx; 3415 PetscScalar *values; 3416 3417 PetscFunctionBegin; 3418 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3419 if (scall == MAT_INITIAL_MATRIX){ 3420 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3421 if (n == PETSC_DECIDE){ 3422 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3423 } 3424 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3425 rstart -= m; 3426 3427 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3428 for (i=0;i<m;i++) { 3429 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3430 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3431 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3432 } 3433 /* This routine will ONLY return MPIAIJ type matrix */ 3434 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3435 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3436 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3437 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3438 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3439 3440 } else if (scall == MAT_REUSE_MATRIX){ 3441 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3442 } else { 3443 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3444 } 3445 3446 for (i=0;i<m;i++) { 3447 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3448 Ii = i + rstart; 3449 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3450 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3451 } 3452 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3453 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3454 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3455 3456 PetscFunctionReturn(0); 3457 } 3458 3459 #undef __FUNCT__ 3460 #define __FUNCT__ "MatFileSplit" 3461 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3462 { 3463 PetscErrorCode ierr; 3464 PetscMPIInt rank; 3465 PetscInt m,N,i,rstart,nnz; 3466 size_t len; 3467 const PetscInt *indx; 3468 PetscViewer out; 3469 char *name; 3470 Mat B; 3471 const PetscScalar *values; 3472 3473 PetscFunctionBegin; 3474 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3475 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3476 /* Should this be the type of the diagonal block of A? */ 3477 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3478 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3479 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3480 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3481 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3482 for (i=0;i<m;i++) { 3483 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3484 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3485 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3486 } 3487 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3488 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3489 3490 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3491 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3492 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3493 sprintf(name,"%s.%d",outfile,rank); 3494 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3495 ierr = PetscFree(name); 3496 ierr = MatView(B,out);CHKERRQ(ierr); 3497 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3498 ierr = MatDestroy(B);CHKERRQ(ierr); 3499 PetscFunctionReturn(0); 3500 } 3501 3502 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3505 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3506 { 3507 PetscErrorCode ierr; 3508 Mat_Merge_SeqsToMPI *merge; 3509 PetscContainer container; 3510 3511 PetscFunctionBegin; 3512 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3513 if (container) { 3514 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3515 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3516 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3517 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3518 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3519 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3520 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3521 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3522 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3523 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3524 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3525 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3526 3527 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3528 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3529 } 3530 ierr = PetscFree(merge);CHKERRQ(ierr); 3531 3532 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3533 PetscFunctionReturn(0); 3534 } 3535 3536 #include "src/mat/utils/freespace.h" 3537 #include "petscbt.h" 3538 static PetscEvent logkey_seqstompinum = 0; 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3541 /*@C 3542 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3543 matrices from each processor 3544 3545 Collective on MPI_Comm 3546 3547 Input Parameters: 3548 + comm - the communicators the parallel matrix will live on 3549 . seqmat - the input sequential matrices 3550 . m - number of local rows (or PETSC_DECIDE) 3551 . n - number of local columns (or PETSC_DECIDE) 3552 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3553 3554 Output Parameter: 3555 . mpimat - the parallel matrix generated 3556 3557 Level: advanced 3558 3559 Notes: 3560 The dimensions of the sequential matrix in each processor MUST be the same. 3561 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3562 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3563 @*/ 3564 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3565 { 3566 PetscErrorCode ierr; 3567 MPI_Comm comm=mpimat->comm; 3568 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3569 PetscMPIInt size,rank,taga,*len_s; 3570 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3571 PetscInt proc,m; 3572 PetscInt **buf_ri,**buf_rj; 3573 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3574 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3575 MPI_Request *s_waits,*r_waits; 3576 MPI_Status *status; 3577 MatScalar *aa=a->a,**abuf_r,*ba_i; 3578 Mat_Merge_SeqsToMPI *merge; 3579 PetscContainer container; 3580 3581 PetscFunctionBegin; 3582 if (!logkey_seqstompinum) { 3583 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3584 } 3585 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3586 3587 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3589 3590 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3591 if (container) { 3592 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3593 } 3594 bi = merge->bi; 3595 bj = merge->bj; 3596 buf_ri = merge->buf_ri; 3597 buf_rj = merge->buf_rj; 3598 3599 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3600 owners = merge->rowmap.range; 3601 len_s = merge->len_s; 3602 3603 /* send and recv matrix values */ 3604 /*-----------------------------*/ 3605 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3606 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3607 3608 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3609 for (proc=0,k=0; proc<size; proc++){ 3610 if (!len_s[proc]) continue; 3611 i = owners[proc]; 3612 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3613 k++; 3614 } 3615 3616 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3617 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3618 ierr = PetscFree(status);CHKERRQ(ierr); 3619 3620 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3621 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3622 3623 /* insert mat values of mpimat */ 3624 /*----------------------------*/ 3625 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3626 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3627 nextrow = buf_ri_k + merge->nrecv; 3628 nextai = nextrow + merge->nrecv; 3629 3630 for (k=0; k<merge->nrecv; k++){ 3631 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3632 nrows = *(buf_ri_k[k]); 3633 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3634 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3635 } 3636 3637 /* set values of ba */ 3638 m = merge->rowmap.n; 3639 for (i=0; i<m; i++) { 3640 arow = owners[rank] + i; 3641 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3642 bnzi = bi[i+1] - bi[i]; 3643 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3644 3645 /* add local non-zero vals of this proc's seqmat into ba */ 3646 anzi = ai[arow+1] - ai[arow]; 3647 aj = a->j + ai[arow]; 3648 aa = a->a + ai[arow]; 3649 nextaj = 0; 3650 for (j=0; nextaj<anzi; j++){ 3651 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3652 ba_i[j] += aa[nextaj++]; 3653 } 3654 } 3655 3656 /* add received vals into ba */ 3657 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3658 /* i-th row */ 3659 if (i == *nextrow[k]) { 3660 anzi = *(nextai[k]+1) - *nextai[k]; 3661 aj = buf_rj[k] + *(nextai[k]); 3662 aa = abuf_r[k] + *(nextai[k]); 3663 nextaj = 0; 3664 for (j=0; nextaj<anzi; j++){ 3665 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3666 ba_i[j] += aa[nextaj++]; 3667 } 3668 } 3669 nextrow[k]++; nextai[k]++; 3670 } 3671 } 3672 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3673 } 3674 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3675 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3676 3677 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3678 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3679 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3680 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3681 PetscFunctionReturn(0); 3682 } 3683 3684 static PetscEvent logkey_seqstompisym = 0; 3685 #undef __FUNCT__ 3686 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3687 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3688 { 3689 PetscErrorCode ierr; 3690 Mat B_mpi; 3691 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3692 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3693 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3694 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3695 PetscInt len,proc,*dnz,*onz; 3696 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3697 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3698 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3699 MPI_Status *status; 3700 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3701 PetscBT lnkbt; 3702 Mat_Merge_SeqsToMPI *merge; 3703 PetscContainer container; 3704 3705 PetscFunctionBegin; 3706 if (!logkey_seqstompisym) { 3707 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3708 } 3709 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3710 3711 /* make sure it is a PETSc comm */ 3712 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3713 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3714 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3715 3716 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3717 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3718 3719 /* determine row ownership */ 3720 /*---------------------------------------------------------*/ 3721 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3722 merge->rowmap.n = m; 3723 merge->rowmap.N = M; 3724 merge->rowmap.bs = 1; 3725 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3726 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3727 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3728 3729 m = merge->rowmap.n; 3730 M = merge->rowmap.N; 3731 owners = merge->rowmap.range; 3732 3733 /* determine the number of messages to send, their lengths */ 3734 /*---------------------------------------------------------*/ 3735 len_s = merge->len_s; 3736 3737 len = 0; /* length of buf_si[] */ 3738 merge->nsend = 0; 3739 for (proc=0; proc<size; proc++){ 3740 len_si[proc] = 0; 3741 if (proc == rank){ 3742 len_s[proc] = 0; 3743 } else { 3744 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3745 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3746 } 3747 if (len_s[proc]) { 3748 merge->nsend++; 3749 nrows = 0; 3750 for (i=owners[proc]; i<owners[proc+1]; i++){ 3751 if (ai[i+1] > ai[i]) nrows++; 3752 } 3753 len_si[proc] = 2*(nrows+1); 3754 len += len_si[proc]; 3755 } 3756 } 3757 3758 /* determine the number and length of messages to receive for ij-structure */ 3759 /*-------------------------------------------------------------------------*/ 3760 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3761 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3762 3763 /* post the Irecv of j-structure */ 3764 /*-------------------------------*/ 3765 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3766 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3767 3768 /* post the Isend of j-structure */ 3769 /*--------------------------------*/ 3770 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3771 sj_waits = si_waits + merge->nsend; 3772 3773 for (proc=0, k=0; proc<size; proc++){ 3774 if (!len_s[proc]) continue; 3775 i = owners[proc]; 3776 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3777 k++; 3778 } 3779 3780 /* receives and sends of j-structure are complete */ 3781 /*------------------------------------------------*/ 3782 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3783 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3784 3785 /* send and recv i-structure */ 3786 /*---------------------------*/ 3787 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3788 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3789 3790 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3791 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3792 for (proc=0,k=0; proc<size; proc++){ 3793 if (!len_s[proc]) continue; 3794 /* form outgoing message for i-structure: 3795 buf_si[0]: nrows to be sent 3796 [1:nrows]: row index (global) 3797 [nrows+1:2*nrows+1]: i-structure index 3798 */ 3799 /*-------------------------------------------*/ 3800 nrows = len_si[proc]/2 - 1; 3801 buf_si_i = buf_si + nrows+1; 3802 buf_si[0] = nrows; 3803 buf_si_i[0] = 0; 3804 nrows = 0; 3805 for (i=owners[proc]; i<owners[proc+1]; i++){ 3806 anzi = ai[i+1] - ai[i]; 3807 if (anzi) { 3808 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3809 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3810 nrows++; 3811 } 3812 } 3813 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3814 k++; 3815 buf_si += len_si[proc]; 3816 } 3817 3818 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3819 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3820 3821 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3822 for (i=0; i<merge->nrecv; i++){ 3823 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3824 } 3825 3826 ierr = PetscFree(len_si);CHKERRQ(ierr); 3827 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3828 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3829 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3830 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3831 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3832 ierr = PetscFree(status);CHKERRQ(ierr); 3833 3834 /* compute a local seq matrix in each processor */ 3835 /*----------------------------------------------*/ 3836 /* allocate bi array and free space for accumulating nonzero column info */ 3837 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3838 bi[0] = 0; 3839 3840 /* create and initialize a linked list */ 3841 nlnk = N+1; 3842 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3843 3844 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3845 len = 0; 3846 len = ai[owners[rank+1]] - ai[owners[rank]]; 3847 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3848 current_space = free_space; 3849 3850 /* determine symbolic info for each local row */ 3851 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3852 nextrow = buf_ri_k + merge->nrecv; 3853 nextai = nextrow + merge->nrecv; 3854 for (k=0; k<merge->nrecv; k++){ 3855 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3856 nrows = *buf_ri_k[k]; 3857 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3858 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3859 } 3860 3861 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3862 len = 0; 3863 for (i=0;i<m;i++) { 3864 bnzi = 0; 3865 /* add local non-zero cols of this proc's seqmat into lnk */ 3866 arow = owners[rank] + i; 3867 anzi = ai[arow+1] - ai[arow]; 3868 aj = a->j + ai[arow]; 3869 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3870 bnzi += nlnk; 3871 /* add received col data into lnk */ 3872 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3873 if (i == *nextrow[k]) { /* i-th row */ 3874 anzi = *(nextai[k]+1) - *nextai[k]; 3875 aj = buf_rj[k] + *nextai[k]; 3876 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3877 bnzi += nlnk; 3878 nextrow[k]++; nextai[k]++; 3879 } 3880 } 3881 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3882 3883 /* if free space is not available, make more free space */ 3884 if (current_space->local_remaining<bnzi) { 3885 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3886 nspacedouble++; 3887 } 3888 /* copy data into free space, then initialize lnk */ 3889 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3890 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3891 3892 current_space->array += bnzi; 3893 current_space->local_used += bnzi; 3894 current_space->local_remaining -= bnzi; 3895 3896 bi[i+1] = bi[i] + bnzi; 3897 } 3898 3899 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3900 3901 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3902 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3903 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3904 3905 /* create symbolic parallel matrix B_mpi */ 3906 /*---------------------------------------*/ 3907 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3908 if (n==PETSC_DECIDE) { 3909 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3910 } else { 3911 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3912 } 3913 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3914 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3915 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3916 3917 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3918 B_mpi->assembled = PETSC_FALSE; 3919 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3920 merge->bi = bi; 3921 merge->bj = bj; 3922 merge->buf_ri = buf_ri; 3923 merge->buf_rj = buf_rj; 3924 merge->coi = PETSC_NULL; 3925 merge->coj = PETSC_NULL; 3926 merge->owners_co = PETSC_NULL; 3927 3928 /* attach the supporting struct to B_mpi for reuse */ 3929 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3930 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3931 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3932 *mpimat = B_mpi; 3933 3934 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3935 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3936 PetscFunctionReturn(0); 3937 } 3938 3939 static PetscEvent logkey_seqstompi = 0; 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatMerge_SeqsToMPI" 3942 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 if (!logkey_seqstompi) { 3948 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3949 } 3950 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3951 if (scall == MAT_INITIAL_MATRIX){ 3952 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3953 } 3954 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3955 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3956 PetscFunctionReturn(0); 3957 } 3958 static PetscEvent logkey_getlocalmat = 0; 3959 #undef __FUNCT__ 3960 #define __FUNCT__ "MatGetLocalMat" 3961 /*@C 3962 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3963 3964 Not Collective 3965 3966 Input Parameters: 3967 + A - the matrix 3968 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3969 3970 Output Parameter: 3971 . A_loc - the local sequential matrix generated 3972 3973 Level: developer 3974 3975 @*/ 3976 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3977 { 3978 PetscErrorCode ierr; 3979 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3980 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3981 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3982 PetscScalar *aa=a->a,*ba=b->a,*ca; 3983 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3984 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3985 3986 PetscFunctionBegin; 3987 if (!logkey_getlocalmat) { 3988 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3989 } 3990 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3991 if (scall == MAT_INITIAL_MATRIX){ 3992 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3993 ci[0] = 0; 3994 for (i=0; i<am; i++){ 3995 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3996 } 3997 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3998 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3999 k = 0; 4000 for (i=0; i<am; i++) { 4001 ncols_o = bi[i+1] - bi[i]; 4002 ncols_d = ai[i+1] - ai[i]; 4003 /* off-diagonal portion of A */ 4004 for (jo=0; jo<ncols_o; jo++) { 4005 col = cmap[*bj]; 4006 if (col >= cstart) break; 4007 cj[k] = col; bj++; 4008 ca[k++] = *ba++; 4009 } 4010 /* diagonal portion of A */ 4011 for (j=0; j<ncols_d; j++) { 4012 cj[k] = cstart + *aj++; 4013 ca[k++] = *aa++; 4014 } 4015 /* off-diagonal portion of A */ 4016 for (j=jo; j<ncols_o; j++) { 4017 cj[k] = cmap[*bj++]; 4018 ca[k++] = *ba++; 4019 } 4020 } 4021 /* put together the new matrix */ 4022 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4023 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4024 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4025 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4026 mat->free_a = PETSC_TRUE; 4027 mat->free_ij = PETSC_TRUE; 4028 mat->nonew = 0; 4029 } else if (scall == MAT_REUSE_MATRIX){ 4030 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4031 ci = mat->i; cj = mat->j; ca = mat->a; 4032 for (i=0; i<am; i++) { 4033 /* off-diagonal portion of A */ 4034 ncols_o = bi[i+1] - bi[i]; 4035 for (jo=0; jo<ncols_o; jo++) { 4036 col = cmap[*bj]; 4037 if (col >= cstart) break; 4038 *ca++ = *ba++; bj++; 4039 } 4040 /* diagonal portion of A */ 4041 ncols_d = ai[i+1] - ai[i]; 4042 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4043 /* off-diagonal portion of A */ 4044 for (j=jo; j<ncols_o; j++) { 4045 *ca++ = *ba++; bj++; 4046 } 4047 } 4048 } else { 4049 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4050 } 4051 4052 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4053 PetscFunctionReturn(0); 4054 } 4055 4056 static PetscEvent logkey_getlocalmatcondensed = 0; 4057 #undef __FUNCT__ 4058 #define __FUNCT__ "MatGetLocalMatCondensed" 4059 /*@C 4060 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4061 4062 Not Collective 4063 4064 Input Parameters: 4065 + A - the matrix 4066 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4067 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4068 4069 Output Parameter: 4070 . A_loc - the local sequential matrix generated 4071 4072 Level: developer 4073 4074 @*/ 4075 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4076 { 4077 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4078 PetscErrorCode ierr; 4079 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4080 IS isrowa,iscola; 4081 Mat *aloc; 4082 4083 PetscFunctionBegin; 4084 if (!logkey_getlocalmatcondensed) { 4085 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4086 } 4087 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4088 if (!row){ 4089 start = A->rmap.rstart; end = A->rmap.rend; 4090 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4091 } else { 4092 isrowa = *row; 4093 } 4094 if (!col){ 4095 start = A->cmap.rstart; 4096 cmap = a->garray; 4097 nzA = a->A->cmap.n; 4098 nzB = a->B->cmap.n; 4099 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4100 ncols = 0; 4101 for (i=0; i<nzB; i++) { 4102 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4103 else break; 4104 } 4105 imark = i; 4106 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4107 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4108 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4109 ierr = PetscFree(idx);CHKERRQ(ierr); 4110 } else { 4111 iscola = *col; 4112 } 4113 if (scall != MAT_INITIAL_MATRIX){ 4114 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4115 aloc[0] = *A_loc; 4116 } 4117 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4118 *A_loc = aloc[0]; 4119 ierr = PetscFree(aloc);CHKERRQ(ierr); 4120 if (!row){ 4121 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4122 } 4123 if (!col){ 4124 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4125 } 4126 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4127 PetscFunctionReturn(0); 4128 } 4129 4130 static PetscEvent logkey_GetBrowsOfAcols = 0; 4131 #undef __FUNCT__ 4132 #define __FUNCT__ "MatGetBrowsOfAcols" 4133 /*@C 4134 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4135 4136 Collective on Mat 4137 4138 Input Parameters: 4139 + A,B - the matrices in mpiaij format 4140 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4141 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4142 4143 Output Parameter: 4144 + rowb, colb - index sets of rows and columns of B to extract 4145 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4146 - B_seq - the sequential matrix generated 4147 4148 Level: developer 4149 4150 @*/ 4151 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4152 { 4153 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4154 PetscErrorCode ierr; 4155 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4156 IS isrowb,iscolb; 4157 Mat *bseq; 4158 4159 PetscFunctionBegin; 4160 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4161 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4162 } 4163 if (!logkey_GetBrowsOfAcols) { 4164 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4165 } 4166 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4167 4168 if (scall == MAT_INITIAL_MATRIX){ 4169 start = A->cmap.rstart; 4170 cmap = a->garray; 4171 nzA = a->A->cmap.n; 4172 nzB = a->B->cmap.n; 4173 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4174 ncols = 0; 4175 for (i=0; i<nzB; i++) { /* row < local row index */ 4176 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4177 else break; 4178 } 4179 imark = i; 4180 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4181 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4182 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4183 ierr = PetscFree(idx);CHKERRQ(ierr); 4184 *brstart = imark; 4185 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4186 } else { 4187 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4188 isrowb = *rowb; iscolb = *colb; 4189 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4190 bseq[0] = *B_seq; 4191 } 4192 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4193 *B_seq = bseq[0]; 4194 ierr = PetscFree(bseq);CHKERRQ(ierr); 4195 if (!rowb){ 4196 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4197 } else { 4198 *rowb = isrowb; 4199 } 4200 if (!colb){ 4201 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4202 } else { 4203 *colb = iscolb; 4204 } 4205 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4206 PetscFunctionReturn(0); 4207 } 4208 4209 static PetscEvent logkey_GetBrowsOfAocols = 0; 4210 #undef __FUNCT__ 4211 #define __FUNCT__ "MatGetBrowsOfAoCols" 4212 /*@C 4213 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4214 of the OFF-DIAGONAL portion of local A 4215 4216 Collective on Mat 4217 4218 Input Parameters: 4219 + A,B - the matrices in mpiaij format 4220 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4221 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4222 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4223 4224 Output Parameter: 4225 + B_oth - the sequential matrix generated 4226 4227 Level: developer 4228 4229 @*/ 4230 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4231 { 4232 VecScatter_MPI_General *gen_to,*gen_from; 4233 PetscErrorCode ierr; 4234 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4235 Mat_SeqAIJ *b_oth; 4236 VecScatter ctx=a->Mvctx; 4237 MPI_Comm comm=ctx->comm; 4238 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4239 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4240 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4241 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4242 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4243 MPI_Status *sstatus,rstatus; 4244 PetscMPIInt jj; 4245 PetscInt *cols,sbs,rbs; 4246 PetscScalar *vals; 4247 4248 PetscFunctionBegin; 4249 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4250 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4251 } 4252 if (!logkey_GetBrowsOfAocols) { 4253 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4254 } 4255 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4256 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4257 4258 gen_to = (VecScatter_MPI_General*)ctx->todata; 4259 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4260 rvalues = gen_from->values; /* holds the length of receiving row */ 4261 svalues = gen_to->values; /* holds the length of sending row */ 4262 nrecvs = gen_from->n; 4263 nsends = gen_to->n; 4264 4265 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4266 srow = gen_to->indices; /* local row index to be sent */ 4267 sstarts = gen_to->starts; 4268 sprocs = gen_to->procs; 4269 sstatus = gen_to->sstatus; 4270 sbs = gen_to->bs; 4271 rstarts = gen_from->starts; 4272 rprocs = gen_from->procs; 4273 rbs = gen_from->bs; 4274 4275 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4276 if (scall == MAT_INITIAL_MATRIX){ 4277 /* i-array */ 4278 /*---------*/ 4279 /* post receives */ 4280 for (i=0; i<nrecvs; i++){ 4281 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4282 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4283 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4284 } 4285 4286 /* pack the outgoing message */ 4287 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4288 rstartsj = sstartsj + nsends +1; 4289 sstartsj[0] = 0; rstartsj[0] = 0; 4290 len = 0; /* total length of j or a array to be sent */ 4291 k = 0; 4292 for (i=0; i<nsends; i++){ 4293 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4294 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4295 for (j=0; j<nrows; j++) { 4296 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4297 for (l=0; l<sbs; l++){ 4298 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4299 rowlen[j*sbs+l] = ncols; 4300 len += ncols; 4301 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4302 } 4303 k++; 4304 } 4305 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4306 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4307 } 4308 /* recvs and sends of i-array are completed */ 4309 i = nrecvs; 4310 while (i--) { 4311 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4312 } 4313 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4314 4315 /* allocate buffers for sending j and a arrays */ 4316 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4317 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4318 4319 /* create i-array of B_oth */ 4320 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4321 b_othi[0] = 0; 4322 len = 0; /* total length of j or a array to be received */ 4323 k = 0; 4324 for (i=0; i<nrecvs; i++){ 4325 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4326 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4327 for (j=0; j<nrows; j++) { 4328 b_othi[k+1] = b_othi[k] + rowlen[j]; 4329 len += rowlen[j]; k++; 4330 } 4331 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4332 } 4333 4334 /* allocate space for j and a arrrays of B_oth */ 4335 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4336 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4337 4338 /* j-array */ 4339 /*---------*/ 4340 /* post receives of j-array */ 4341 for (i=0; i<nrecvs; i++){ 4342 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4343 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4344 } 4345 4346 /* pack the outgoing message j-array */ 4347 k = 0; 4348 for (i=0; i<nsends; i++){ 4349 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4350 bufJ = bufj+sstartsj[i]; 4351 for (j=0; j<nrows; j++) { 4352 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4353 for (ll=0; ll<sbs; ll++){ 4354 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4355 for (l=0; l<ncols; l++){ 4356 *bufJ++ = cols[l]; 4357 } 4358 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4359 } 4360 } 4361 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4362 } 4363 4364 /* recvs and sends of j-array are completed */ 4365 i = nrecvs; 4366 while (i--) { 4367 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4368 } 4369 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4370 } else if (scall == MAT_REUSE_MATRIX){ 4371 sstartsj = *startsj; 4372 rstartsj = sstartsj + nsends +1; 4373 bufa = *bufa_ptr; 4374 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4375 b_otha = b_oth->a; 4376 } else { 4377 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4378 } 4379 4380 /* a-array */ 4381 /*---------*/ 4382 /* post receives of a-array */ 4383 for (i=0; i<nrecvs; i++){ 4384 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4385 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4386 } 4387 4388 /* pack the outgoing message a-array */ 4389 k = 0; 4390 for (i=0; i<nsends; i++){ 4391 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4392 bufA = bufa+sstartsj[i]; 4393 for (j=0; j<nrows; j++) { 4394 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4395 for (ll=0; ll<sbs; ll++){ 4396 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4397 for (l=0; l<ncols; l++){ 4398 *bufA++ = vals[l]; 4399 } 4400 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4401 } 4402 } 4403 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4404 } 4405 /* recvs and sends of a-array are completed */ 4406 i = nrecvs; 4407 while (i--) { 4408 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4409 } 4410 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4411 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4412 4413 if (scall == MAT_INITIAL_MATRIX){ 4414 /* put together the new matrix */ 4415 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4416 4417 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4418 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4419 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4420 b_oth->free_a = PETSC_TRUE; 4421 b_oth->free_ij = PETSC_TRUE; 4422 b_oth->nonew = 0; 4423 4424 ierr = PetscFree(bufj);CHKERRQ(ierr); 4425 if (!startsj || !bufa_ptr){ 4426 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4427 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4428 } else { 4429 *startsj = sstartsj; 4430 *bufa_ptr = bufa; 4431 } 4432 } 4433 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatGetCommunicationStructs" 4439 /*@C 4440 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4441 4442 Not Collective 4443 4444 Input Parameters: 4445 . A - The matrix in mpiaij format 4446 4447 Output Parameter: 4448 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4449 . colmap - A map from global column index to local index into lvec 4450 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4451 4452 Level: developer 4453 4454 @*/ 4455 #if defined (PETSC_USE_CTABLE) 4456 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4457 #else 4458 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4459 #endif 4460 { 4461 Mat_MPIAIJ *a; 4462 4463 PetscFunctionBegin; 4464 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4465 PetscValidPointer(lvec, 2) 4466 PetscValidPointer(colmap, 3) 4467 PetscValidPointer(multScatter, 4) 4468 a = (Mat_MPIAIJ *) A->data; 4469 if (lvec) *lvec = a->lvec; 4470 if (colmap) *colmap = a->colmap; 4471 if (multScatter) *multScatter = a->Mvctx; 4472 PetscFunctionReturn(0); 4473 } 4474 4475 EXTERN_C_BEGIN 4476 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4477 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4478 EXTERN_C_END 4479 4480 /*MC 4481 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4482 4483 Options Database Keys: 4484 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4485 4486 Level: beginner 4487 4488 .seealso: MatCreateMPIAIJ 4489 M*/ 4490 4491 EXTERN_C_BEGIN 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "MatCreate_MPIAIJ" 4494 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4495 { 4496 Mat_MPIAIJ *b; 4497 PetscErrorCode ierr; 4498 PetscMPIInt size; 4499 4500 PetscFunctionBegin; 4501 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4502 4503 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4504 B->data = (void*)b; 4505 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4506 B->factor = 0; 4507 B->rmap.bs = 1; 4508 B->assembled = PETSC_FALSE; 4509 B->mapping = 0; 4510 4511 B->insertmode = NOT_SET_VALUES; 4512 b->size = size; 4513 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4514 4515 /* build cache for off array entries formed */ 4516 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4517 b->donotstash = PETSC_FALSE; 4518 b->colmap = 0; 4519 b->garray = 0; 4520 b->roworiented = PETSC_TRUE; 4521 4522 /* stuff used for matrix vector multiply */ 4523 b->lvec = PETSC_NULL; 4524 b->Mvctx = PETSC_NULL; 4525 4526 /* stuff for MatGetRow() */ 4527 b->rowindices = 0; 4528 b->rowvalues = 0; 4529 b->getrowactive = PETSC_FALSE; 4530 4531 4532 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4533 "MatStoreValues_MPIAIJ", 4534 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4535 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4536 "MatRetrieveValues_MPIAIJ", 4537 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4539 "MatGetDiagonalBlock_MPIAIJ", 4540 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4541 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4542 "MatIsTranspose_MPIAIJ", 4543 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4545 "MatMPIAIJSetPreallocation_MPIAIJ", 4546 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4547 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4548 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4549 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4550 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4551 "MatDiagonalScaleLocal_MPIAIJ", 4552 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4553 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4554 "MatConvert_MPIAIJ_MPICSRPERM", 4555 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4557 "MatConvert_MPIAIJ_MPICRL", 4558 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4559 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 EXTERN_C_END 4563 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4566 /*@C 4567 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4568 and "off-diagonal" part of the matrix in CSR format. 4569 4570 Collective on MPI_Comm 4571 4572 Input Parameters: 4573 + comm - MPI communicator 4574 . m - number of local rows (Cannot be PETSC_DECIDE) 4575 . n - This value should be the same as the local size used in creating the 4576 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4577 calculated if N is given) For square matrices n is almost always m. 4578 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4579 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4580 . i - row indices for "diagonal" portion of matrix 4581 . j - column indices 4582 . a - matrix values 4583 . oi - row indices for "off-diagonal" portion of matrix 4584 . oj - column indices 4585 - oa - matrix values 4586 4587 Output Parameter: 4588 . mat - the matrix 4589 4590 Level: advanced 4591 4592 Notes: 4593 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4594 4595 The i and j indices are 0 based 4596 4597 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4598 4599 4600 .keywords: matrix, aij, compressed row, sparse, parallel 4601 4602 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4603 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4604 @*/ 4605 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4606 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4607 { 4608 PetscErrorCode ierr; 4609 Mat_MPIAIJ *maij; 4610 4611 PetscFunctionBegin; 4612 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4613 if (i[0]) { 4614 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4615 } 4616 if (oi[0]) { 4617 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4618 } 4619 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4620 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4621 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4622 maij = (Mat_MPIAIJ*) (*mat)->data; 4623 maij->donotstash = PETSC_TRUE; 4624 (*mat)->preallocated = PETSC_TRUE; 4625 4626 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4627 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4628 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4629 4630 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4631 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4632 4633 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4634 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4635 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4636 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4637 4638 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4639 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4640 PetscFunctionReturn(0); 4641 } 4642 4643 /* 4644 Special version for direct calls from Fortran 4645 */ 4646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4647 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4649 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4650 #endif 4651 4652 /* Change these macros so can be used in void function */ 4653 #undef CHKERRQ 4654 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4655 #undef SETERRQ2 4656 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4657 #undef SETERRQ 4658 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4659 4660 EXTERN_C_BEGIN 4661 #undef __FUNCT__ 4662 #define __FUNCT__ "matsetvaluesmpiaij_" 4663 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4664 { 4665 Mat mat = *mmat; 4666 PetscInt m = *mm, n = *mn; 4667 InsertMode addv = *maddv; 4668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4669 PetscScalar value; 4670 PetscErrorCode ierr; 4671 4672 MatPreallocated(mat); 4673 if (mat->insertmode == NOT_SET_VALUES) { 4674 mat->insertmode = addv; 4675 } 4676 #if defined(PETSC_USE_DEBUG) 4677 else if (mat->insertmode != addv) { 4678 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4679 } 4680 #endif 4681 { 4682 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4683 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4684 PetscTruth roworiented = aij->roworiented; 4685 4686 /* Some Variables required in the macro */ 4687 Mat A = aij->A; 4688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4689 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4690 PetscScalar *aa = a->a; 4691 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4692 Mat B = aij->B; 4693 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4694 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4695 PetscScalar *ba = b->a; 4696 4697 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4698 PetscInt nonew = a->nonew; 4699 PetscScalar *ap1,*ap2; 4700 4701 PetscFunctionBegin; 4702 for (i=0; i<m; i++) { 4703 if (im[i] < 0) continue; 4704 #if defined(PETSC_USE_DEBUG) 4705 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4706 #endif 4707 if (im[i] >= rstart && im[i] < rend) { 4708 row = im[i] - rstart; 4709 lastcol1 = -1; 4710 rp1 = aj + ai[row]; 4711 ap1 = aa + ai[row]; 4712 rmax1 = aimax[row]; 4713 nrow1 = ailen[row]; 4714 low1 = 0; 4715 high1 = nrow1; 4716 lastcol2 = -1; 4717 rp2 = bj + bi[row]; 4718 ap2 = ba + bi[row]; 4719 rmax2 = bimax[row]; 4720 nrow2 = bilen[row]; 4721 low2 = 0; 4722 high2 = nrow2; 4723 4724 for (j=0; j<n; j++) { 4725 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4726 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4727 if (in[j] >= cstart && in[j] < cend){ 4728 col = in[j] - cstart; 4729 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4730 } else if (in[j] < 0) continue; 4731 #if defined(PETSC_USE_DEBUG) 4732 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4733 #endif 4734 else { 4735 if (mat->was_assembled) { 4736 if (!aij->colmap) { 4737 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4738 } 4739 #if defined (PETSC_USE_CTABLE) 4740 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4741 col--; 4742 #else 4743 col = aij->colmap[in[j]] - 1; 4744 #endif 4745 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4746 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4747 col = in[j]; 4748 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4749 B = aij->B; 4750 b = (Mat_SeqAIJ*)B->data; 4751 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4752 rp2 = bj + bi[row]; 4753 ap2 = ba + bi[row]; 4754 rmax2 = bimax[row]; 4755 nrow2 = bilen[row]; 4756 low2 = 0; 4757 high2 = nrow2; 4758 bm = aij->B->rmap.n; 4759 ba = b->a; 4760 } 4761 } else col = in[j]; 4762 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4763 } 4764 } 4765 } else { 4766 if (!aij->donotstash) { 4767 if (roworiented) { 4768 if (ignorezeroentries && v[i*n] == 0.0) continue; 4769 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4770 } else { 4771 if (ignorezeroentries && v[i] == 0.0) continue; 4772 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4773 } 4774 } 4775 } 4776 }} 4777 PetscFunctionReturnVoid(); 4778 } 4779 EXTERN_C_END 4780 4781