#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash table lookup cost; without it, it is not scalable
   (each processor stores an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
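/*
   Note (an illustrative sketch added for exposition; the numbers below are hypothetical,
   not taken from any particular run): an MPIAIJ matrix keeps its local rows in two
   sequential AIJ matrices, aij->A for the "diagonal" block (the columns this process
   owns, cstart..cend-1) and aij->B for the "off-diagonal" block.  B is stored with
   compressed column indices: B's local column j corresponds to global column
   aij->garray[j], where garray[] lists only those off-process global columns that
   actually appear in B.  The colmap built above is the inverse map used by
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below:

       local col j in B      -> global col:          garray[j]
       global col g (remote) -> local col in B:      colmap[g] - 1                (array version)
                                 or PetscTableFind(colmap,g+1,&lc); lc - 1        (PETSC_USE_CTABLE)

   For example, if a process owns global columns 100..199 and its off-diagonal block
   references only global columns 3, 57 and 240, then garray = {3,57,240}, B has three
   columns, and the array version stores colmap[3]=1, colmap[57]=2, colmap[240]=3
   (shifted by one so that 0 can mean "column not present in B").
*/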
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 &&
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt 
nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero 
the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
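/*
   Summary note (a sketch of the communication pattern used by the Mult routines above;
   A_d, B and lvec denote aij->A, aij->B and the ghost work vector aij->lvec):

     MatMult / MatMultAdd:
         lvec = ghost values of x, gathered with
                VecScatterBegin/End(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)
         y    = A_d * x_local + B * lvec

     MatMultTranspose / MatMultTransposeAdd:
         lvec = B^T * x_local           (contributions destined for other processes)
         y    = A_d^T * x_local  plus the reverse scatter-add of lvec, i.e.
                VecScatterBegin/End(Mvctx, lvec, y, ADD_VALUES, SCATTER_REVERSE)

   In each case the local product with A_d is performed between VecScatterBegin() and
   VecScatterEnd() so that computation overlaps the communication.
*/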
__FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs enough space to hold the largest local nz of any processor */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file
*/ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == 
PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS 
colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = 
irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_COLUMNS_SORTED: 1208 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1209 case MAT_KEEP_ZEROED_ROWS: 1210 case MAT_NEW_NONZERO_LOCATION_ERR: 1211 case MAT_USE_INODES: 1212 case MAT_IGNORE_ZERO_ENTRIES: 1213 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1214 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1215 break; 1216 case MAT_ROW_ORIENTED: 1217 a->roworiented = flg; 1218 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1219 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1220 break; 1221 case MAT_ROWS_SORTED: 1222 case MAT_NEW_DIAGONALS: 1223 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1224 break; 1225 case MAT_IGNORE_OFF_PROC_ENTRIES: 1226 a->donotstash = PETSC_TRUE; 1227 break; 1228 case MAT_SYMMETRIC: 1229 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1230 break; 1231 case MAT_STRUCTURALLY_SYMMETRIC: 1232 case MAT_HERMITIAN: 1233 case MAT_SYMMETRY_ETERNAL: 1234 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1235 break; 1236 default: 1237 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetRow_MPIAIJ" 1244 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1245 { 1246 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1247 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1248 PetscErrorCode ierr; 1249 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1250 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1251 PetscInt *cmap,*idx_p; 1252 1253 PetscFunctionBegin; 1254 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1255 mat->getrowactive = PETSC_TRUE; 1256 1257 if (!mat->rowvalues && (idx || v)) { 1258 /* 1259 allocate enough space to hold information from the longest row. 
1260 */ 1261 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1262 PetscInt max = 1,tmp; 1263 for (i=0; i<matin->rmap.n; i++) { 1264 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1265 if (max < tmp) { max = tmp; } 1266 } 1267 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1268 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1269 } 1270 1271 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1272 lrow = row - rstart; 1273 1274 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1275 if (!v) {pvA = 0; pvB = 0;} 1276 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1277 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1278 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1279 nztot = nzA + nzB; 1280 1281 cmap = mat->garray; 1282 if (v || idx) { 1283 if (nztot) { 1284 /* Sort by increasing column numbers, assuming A and B already sorted */ 1285 PetscInt imark = -1; 1286 if (v) { 1287 *v = v_p = mat->rowvalues; 1288 for (i=0; i<nzB; i++) { 1289 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1290 else break; 1291 } 1292 imark = i; 1293 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1294 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1295 } 1296 if (idx) { 1297 *idx = idx_p = mat->rowindices; 1298 if (imark > -1) { 1299 for (i=0; i<imark; i++) { 1300 idx_p[i] = cmap[cworkB[i]]; 1301 } 1302 } else { 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1305 else break; 1306 } 1307 imark = i; 1308 } 1309 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1310 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1311 } 1312 } else { 1313 if (idx) *idx = 0; 1314 if (v) *v = 0; 1315 } 1316 } 1317 *nz = nztot; 1318 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1319 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1325 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1328 1329 PetscFunctionBegin; 1330 if (!aij->getrowactive) { 1331 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1332 } 1333 aij->getrowactive = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatNorm_MPIAIJ" 1339 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1343 PetscErrorCode ierr; 1344 PetscInt i,j,cstart = mat->cmap.rstart; 1345 PetscReal sum = 0.0; 1346 PetscScalar *v; 1347 1348 PetscFunctionBegin; 1349 if (aij->size == 1) { 1350 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1351 } else { 1352 if (type == NORM_FROBENIUS) { 1353 v = amat->a; 1354 for (i=0; i<amat->nz; i++) { 1355 #if defined(PETSC_USE_COMPLEX) 1356 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1357 #else 1358 sum += (*v)*(*v); v++; 1359 #endif 1360 } 1361 v = bmat->a; 1362 for (i=0; i<bmat->nz; i++) { 1363 #if defined(PETSC_USE_COMPLEX) 1364 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1365 #else 1366 sum += (*v)*(*v); v++; 1367 #endif 1368 } 1369 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1370 *norm = sqrt(*norm); 
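      /*
         A short note restating the reduction above: each process accumulates
         sum = sum of |a_ij|^2 over the local nonzeros of both A and B, the
         MPI_Allreduce adds these partial sums across all processes, and the
         square root then gives the global Frobenius norm
             ||A||_F = sqrt( sum_ij |a_ij|^2 ).
      */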
1371 } else if (type == NORM_1) { /* max column norm */ 1372 PetscReal *tmp,*tmp2; 1373 PetscInt *jj,*garray = aij->garray; 1374 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1375 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1376 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1377 *norm = 0.0; 1378 v = amat->a; jj = amat->j; 1379 for (j=0; j<amat->nz; j++) { 1380 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1381 } 1382 v = bmat->a; jj = bmat->j; 1383 for (j=0; j<bmat->nz; j++) { 1384 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1385 } 1386 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 for (j=0; j<mat->cmap.N; j++) { 1388 if (tmp2[j] > *norm) *norm = tmp2[j]; 1389 } 1390 ierr = PetscFree(tmp);CHKERRQ(ierr); 1391 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1392 } else if (type == NORM_INFINITY) { /* max row norm */ 1393 PetscReal ntemp = 0.0; 1394 for (j=0; j<aij->A->rmap.n; j++) { 1395 v = amat->a + amat->i[j]; 1396 sum = 0.0; 1397 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1398 sum += PetscAbsScalar(*v); v++; 1399 } 1400 v = bmat->a + bmat->i[j]; 1401 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1402 sum += PetscAbsScalar(*v); v++; 1403 } 1404 if (sum > ntemp) ntemp = sum; 1405 } 1406 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1407 } else { 1408 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1409 } 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatTranspose_MPIAIJ" 1416 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1417 { 1418 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1419 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1420 PetscErrorCode ierr; 1421 PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz; 1422 PetscInt cstart=A->cmap.rstart,ncol; 1423 Mat B; 1424 PetscScalar *array; 1425 1426 PetscFunctionBegin; 1427 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1428 1429 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1430 ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n; 1431 ai = Aloc->i; aj = Aloc->j; 1432 bi = Bloc->i; bj = Bloc->j; 1433 ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1434 cols = d_nnz + na + 1; /* work space to be used by B part */ 1435 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1436 for (i=0; i<ai[ma]; i++){ 1437 d_nnz[aj[i]] ++; 1438 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1439 } 1440 1441 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1442 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1443 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1444 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1445 1446 /* copy over the A part */ 1447 array = Aloc->a; 1448 row = A->rmap.rstart; 1449 for (i=0; i<ma; i++) { 1450 ncol = ai[i+1]-ai[i]; 1451 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1452 row++; array += ncol; aj += ncol; 1453 } 1454 aj = Aloc->j; 1455 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1456 1457 /* copy over the B part */ 1458 array = Bloc->a; 1459 row = A->rmap.rstart; 1460 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1461 for (i=0; i<mb; i++) { 1462 ncol = bi[i+1]-bi[i]; 1463 ierr = 
MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1464 row++; array += ncol; cols += ncol; 1465 } 1466 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1467 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1468 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1469 if (matout) { 1470 *matout = B; 1471 } else { 1472 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1479 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1480 { 1481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1482 Mat a = aij->A,b = aij->B; 1483 PetscErrorCode ierr; 1484 PetscInt s1,s2,s3; 1485 1486 PetscFunctionBegin; 1487 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1488 if (rr) { 1489 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1490 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1491 /* Overlap communication with computation. */ 1492 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 } 1494 if (ll) { 1495 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1496 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1497 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1498 } 1499 /* scale the diagonal block */ 1500 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1501 1502 if (rr) { 1503 /* Do a scatter end and then right scale the off-diagonal block */ 1504 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1506 } 1507 1508 PetscFunctionReturn(0); 1509 } 1510 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1513 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1514 { 1515 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1516 PetscErrorCode ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1520 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1525 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1526 { 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatEqual_MPIAIJ" 1537 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1538 { 1539 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1540 Mat a,b,c,d; 1541 PetscTruth flg; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 a = matA->A; b = matA->B; 1546 c = matB->A; d = matB->B; 1547 1548 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1549 if (flg) { 1550 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1551 } 1552 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCopy_MPIAIJ" 1558 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1559 { 1560 PetscErrorCode ierr; 1561 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1562 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1563 1564 PetscFunctionBegin; 1565 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1566 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1567 /* because of the column compression in the off-processor part of the matrix a->B, 1568 the number of columns in a->B and b->B may be different, hence we cannot call 1569 the MatCopy() directly on the two parts. If need be, we can provide a more 1570 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1571 then copying the submatrices */ 1572 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1575 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1582 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1583 { 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #include "petscblaslapack.h" 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatAXPY_MPIAIJ" 1594 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1595 { 1596 PetscErrorCode ierr; 1597 PetscInt i; 1598 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1599 PetscBLASInt bnz,one=1; 1600 Mat_SeqAIJ *x,*y; 1601 1602 PetscFunctionBegin; 1603 if (str == SAME_NONZERO_PATTERN) { 1604 PetscScalar alpha = a; 1605 x = (Mat_SeqAIJ *)xx->A->data; 1606 y = (Mat_SeqAIJ *)yy->A->data; 1607 bnz = (PetscBLASInt)x->nz; 1608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1609 x = (Mat_SeqAIJ *)xx->B->data; 1610 y = (Mat_SeqAIJ *)yy->B->data; 1611 bnz = (PetscBLASInt)x->nz; 1612 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1613 } else if (str == SUBSET_NONZERO_PATTERN) { 1614 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1615 1616 x = (Mat_SeqAIJ *)xx->B->data; 1617 y = (Mat_SeqAIJ *)yy->B->data; 1618 if (y->xtoy && y->XtoY != xx->B) { 1619 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1620 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1621 } 1622 if (!y->xtoy) { /* get xtoy */ 1623 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1624 y->XtoY = xx->B; 1625 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1626 } 1627 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1628 } else { 1629 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatConjugate_MPIAIJ" 1638 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1639 { 1640 #if defined(PETSC_USE_COMPLEX) 1641 PetscErrorCode ierr; 1642 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1643 1644 PetscFunctionBegin; 1645 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1646 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1647 #else 1648 PetscFunctionBegin; 1649 #endif 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatRealPart_MPIAIJ" 1655 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1656 { 1657 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1658 PetscErrorCode ierr; 1659 1660 PetscFunctionBegin; 1661 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1662 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1668 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1669 { 1670 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 1671 PetscErrorCode ierr; 1672 1673 PetscFunctionBegin; 1674 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1675 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #ifdef PETSC_HAVE_PBGL 1680 1681 #include <boost/parallel/mpi/bsp_process_group.hpp> 1682 #include <boost/graph/distributed/ilu_default_graph.hpp> 1683 #include <boost/graph/distributed/ilu_0_block.hpp> 1684 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1685 #include <boost/graph/distributed/petsc/interface.hpp> 1686 #include <boost/multi_array.hpp> 1687 #include <boost/parallel/distributed_property_map.hpp> 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1691 /* 1692 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1693 */ 1694 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1695 { 1696 namespace petsc = boost::distributed::petsc; 1697 1698 namespace graph_dist = boost::graph::distributed; 1699 using boost::graph::distributed::ilu_default::process_group_type; 1700 using boost::graph::ilu_permuted; 1701 1702 PetscTruth row_identity, col_identity; 1703 PetscContainer c; 1704 PetscInt m, n, M, N; 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1709 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1710 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1711 if (!row_identity || !col_identity) { 1712 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1713 } 1714 1715 process_group_type pg; 1716 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1717 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1718 lgraph_type& level_graph = *lgraph_p; 1719 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1720 1721 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1722 ilu_permuted(level_graph); 1723 1724 /* put together the new matrix */ 1725 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1726 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1727 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1728 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1729 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1730 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1731 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1732 (*fact)->factor = FACTOR_LU; 1733 1734 ierr = PetscContainerCreate(A->comm, &c); 1735 ierr = PetscContainerSetPointer(c, lgraph_p); 1736 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1742 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1743 { 1744 PetscFunctionBegin; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatSolve_MPIAIJ" 1750 /* 1751 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1752 */ 1753 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1754 { 1755 namespace graph_dist = boost::graph::distributed; 1756 1757 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1758 lgraph_type* lgraph_p; 1759 PetscContainer c; 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 ierr = 
PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1764 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1765 ierr = VecCopy(b, x); CHKERRQ(ierr); 1766 1767 PetscScalar* array_x; 1768 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1769 PetscInt sx; 1770 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1771 1772 PetscScalar* array_b; 1773 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1774 PetscInt sb; 1775 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1776 1777 lgraph_type& level_graph = *lgraph_p; 1778 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1779 1780 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1781 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1782 ref_x(array_x, boost::extents[num_vertices(graph)]); 1783 1784 typedef boost::iterator_property_map<array_ref_type::iterator, 1785 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1786 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1787 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1788 1789 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1790 1791 PetscFunctionReturn(0); 1792 } 1793 #endif 1794 1795 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1796 PetscInt nzlocal,nsends,nrecvs; 1797 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1798 PetscScalar *sbuf_a,**rbuf_a; 1799 PetscErrorCode (*MatDestroy)(Mat); 1800 } Mat_Redundant; 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1804 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1805 { 1806 PetscErrorCode ierr; 1807 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1808 PetscInt i; 1809 1810 PetscFunctionBegin; 1811 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1812 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1813 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1814 for (i=0; i<redund->nrecvs; i++){ 1815 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1816 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1817 } 1818 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1819 ierr = PetscFree(redund);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatDestroy_MatRedundant" 1825 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1826 { 1827 PetscErrorCode ierr; 1828 PetscContainer container; 1829 Mat_Redundant *redund=PETSC_NULL; 1830 1831 PetscFunctionBegin; 1832 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1833 if (container) { 1834 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1835 } else { 1836 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1837 } 1838 A->ops->destroy = redund->MatDestroy; 1839 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1840 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1841 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1847 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1848 { 1849 PetscMPIInt rank,size; 1850 MPI_Comm comm=mat->comm; 1851 PetscErrorCode ierr; 1852 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1853 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1854 PetscInt 
*rowrange=mat->rmap.range; 1855 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1856 Mat A=aij->A,B=aij->B,C=*matredundant; 1857 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1858 PetscScalar *sbuf_a; 1859 PetscInt nzlocal=a->nz+b->nz; 1860 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1861 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1862 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1863 PetscScalar *vals,*aworkA,*aworkB; 1864 PetscMPIInt tag1,tag2,tag3,imdex; 1865 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1866 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1867 MPI_Status recv_status,*send_status; 1868 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1869 PetscInt **rbuf_j=PETSC_NULL; 1870 PetscScalar **rbuf_a=PETSC_NULL; 1871 Mat_Redundant *redund=PETSC_NULL; 1872 PetscContainer container; 1873 1874 PetscFunctionBegin; 1875 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1877 1878 if (reuse == MAT_REUSE_MATRIX) { 1879 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1880 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1881 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1882 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1883 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1884 if (container) { 1885 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1886 } else { 1887 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 1888 } 1889 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 1890 1891 nsends = redund->nsends; 1892 nrecvs = redund->nrecvs; 1893 send_rank = redund->send_rank; recv_rank = send_rank + size; 1894 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1895 sbuf_j = redund->sbuf_j; 1896 sbuf_a = redund->sbuf_a; 1897 rbuf_j = redund->rbuf_j; 1898 rbuf_a = redund->rbuf_a; 1899 } 1900 1901 if (reuse == MAT_INITIAL_MATRIX){ 1902 PetscMPIInt subrank,subsize; 1903 PetscInt nleftover,np_subcomm; 1904 /* get the destination processors' id send_rank, nsends and nrecvs */ 1905 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1906 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1907 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1908 recv_rank = send_rank + size; 1909 np_subcomm = size/nsubcomm; 1910 nleftover = size - nsubcomm*np_subcomm; 1911 nsends = 0; nrecvs = 0; 1912 for (i=0; i<size; i++){ /* i=rank*/ 1913 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1914 send_rank[nsends] = i; nsends++; 1915 recv_rank[nrecvs++] = i; 1916 } 1917 } 1918 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1919 i = size-nleftover-1; 1920 j = 0; 1921 while (j < nsubcomm - nleftover){ 1922 send_rank[nsends++] = i; 1923 i--; j++; 1924 } 1925 } 1926 1927 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1928 for (i=0; i<nleftover; i++){ 1929 recv_rank[nrecvs++] = size-nleftover+i; 1930 } 1931 } 1932 1933 /* allocate sbuf_j, sbuf_a */ 1934 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1935 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1936 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1937 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1938 1939 /* copy mat's local entries into the buffers */ 1940 if (reuse == MAT_INITIAL_MATRIX){ 1941 rownz_max = 0; 1942 rptr = sbuf_j; 1943 cols = sbuf_j + rend-rstart + 1; 1944 vals = sbuf_a; 1945 rptr[0] = 0; 1946 for (i=0; i<rend-rstart; i++){ 1947 row = i + rstart; 1948 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1949 ncols = nzA + nzB; 1950 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1951 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1952 /* load the column indices for this row into cols */ 1953 lwrite = 0; 1954 for (l=0; l<nzB; l++) { 1955 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1956 vals[lwrite] = aworkB[l]; 1957 cols[lwrite++] = ctmp; 1958 } 1959 } 1960 for (l=0; l<nzA; l++){ 1961 vals[lwrite] = aworkA[l]; 1962 cols[lwrite++] = cstart + cworkA[l]; 1963 } 1964 for (l=0; l<nzB; l++) { 1965 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1966 vals[lwrite] = aworkB[l]; 1967 cols[lwrite++] = ctmp; 1968 } 1969 } 1970 vals += ncols; 1971 cols += ncols; 1972 rptr[i+1] = rptr[i] + ncols; 1973 if (rownz_max < ncols) rownz_max = ncols; 1974 } 1975 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1976 } else { /* only copy matrix values into sbuf_a */ 1977 rptr = sbuf_j; 1978 vals = sbuf_a; 1979 rptr[0] = 0; 1980 for (i=0; i<rend-rstart; i++){ 1981 row = i + rstart; 1982 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1983 ncols = nzA + nzB; 1984 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1985 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1986 lwrite = 0; 1987 for (l=0; l<nzB; l++) { 1988 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1989 } 1990 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1991 for (l=0; l<nzB; l++) { 
1992 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 1993 } 1994 vals += ncols; 1995 rptr[i+1] = rptr[i] + ncols; 1996 } 1997 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1998 1999 /* send nzlocal to others, and recv other's nzlocal */ 2000 /*--------------------------------------------------*/ 2001 if (reuse == MAT_INITIAL_MATRIX){ 2002 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2003 s_waits2 = s_waits3 + nsends; 2004 s_waits1 = s_waits2 + nsends; 2005 r_waits1 = s_waits1 + nsends; 2006 r_waits2 = r_waits1 + nrecvs; 2007 r_waits3 = r_waits2 + nrecvs; 2008 } else { 2009 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2010 r_waits3 = s_waits3 + nsends; 2011 } 2012 2013 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2014 if (reuse == MAT_INITIAL_MATRIX){ 2015 /* get new tags to keep the communication clean */ 2016 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2017 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2018 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2019 rbuf_nz = sbuf_nz + nsends; 2020 2021 /* post receives of other's nzlocal */ 2022 for (i=0; i<nrecvs; i++){ 2023 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2024 } 2025 /* send nzlocal to others */ 2026 for (i=0; i<nsends; i++){ 2027 sbuf_nz[i] = nzlocal; 2028 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2029 } 2030 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2031 count = nrecvs; 2032 while (count) { 2033 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2034 recv_rank[imdex] = recv_status.MPI_SOURCE; 2035 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2036 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2037 2038 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2039 rbuf_nz[imdex] += i + 2; 2040 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2041 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2042 count--; 2043 } 2044 /* wait on sends of nzlocal */ 2045 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2046 /* send mat->i,j to others, and recv from other's */ 2047 /*------------------------------------------------*/ 2048 for (i=0; i<nsends; i++){ 2049 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2050 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2051 } 2052 /* wait on receives of mat->i,j */ 2053 /*------------------------------*/ 2054 count = nrecvs; 2055 while (count) { 2056 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2057 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2058 count--; 2059 } 2060 /* wait on sends of mat->i,j */ 2061 /*---------------------------*/ 2062 if (nsends) { 2063 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2064 } 2065 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2066 2067 /* post receives, send and receive mat->a */ 2068 /*----------------------------------------*/ 
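  /* note: unlike the index exchange above, which is done only for MAT_INITIAL_MATRIX, the numerical values are exchanged on every call, including MAT_REUSE_MATRIX */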
2069 for (imdex=0; imdex<nrecvs; imdex++) { 2070 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2071 } 2072 for (i=0; i<nsends; i++){ 2073 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2074 } 2075 count = nrecvs; 2076 while (count) { 2077 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2078 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2079 count--; 2080 } 2081 if (nsends) { 2082 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2083 } 2084 2085 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2086 2087 /* create redundant matrix */ 2088 /*-------------------------*/ 2089 if (reuse == MAT_INITIAL_MATRIX){ 2090 /* compute rownz_max for preallocation */ 2091 for (imdex=0; imdex<nrecvs; imdex++){ 2092 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2093 rptr = rbuf_j[imdex]; 2094 for (i=0; i<j; i++){ 2095 ncols = rptr[i+1] - rptr[i]; 2096 if (rownz_max < ncols) rownz_max = ncols; 2097 } 2098 } 2099 2100 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2101 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2102 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2103 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2104 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2105 } else { 2106 C = *matredundant; 2107 } 2108 2109 /* insert local matrix entries */ 2110 rptr = sbuf_j; 2111 cols = sbuf_j + rend-rstart + 1; 2112 vals = sbuf_a; 2113 for (i=0; i<rend-rstart; i++){ 2114 row = i + rstart; 2115 ncols = rptr[i+1] - rptr[i]; 2116 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2117 vals += ncols; 2118 cols += ncols; 2119 } 2120 /* insert received matrix entries */ 2121 for (imdex=0; imdex<nrecvs; imdex++){ 2122 rstart = rowrange[recv_rank[imdex]]; 2123 rend = rowrange[recv_rank[imdex]+1]; 2124 rptr = rbuf_j[imdex]; 2125 cols = rbuf_j[imdex] + rend-rstart + 1; 2126 vals = rbuf_a[imdex]; 2127 for (i=0; i<rend-rstart; i++){ 2128 row = i + rstart; 2129 ncols = rptr[i+1] - rptr[i]; 2130 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2131 vals += ncols; 2132 cols += ncols; 2133 } 2134 } 2135 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2137 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2138 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2139 if (reuse == MAT_INITIAL_MATRIX){ 2140 PetscContainer container; 2141 *matredundant = C; 2142 /* create a supporting struct and attach it to C for reuse */ 2143 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2144 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2145 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2146 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2147 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2148 2149 redund->nzlocal = nzlocal; 2150 redund->nsends = nsends; 2151 redund->nrecvs = nrecvs; 2152 redund->send_rank = send_rank; 2153 redund->sbuf_nz = sbuf_nz; 2154 redund->sbuf_j = sbuf_j; 2155 redund->sbuf_a = sbuf_a; 2156 redund->rbuf_j = 
rbuf_j; 2157 redund->rbuf_a = rbuf_a; 2158 2159 redund->MatDestroy = C->ops->destroy; 2160 C->ops->destroy = MatDestroy_MatRedundant; 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 /* -------------------------------------------------------------------*/ 2166 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2167 MatGetRow_MPIAIJ, 2168 MatRestoreRow_MPIAIJ, 2169 MatMult_MPIAIJ, 2170 /* 4*/ MatMultAdd_MPIAIJ, 2171 MatMultTranspose_MPIAIJ, 2172 MatMultTransposeAdd_MPIAIJ, 2173 #ifdef PETSC_HAVE_PBGL 2174 MatSolve_MPIAIJ, 2175 #else 2176 0, 2177 #endif 2178 0, 2179 0, 2180 /*10*/ 0, 2181 0, 2182 0, 2183 MatRelax_MPIAIJ, 2184 MatTranspose_MPIAIJ, 2185 /*15*/ MatGetInfo_MPIAIJ, 2186 MatEqual_MPIAIJ, 2187 MatGetDiagonal_MPIAIJ, 2188 MatDiagonalScale_MPIAIJ, 2189 MatNorm_MPIAIJ, 2190 /*20*/ MatAssemblyBegin_MPIAIJ, 2191 MatAssemblyEnd_MPIAIJ, 2192 0, 2193 MatSetOption_MPIAIJ, 2194 MatZeroEntries_MPIAIJ, 2195 /*25*/ MatZeroRows_MPIAIJ, 2196 0, 2197 #ifdef PETSC_HAVE_PBGL 2198 MatLUFactorNumeric_MPIAIJ, 2199 #else 2200 0, 2201 #endif 2202 0, 2203 0, 2204 /*30*/ MatSetUpPreallocation_MPIAIJ, 2205 #ifdef PETSC_HAVE_PBGL 2206 MatILUFactorSymbolic_MPIAIJ, 2207 #else 2208 0, 2209 #endif 2210 0, 2211 0, 2212 0, 2213 /*35*/ MatDuplicate_MPIAIJ, 2214 0, 2215 0, 2216 0, 2217 0, 2218 /*40*/ MatAXPY_MPIAIJ, 2219 MatGetSubMatrices_MPIAIJ, 2220 MatIncreaseOverlap_MPIAIJ, 2221 MatGetValues_MPIAIJ, 2222 MatCopy_MPIAIJ, 2223 /*45*/ 0, 2224 MatScale_MPIAIJ, 2225 0, 2226 0, 2227 0, 2228 /*50*/ MatSetBlockSize_MPIAIJ, 2229 0, 2230 0, 2231 0, 2232 0, 2233 /*55*/ MatFDColoringCreate_MPIAIJ, 2234 0, 2235 MatSetUnfactored_MPIAIJ, 2236 MatPermute_MPIAIJ, 2237 0, 2238 /*60*/ MatGetSubMatrix_MPIAIJ, 2239 MatDestroy_MPIAIJ, 2240 MatView_MPIAIJ, 2241 0, 2242 0, 2243 /*65*/ 0, 2244 0, 2245 0, 2246 0, 2247 0, 2248 /*70*/ 0, 2249 0, 2250 MatSetColoring_MPIAIJ, 2251 #if defined(PETSC_HAVE_ADIC) 2252 MatSetValuesAdic_MPIAIJ, 2253 #else 2254 0, 2255 #endif 2256 MatSetValuesAdifor_MPIAIJ, 2257 /*75*/ 0, 2258 0, 2259 0, 2260 0, 2261 0, 2262 /*80*/ 0, 2263 0, 2264 0, 2265 0, 2266 /*84*/ MatLoad_MPIAIJ, 2267 0, 2268 0, 2269 0, 2270 0, 2271 0, 2272 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2273 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2274 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2275 MatPtAP_Basic, 2276 MatPtAPSymbolic_MPIAIJ, 2277 /*95*/ MatPtAPNumeric_MPIAIJ, 2278 0, 2279 0, 2280 0, 2281 0, 2282 /*100*/0, 2283 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2284 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2285 MatConjugate_MPIAIJ, 2286 0, 2287 /*105*/MatSetValuesRow_MPIAIJ, 2288 MatRealPart_MPIAIJ, 2289 MatImaginaryPart_MPIAIJ, 2290 0, 2291 0, 2292 /*110*/0, 2293 MatGetRedundantMatrix_MPIAIJ}; 2294 2295 /* ----------------------------------------------------------------------------------------*/ 2296 2297 EXTERN_C_BEGIN 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2300 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2301 { 2302 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2307 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2308 PetscFunctionReturn(0); 2309 } 2310 EXTERN_C_END 2311 2312 EXTERN_C_BEGIN 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2315 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2316 { 2317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2322 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 EXTERN_C_END 2326 2327 #include "petscpc.h" 2328 EXTERN_C_BEGIN 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2331 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2332 { 2333 Mat_MPIAIJ *b; 2334 PetscErrorCode ierr; 2335 PetscInt i; 2336 2337 PetscFunctionBegin; 2338 B->preallocated = PETSC_TRUE; 2339 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2340 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2341 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2342 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2343 2344 B->rmap.bs = B->cmap.bs = 1; 2345 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2346 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2347 if (d_nnz) { 2348 for (i=0; i<B->rmap.n; i++) { 2349 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2350 } 2351 } 2352 if (o_nnz) { 2353 for (i=0; i<B->rmap.n; i++) { 2354 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2355 } 2356 } 2357 b = (Mat_MPIAIJ*)B->data; 2358 2359 /* Explicitly create 2 MATSEQAIJ matrices. */ 2360 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2361 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2362 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2363 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2364 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2365 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2366 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2367 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2368 2369 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2370 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2371 2372 PetscFunctionReturn(0); 2373 } 2374 EXTERN_C_END 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2378 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2379 { 2380 Mat mat; 2381 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 *newmat = 0; 2386 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2387 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2388 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2389 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2390 a = (Mat_MPIAIJ*)mat->data; 2391 2392 mat->factor = matin->factor; 2393 mat->rmap.bs = matin->rmap.bs; 2394 mat->assembled = PETSC_TRUE; 2395 mat->insertmode = NOT_SET_VALUES; 2396 mat->preallocated = PETSC_TRUE; 2397 2398 a->size = oldmat->size; 2399 a->rank = oldmat->rank; 2400 a->donotstash = oldmat->donotstash; 2401 a->roworiented = oldmat->roworiented; 2402 a->rowindices = 0; 2403 a->rowvalues = 0; 2404 a->getrowactive = PETSC_FALSE; 2405 2406 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2407 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2408 2409 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2410 if (oldmat->colmap) { 2411 #if defined (PETSC_USE_CTABLE) 2412 ierr = 
PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2413 #else 2414 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2415 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2416 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2417 #endif 2418 } else a->colmap = 0; 2419 if (oldmat->garray) { 2420 PetscInt len; 2421 len = oldmat->B->cmap.n; 2422 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2423 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2424 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2425 } else a->garray = 0; 2426 2427 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2428 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2429 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2430 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2431 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2432 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2433 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2434 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2435 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2436 *newmat = mat; 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #include "petscsys.h" 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatLoad_MPIAIJ" 2444 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2445 { 2446 Mat A; 2447 PetscScalar *vals,*svals; 2448 MPI_Comm comm = ((PetscObject)viewer)->comm; 2449 MPI_Status status; 2450 PetscErrorCode ierr; 2451 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2452 PetscInt i,nz,j,rstart,rend,mmax; 2453 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2454 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2455 PetscInt cend,cstart,n,*rowners; 2456 int fd; 2457 2458 PetscFunctionBegin; 2459 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2460 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2461 if (!rank) { 2462 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2463 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2464 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2465 } 2466 2467 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2468 M = header[1]; N = header[2]; 2469 /* determine ownership of all rows */ 2470 m = M/size + ((M % size) > rank); 2471 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2472 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2473 2474 /* First process needs enough room for process with most rows */ 2475 if (!rank) { 2476 mmax = rowners[1]; 2477 for (i=2; i<size; i++) { 2478 mmax = PetscMax(mmax,rowners[i]); 2479 } 2480 } else mmax = m; 2481 2482 rowners[0] = 0; 2483 for (i=2; i<=size; i++) { 2484 rowners[i] += rowners[i-1]; 2485 } 2486 rstart = rowners[rank]; 2487 rend = rowners[rank+1]; 2488 2489 /* distribute row lengths to all processors */ 2490 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2491 if (!rank) { 2492 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2493 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2494 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2495 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2496 for (j=0; j<m; j++) { 2497 procsnz[0] += ourlens[j]; 2498 } 2499 for (i=1; i<size; i++) { 2500 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2501 /* calculate the number of nonzeros on each processor */ 2502 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2503 procsnz[i] += rowlengths[j]; 2504 } 2505 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2506 } 2507 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2508 } else { 2509 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2510 } 2511 2512 if (!rank) { 2513 /* determine max buffer needed and allocate it */ 2514 maxnz = 0; 2515 for (i=0; i<size; i++) { 2516 maxnz = PetscMax(maxnz,procsnz[i]); 2517 } 2518 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2519 2520 /* read in my part of the matrix column indices */ 2521 nz = procsnz[0]; 2522 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2523 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2524 2525 /* read in every one elses and ship off */ 2526 for (i=1; i<size; i++) { 2527 nz = procsnz[i]; 2528 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2529 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2530 } 2531 ierr = PetscFree(cols);CHKERRQ(ierr); 2532 } else { 2533 /* determine buffer space needed for message */ 2534 nz = 0; 2535 for (i=0; i<m; i++) { 2536 nz += ourlens[i]; 2537 } 2538 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2539 2540 /* receive message of column indices*/ 2541 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2542 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2543 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2544 } 2545 2546 /* determine column ownership if matrix is not square */ 2547 if (N != M) { 2548 n = N/size + ((N % size) > rank); 2549 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2550 cstart = cend - n; 2551 } else { 2552 cstart = rstart; 2553 cend = rend; 2554 n = cend - cstart; 2555 } 2556 2557 /* loop over local rows, determining number of off diagonal entries */ 2558 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2559 jj = 0; 2560 for (i=0; i<m; i++) { 2561 for (j=0; j<ourlens[i]; j++) { 2562 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2563 jj++; 2564 } 2565 } 2566 2567 /* create our matrix */ 2568 for (i=0; i<m; i++) { 2569 ourlens[i] -= offlens[i]; 2570 } 2571 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2572 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2573 ierr = MatSetType(A,type);CHKERRQ(ierr); 2574 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2575 2576 ierr = MatSetOption(A,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 2577 for (i=0; i<m; i++) { 2578 ourlens[i] += offlens[i]; 2579 } 2580 2581 if (!rank) { 2582 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2583 2584 /* read in my part of the matrix numerical values */ 2585 nz = procsnz[0]; 2586 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2587 2588 /* insert into matrix */ 2589 jj = rstart; 2590 smycols = mycols; 2591 svals = vals; 2592 for (i=0; i<m; i++) { 2593 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2594 smycols += ourlens[i]; 2595 svals += ourlens[i]; 2596 jj++; 2597 } 2598 2599 /* read in other processors and ship out */ 
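    /* the single vals buffer below is sized for the largest per-process nonzero count (maxnz) and is reused for each remaining process: its values are read from the file and immediately shipped off with MPI_Send */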
2600 for (i=1; i<size; i++) { 2601 nz = procsnz[i]; 2602 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2604 } 2605 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2606 } else { 2607 /* receive numeric values */ 2608 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2609 2610 /* receive message of values*/ 2611 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2612 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2613 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2614 2615 /* insert into matrix */ 2616 jj = rstart; 2617 smycols = mycols; 2618 svals = vals; 2619 for (i=0; i<m; i++) { 2620 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2621 smycols += ourlens[i]; 2622 svals += ourlens[i]; 2623 jj++; 2624 } 2625 } 2626 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2627 ierr = PetscFree(vals);CHKERRQ(ierr); 2628 ierr = PetscFree(mycols);CHKERRQ(ierr); 2629 ierr = PetscFree(rowners);CHKERRQ(ierr); 2630 2631 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 *newmat = A; 2634 PetscFunctionReturn(0); 2635 } 2636 2637 #undef __FUNCT__ 2638 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2639 /* 2640 Not great since it makes two copies of the submatrix, first an SeqAIJ 2641 in local and then by concatenating the local matrices the end result. 2642 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2643 */ 2644 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2645 { 2646 PetscErrorCode ierr; 2647 PetscMPIInt rank,size; 2648 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2649 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2650 Mat *local,M,Mreuse; 2651 PetscScalar *vwork,*aa; 2652 MPI_Comm comm = mat->comm; 2653 Mat_SeqAIJ *aij; 2654 2655 2656 PetscFunctionBegin; 2657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2659 2660 if (call == MAT_REUSE_MATRIX) { 2661 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2662 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2663 local = &Mreuse; 2664 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2665 } else { 2666 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2667 Mreuse = *local; 2668 ierr = PetscFree(local);CHKERRQ(ierr); 2669 } 2670 2671 /* 2672 m - number of local rows 2673 n - number of columns (same on all processors) 2674 rstart - first row in new global matrix generated 2675 */ 2676 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2677 if (call == MAT_INITIAL_MATRIX) { 2678 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2679 ii = aij->i; 2680 jj = aij->j; 2681 2682 /* 2683 Determine the number of non-zeros in the diagonal and off-diagonal 2684 portions of the matrix in order to do correct preallocation 2685 */ 2686 2687 /* first get start and end of "diagonal" columns */ 2688 if (csize == PETSC_DECIDE) { 2689 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2690 if (mglobal == n) { /* square matrix */ 2691 nlocal = m; 2692 } else { 2693 nlocal = n/size + ((n % size) > rank); 2694 } 2695 } else { 2696 nlocal = csize; 2697 } 2698 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2699 rstart = rend - nlocal; 2700 if (rank == size - 1 && rend != n) { 2701 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2702 } 2703 2704 /* next, compute all the lengths */ 2705 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2706 olens = dlens + m; 2707 for (i=0; i<m; i++) { 2708 jend = ii[i+1] - ii[i]; 2709 olen = 0; 2710 dlen = 0; 2711 for (j=0; j<jend; j++) { 2712 if (*jj < rstart || *jj >= rend) olen++; 2713 else dlen++; 2714 jj++; 2715 } 2716 olens[i] = olen; 2717 dlens[i] = dlen; 2718 } 2719 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2720 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2721 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2722 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2723 ierr = PetscFree(dlens);CHKERRQ(ierr); 2724 } else { 2725 PetscInt ml,nl; 2726 2727 M = *newmat; 2728 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2729 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2730 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2731 /* 2732 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2733 rather than the slower MatSetValues(). 2734 */ 2735 M->was_assembled = PETSC_TRUE; 2736 M->assembled = PETSC_FALSE; 2737 } 2738 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2739 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2740 ii = aij->i; 2741 jj = aij->j; 2742 aa = aij->a; 2743 for (i=0; i<m; i++) { 2744 row = rstart + i; 2745 nz = ii[i+1] - ii[i]; 2746 cwork = jj; jj += nz; 2747 vwork = aa; aa += nz; 2748 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2749 } 2750 2751 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2752 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2753 *newmat = M; 2754 2755 /* save submatrix used in processor for next request */ 2756 if (call == MAT_INITIAL_MATRIX) { 2757 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2758 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2759 } 2760 2761 PetscFunctionReturn(0); 2762 } 2763 2764 EXTERN_C_BEGIN 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2768 { 2769 PetscInt m,cstart, cend,j,nnz,i,d; 2770 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2771 const PetscInt *JJ; 2772 PetscScalar *values; 2773 PetscErrorCode ierr; 2774 2775 PetscFunctionBegin; 2776 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2777 2778 B->rmap.bs = B->cmap.bs = 1; 2779 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2780 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2781 m = B->rmap.n; 2782 cstart = B->cmap.rstart; 2783 cend = B->cmap.rend; 2784 rstart = B->rmap.rstart; 2785 2786 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2787 o_nnz = d_nnz + m; 2788 2789 for (i=0; i<m; i++) { 2790 nnz = Ii[i+1]- Ii[i]; 2791 JJ = J + Ii[i]; 2792 nnz_max = PetscMax(nnz_max,nnz); 2793 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2794 for (j=0; j<nnz; j++) { 2795 if (*JJ >= cstart) break; 2796 JJ++; 2797 } 2798 d = 0; 2799 for (; j<nnz; j++) { 2800 if (*JJ++ >= cend) break; 2801 d++; 2802 } 2803 
d_nnz[i] = d; 2804 o_nnz[i] = nnz - d; 2805 } 2806 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2807 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2808 2809 if (v) values = (PetscScalar*)v; 2810 else { 2811 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2812 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2813 } 2814 2815 ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 2816 for (i=0; i<m; i++) { 2817 ii = i + rstart; 2818 nnz = Ii[i+1]- Ii[i]; 2819 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2820 } 2821 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2822 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2823 ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_FALSE);CHKERRQ(ierr); 2824 2825 if (!v) { 2826 ierr = PetscFree(values);CHKERRQ(ierr); 2827 } 2828 PetscFunctionReturn(0); 2829 } 2830 EXTERN_C_END 2831 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2834 /*@ 2835 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2836 (the default parallel PETSc format). 2837 2838 Collective on MPI_Comm 2839 2840 Input Parameters: 2841 + B - the matrix 2842 . i - the indices into j for the start of each local row (starts with zero) 2843 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2844 - v - optional values in the matrix 2845 2846 Level: developer 2847 2848 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2849 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2850 2851 .keywords: matrix, aij, compressed row, sparse, parallel 2852 2853 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2854 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2855 @*/ 2856 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2857 { 2858 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2859 2860 PetscFunctionBegin; 2861 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2862 if (f) { 2863 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2864 } 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2870 /*@C 2871 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2872 (the default parallel PETSc format). For good matrix assembly performance 2873 the user should preallocate the matrix storage by setting the parameters 2874 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2875 performance can be increased by more than a factor of 50. 2876 2877 Collective on MPI_Comm 2878 2879 Input Parameters: 2880 + A - the matrix 2881 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2882 (same value is used for all local rows) 2883 . d_nnz - array containing the number of nonzeros in the various rows of the 2884 DIAGONAL portion of the local submatrix (possibly different for each row) 2885 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2886 The size of this array is equal to the number of local rows, i.e 'm'. 
2887 You must leave room for the diagonal entry even if it is zero. 2888 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2889 submatrix (same value is used for all local rows). 2890 - o_nnz - array containing the number of nonzeros in the various rows of the 2891 OFF-DIAGONAL portion of the local submatrix (possibly different for 2892 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2893 structure. The size of this array is equal to the number 2894 of local rows, i.e., 'm'. 2895 2896 If the *_nnz parameter is given then the *_nz parameter is ignored. 2897 2898 The AIJ format (also called the Yale sparse matrix format or 2899 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2900 storage. The stored row and column indices begin with zero. See the users manual for details. 2901 2902 The parallel matrix is partitioned such that the first m0 rows belong to 2903 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2904 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2905 2906 The DIAGONAL portion of the local submatrix of a processor can be defined 2907 as the submatrix obtained by extracting the part corresponding 2908 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2909 first row that belongs to the processor, and r2 is the last row belonging 2910 to this processor. This is a square mxm matrix. The remaining portion 2911 of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 2912 2913 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 2914 2915 Example usage: 2916 2917 Consider the following 8x8 matrix with 34 non-zero values that is 2918 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2919 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2920 as follows: 2921 2922 .vb 2923 1 2 0 | 0 3 0 | 0 4 2924 Proc0 0 5 6 | 7 0 0 | 8 0 2925 9 0 10 | 11 0 0 | 12 0 2926 ------------------------------------- 2927 13 0 14 | 15 16 17 | 0 0 2928 Proc1 0 18 0 | 19 20 21 | 0 0 2929 0 0 0 | 22 23 0 | 24 0 2930 ------------------------------------- 2931 Proc2 25 26 27 | 0 0 28 | 29 0 2932 30 0 0 | 31 32 33 | 0 34 2933 .ve 2934 2935 This can be represented as a collection of submatrices as: 2936 2937 .vb 2938 A B C 2939 D E F 2940 G H I 2941 .ve 2942 2943 Where the submatrices A,B,C are owned by proc0, D,E,F are 2944 owned by proc1, and G,H,I are owned by proc2. 2945 2946 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2947 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2948 The 'M','N' parameters are 8,8, and have the same values on all procs. 2949 2950 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2951 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2952 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2953 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2954 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2955 matrix, and [DF] as another SeqAIJ matrix. 2956 2957 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2958 allocated for every row of the local diagonal submatrix, and o_nz 2959 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2960 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2961 row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2962 In this case, the values of d_nz,o_nz are: 2963 .vb 2964 proc0 : d_nz = 2, o_nz = 2 2965 proc1 : d_nz = 3, o_nz = 2 2966 proc2 : d_nz = 1, o_nz = 4 2967 .ve 2968 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2969 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2970 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2971 34 values. 2972 2973 When the d_nnz, o_nnz parameters are specified, the storage is specified 2974 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2975 In the above case the values for d_nnz,o_nnz are: 2976 .vb 2977 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2978 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2979 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2980 .ve 2981 Here the space allocated is the sum of all the above values, i.e., 34, and 2982 hence the preallocation is exact. 2983 2984 Level: intermediate 2985 2986 .keywords: matrix, aij, compressed row, sparse, parallel 2987 2988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2989 MPIAIJ 2990 @*/ 2991 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2992 { 2993 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2994 2995 PetscFunctionBegin; 2996 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2997 if (f) { 2998 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2999 } 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3005 /*@C 3006 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3007 CSR format. 3008 3009 Collective on MPI_Comm 3010 3011 Input Parameters: 3012 + comm - MPI communicator 3013 . m - number of local rows (cannot be PETSC_DECIDE) 3014 . n - This value should be the same as the local size used in creating the 3015 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 3016 calculated if N is given). For square matrices n is almost always m. 3017 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3018 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3019 . i - the indices into j for the start of each local row (starts with zero) 3020 . j - the column indices for each local row (starts with zero; must be sorted within each row) 3021 - a - matrix values 3022 3023 Output Parameter: 3024 . mat - the matrix 3025 3026 Level: intermediate 3027 3028 Notes: 3029 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3030 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3031 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
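    As a minimal, illustrative sketch (the arrays below are made-up values for a process that
    owns 2 rows of a matrix with 4 global columns, and comm, ierr, and A are assumed to be
    declared as usual; this is not taken from a PETSc example):

.vb
    PetscInt    i[] = {0,2,5};                    row offsets: row 0 has 2 entries, row 1 has 3
    PetscInt    j[] = {0,1, 1,2,3};               global column indices, sorted within each row
    PetscScalar a[] = {1.0,2.0, 3.0,4.0,5.0};     the corresponding values
    Mat         A;

    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);CHKERRQ(ierr);
.ve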
3032 3033 The i and j indices are 0 based 3034 3035 .keywords: matrix, aij, compressed row, sparse, parallel 3036 3037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3038 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3039 @*/ 3040 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3041 { 3042 PetscErrorCode ierr; 3043 3044 PetscFunctionBegin; 3045 if (i[0]) { 3046 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3047 } 3048 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3049 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3050 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3051 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatCreateMPIAIJ" 3057 /*@C 3058 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3059 (the default parallel PETSc format). For good matrix assembly performance 3060 the user should preallocate the matrix storage by setting the parameters 3061 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3062 performance can be increased by more than a factor of 50. 3063 3064 Collective on MPI_Comm 3065 3066 Input Parameters: 3067 + comm - MPI communicator 3068 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3069 This value should be the same as the local size used in creating the 3070 y vector for the matrix-vector product y = Ax. 3071 . n - This value should be the same as the local size used in creating the 3072 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3073 calculated if N is given) For square matrices n is almost always m. 3074 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3075 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3076 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3077 (same value is used for all local rows) 3078 . d_nnz - array containing the number of nonzeros in the various rows of the 3079 DIAGONAL portion of the local submatrix (possibly different for each row) 3080 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3081 The size of this array is equal to the number of local rows, i.e 'm'. 3082 You must leave room for the diagonal entry even if it is zero. 3083 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3084 submatrix (same value is used for all local rows). 3085 - o_nnz - array containing the number of nonzeros in the various rows of the 3086 OFF-DIAGONAL portion of the local submatrix (possibly different for 3087 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3088 structure. The size of this array is equal to the number 3089 of local rows, i.e 'm'. 3090 3091 Output Parameter: 3092 . A - the matrix 3093 3094 Notes: 3095 If the *_nnz parameter is given then the *_nz parameter is ignored 3096 3097 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3098 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3099 storage requirements for this matrix. 
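    As a rough sketch (the per-row estimates 5 and 2 below are placeholders, not recommendations),
    a square matrix with m local rows and m local columns per process could be created with:

.vb
    ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve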
3100 3101 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3102 processor then it must be used on all processors that share the object for 3103 that argument. 3104 3105 The user MUST specify either the local or global matrix dimensions 3106 (possibly both). 3107 3108 The parallel matrix is partitioned across processors such that the 3109 first m0 rows belong to process 0, the next m1 rows belong to 3110 process 1, the next m2 rows belong to process 2, etc., where 3111 m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores 3112 values corresponding to an [m x N] submatrix. 3113 3114 The columns are logically partitioned with the n0 columns belonging 3115 to the 0th partition, the next n1 columns belonging to the next 3116 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'. 3117 3118 The DIAGONAL portion of the local submatrix on any given processor 3119 is the submatrix corresponding to the rows and columns m,n 3120 owned by the given processor, i.e., the diagonal submatrix on 3121 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3122 etc. The remaining portion of the local submatrix [m x (N-n)] 3123 constitutes the OFF-DIAGONAL portion. The example below better 3124 illustrates this concept. 3125 3126 For a square global matrix we define each processor's diagonal portion 3127 to be its local rows and the corresponding columns (a square submatrix); 3128 each processor's off-diagonal portion encompasses the remainder of the 3129 local matrix (a rectangular submatrix). 3130 3131 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3132 3133 When calling this routine with a single process communicator, a matrix of 3134 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3135 type of communicator, use the construction mechanism: 3136 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3137 3138 By default, this format uses inodes (identical nodes) when possible. 3139 We search for consecutive rows with the same nonzero structure, thereby 3140 reusing matrix information to achieve increased efficiency. 3141 3142 Options Database Keys: 3143 + -mat_no_inode - Do not use inodes 3144 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3145 - -mat_aij_oneindex - Internally use indexing starting at 1 3146 rather than 0. Note that when calling MatSetValues(), 3147 the user still MUST index entries starting at 0! 3148 3149 3150 Example usage: 3151 3152 Consider the following 8x8 matrix with 34 non-zero values, that is 3153 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3154 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3155 as follows: 3156 3157 .vb 3158 1 2 0 | 0 3 0 | 0 4 3159 Proc0 0 5 6 | 7 0 0 | 8 0 3160 9 0 10 | 11 0 0 | 12 0 3161 ------------------------------------- 3162 13 0 14 | 15 16 17 | 0 0 3163 Proc1 0 18 0 | 19 20 21 | 0 0 3164 0 0 0 | 22 23 0 | 24 0 3165 ------------------------------------- 3166 Proc2 25 26 27 | 0 0 28 | 29 0 3167 30 0 0 | 31 32 33 | 0 34 3168 .ve 3169 3170 This can be represented as a collection of submatrices as: 3171 3172 .vb 3173 A B C 3174 D E F 3175 G H I 3176 .ve 3177 3178 Where the submatrices A,B,C are owned by proc0, D,E,F are 3179 owned by proc1, G,H,I are owned by proc2. 3180 3181 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3182 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3183 The 'M','N' parameters are 8,8, and have the same values on all procs.
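   With this layout each process passes its own local sizes and the common global sizes to this
   routine; a sketch of the calls follows (the preallocation arguments are the per-process
   d_nz/o_nz or d_nnz/o_nnz values worked out below, and comm and A are assumed to be declared
   elsewhere):
.vb
      on proc0 and proc1:  MatCreateMPIAIJ(comm,3,3,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
      on proc2:            MatCreateMPIAIJ(comm,2,2,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve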
3184 3185 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3186 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3187 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3188 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3189 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3190 matrix, and [DF] as another SeqAIJ matrix. 3191 3192 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3193 allocated for every row of the local diagonal submatrix, and o_nz 3194 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3195 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 3196 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 3197 In this case, the values of d_nz,o_nz are: 3198 .vb 3199 proc0 : d_nz = 2, o_nz = 2 3200 proc1 : d_nz = 3, o_nz = 2 3201 proc2 : d_nz = 1, o_nz = 4 3202 .ve 3203 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3204 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3205 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3206 34 values. 3207 3208 When the d_nnz, o_nnz parameters are specified, the storage is specified 3209 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3210 In the above case the values for d_nnz,o_nnz are: 3211 .vb 3212 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3213 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3214 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3215 .ve 3216 Here the space allocated is the sum of all the above values, i.e., 34, and 3217 hence pre-allocation is perfect. 3218 3219 Level: intermediate 3220 3221 .keywords: matrix, aij, compressed row, sparse, parallel 3222 3223 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3224 MPIAIJ, MatCreateMPIAIJWithArrays() 3225 @*/ 3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3227 { 3228 PetscErrorCode ierr; 3229 PetscMPIInt size; 3230 3231 PetscFunctionBegin; 3232 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3233 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3234 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3235 if (size > 1) { 3236 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3237 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3238 } else { 3239 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3240 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3241 } 3242 PetscFunctionReturn(0); 3243 } 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3248 { 3249 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3250 3251 PetscFunctionBegin; 3252 *Ad = a->A; 3253 *Ao = a->B; 3254 *colmap = a->garray; 3255 PetscFunctionReturn(0); 3256 } 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3260 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3261 { 3262 PetscErrorCode ierr; 3263 PetscInt i; 3264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3265 3266 PetscFunctionBegin; 3267 if (coloring->ctype == IS_COLORING_GLOBAL) { 3268 ISColoringValue *allcolors,*colors; 3269 ISColoring ocoloring; 3270 3271 /* set coloring for diagonal portion */ 3272 ierr =
MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3273 3274 /* set coloring for off-diagonal portion */ 3275 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3276 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3277 for (i=0; i<a->B->cmap.n; i++) { 3278 colors[i] = allcolors[a->garray[i]]; 3279 } 3280 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3281 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3282 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3283 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3284 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3285 ISColoringValue *colors; 3286 PetscInt *larray; 3287 ISColoring ocoloring; 3288 3289 /* set coloring for diagonal portion */ 3290 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3291 for (i=0; i<a->A->cmap.n; i++) { 3292 larray[i] = i + A->cmap.rstart; 3293 } 3294 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3295 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3296 for (i=0; i<a->A->cmap.n; i++) { 3297 colors[i] = coloring->colors[larray[i]]; 3298 } 3299 ierr = PetscFree(larray);CHKERRQ(ierr); 3300 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3301 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3302 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3303 3304 /* set coloring for off-diagonal portion */ 3305 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3306 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3307 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3308 for (i=0; i<a->B->cmap.n; i++) { 3309 colors[i] = coloring->colors[larray[i]]; 3310 } 3311 ierr = PetscFree(larray);CHKERRQ(ierr); 3312 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3313 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3314 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3315 } else { 3316 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3317 } 3318 3319 PetscFunctionReturn(0); 3320 } 3321 3322 #if defined(PETSC_HAVE_ADIC) 3323 #undef __FUNCT__ 3324 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3325 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3326 { 3327 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3328 PetscErrorCode ierr; 3329 3330 PetscFunctionBegin; 3331 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3332 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 #endif 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3339 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3340 { 3341 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3342 PetscErrorCode ierr; 3343 3344 PetscFunctionBegin; 3345 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3346 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3347 PetscFunctionReturn(0); 3348 } 3349 3350 #undef __FUNCT__ 3351 #define __FUNCT__ "MatMerge" 3352 /*@C 3353 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3354 matrices from each processor 3355 3356 Collective on MPI_Comm 
3357 3358 Input Parameters: 3359 + comm - the communicators the parallel matrix will live on 3360 . inmat - the input sequential matrices 3361 . n - number of local columns (or PETSC_DECIDE) 3362 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3363 3364 Output Parameter: 3365 . outmat - the parallel matrix generated 3366 3367 Level: advanced 3368 3369 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3370 3371 @*/ 3372 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3373 { 3374 PetscErrorCode ierr; 3375 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3376 PetscInt *indx; 3377 PetscScalar *values; 3378 3379 PetscFunctionBegin; 3380 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3381 if (scall == MAT_INITIAL_MATRIX){ 3382 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3383 if (n == PETSC_DECIDE){ 3384 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3385 } 3386 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3387 rstart -= m; 3388 3389 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3390 for (i=0;i<m;i++) { 3391 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3392 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3393 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3394 } 3395 /* This routine will ONLY return MPIAIJ type matrix */ 3396 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3397 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3398 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3399 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3400 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3401 3402 } else if (scall == MAT_REUSE_MATRIX){ 3403 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3404 } else { 3405 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3406 } 3407 3408 for (i=0;i<m;i++) { 3409 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3410 Ii = i + rstart; 3411 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3412 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3413 } 3414 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3415 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3416 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 3418 PetscFunctionReturn(0); 3419 } 3420 3421 #undef __FUNCT__ 3422 #define __FUNCT__ "MatFileSplit" 3423 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3424 { 3425 PetscErrorCode ierr; 3426 PetscMPIInt rank; 3427 PetscInt m,N,i,rstart,nnz; 3428 size_t len; 3429 const PetscInt *indx; 3430 PetscViewer out; 3431 char *name; 3432 Mat B; 3433 const PetscScalar *values; 3434 3435 PetscFunctionBegin; 3436 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3437 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3438 /* Should this be the type of the diagonal block of A? 
*/ 3439 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3440 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3441 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3442 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3443 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3444 for (i=0;i<m;i++) { 3445 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3446 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3447 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3448 } 3449 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 3452 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3453 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3454 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3455 sprintf(name,"%s.%d",outfile,rank); 3456 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3457 ierr = PetscFree(name); 3458 ierr = MatView(B,out);CHKERRQ(ierr); 3459 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3460 ierr = MatDestroy(B);CHKERRQ(ierr); 3461 PetscFunctionReturn(0); 3462 } 3463 3464 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3467 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3468 { 3469 PetscErrorCode ierr; 3470 Mat_Merge_SeqsToMPI *merge; 3471 PetscContainer container; 3472 3473 PetscFunctionBegin; 3474 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3475 if (container) { 3476 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3482 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3483 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3484 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3485 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3486 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3487 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3488 3489 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3490 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3491 } 3492 ierr = PetscFree(merge);CHKERRQ(ierr); 3493 3494 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3495 PetscFunctionReturn(0); 3496 } 3497 3498 #include "src/mat/utils/freespace.h" 3499 #include "petscbt.h" 3500 static PetscEvent logkey_seqstompinum = 0; 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3503 /*@C 3504 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3505 matrices from each processor 3506 3507 Collective on MPI_Comm 3508 3509 Input Parameters: 3510 + comm - the communicators the parallel matrix will live on 3511 . seqmat - the input sequential matrices 3512 . m - number of local rows (or PETSC_DECIDE) 3513 . n - number of local columns (or PETSC_DECIDE) 3514 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3515 3516 Output Parameter: 3517 . mpimat - the parallel matrix generated 3518 3519 Level: advanced 3520 3521 Notes: 3522 The dimensions of the sequential matrix in each processor MUST be the same. 3523 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3524 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3525 @*/ 3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3527 { 3528 PetscErrorCode ierr; 3529 MPI_Comm comm=mpimat->comm; 3530 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3531 PetscMPIInt size,rank,taga,*len_s; 3532 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3533 PetscInt proc,m; 3534 PetscInt **buf_ri,**buf_rj; 3535 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3536 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3537 MPI_Request *s_waits,*r_waits; 3538 MPI_Status *status; 3539 MatScalar *aa=a->a,**abuf_r,*ba_i; 3540 Mat_Merge_SeqsToMPI *merge; 3541 PetscContainer container; 3542 3543 PetscFunctionBegin; 3544 if (!logkey_seqstompinum) { 3545 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3546 } 3547 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3548 3549 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3550 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3551 3552 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3553 if (container) { 3554 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3555 } 3556 bi = merge->bi; 3557 bj = merge->bj; 3558 buf_ri = merge->buf_ri; 3559 buf_rj = merge->buf_rj; 3560 3561 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3562 owners = merge->rowmap.range; 3563 len_s = merge->len_s; 3564 3565 /* send and recv matrix values */ 3566 /*-----------------------------*/ 3567 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3568 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3569 3570 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3571 for (proc=0,k=0; proc<size; proc++){ 3572 if (!len_s[proc]) continue; 3573 i = owners[proc]; 3574 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3575 k++; 3576 } 3577 3578 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3579 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3580 ierr = PetscFree(status);CHKERRQ(ierr); 3581 3582 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3583 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3584 3585 /* insert mat values of mpimat */ 3586 /*----------------------------*/ 3587 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3588 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3589 nextrow = buf_ri_k + merge->nrecv; 3590 nextai = nextrow + merge->nrecv; 3591 3592 for (k=0; k<merge->nrecv; k++){ 3593 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3594 nrows = *(buf_ri_k[k]); 3595 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3596 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3597 } 3598 3599 /* set values of ba */ 3600 m = merge->rowmap.n; 3601 for (i=0; i<m; i++) { 3602 arow = owners[rank] + i; 3603 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3604 bnzi = bi[i+1] - bi[i]; 3605 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3606 3607 /* add local non-zero vals of this proc's seqmat into ba */ 3608 anzi = ai[arow+1] - ai[arow]; 3609 aj = a->j + ai[arow]; 3610 aa = a->a + ai[arow]; 3611 nextaj = 0; 3612 for (j=0; nextaj<anzi; j++){ 3613 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3614 ba_i[j] += aa[nextaj++]; 3615 } 3616 } 3617 3618 /* add received vals into ba */ 3619 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3620 /* i-th row */ 3621 if (i == *nextrow[k]) { 3622 anzi = *(nextai[k]+1) - *nextai[k]; 3623 aj = buf_rj[k] + *(nextai[k]); 3624 aa = abuf_r[k] + *(nextai[k]); 3625 nextaj = 0; 3626 for (j=0; nextaj<anzi; j++){ 3627 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3628 ba_i[j] += aa[nextaj++]; 3629 } 3630 } 3631 nextrow[k]++; nextai[k]++; 3632 } 3633 } 3634 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3635 } 3636 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3638 3639 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3640 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3641 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3642 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3643 PetscFunctionReturn(0); 3644 } 3645 3646 static PetscEvent logkey_seqstompisym = 0; 3647 #undef __FUNCT__ 3648 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3649 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3650 { 3651 PetscErrorCode ierr; 3652 Mat B_mpi; 3653 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3654 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3655 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3656 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3657 PetscInt len,proc,*dnz,*onz; 3658 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3659 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3660 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3661 MPI_Status *status; 3662 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3663 PetscBT lnkbt; 3664 Mat_Merge_SeqsToMPI *merge; 3665 PetscContainer container; 3666 3667 PetscFunctionBegin; 3668 if (!logkey_seqstompisym) { 3669 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3670 } 3671 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3672 3673 /* make sure it is a PETSc comm */ 3674 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3677 3678 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3679 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3680 3681 /* determine row ownership */ 3682 /*---------------------------------------------------------*/ 3683 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3684 merge->rowmap.n = m; 3685 merge->rowmap.N = M; 3686 merge->rowmap.bs = 1; 3687 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3688 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3689 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3690 3691 m = merge->rowmap.n; 3692 M = merge->rowmap.N; 3693 owners = merge->rowmap.range; 3694 3695 /* determine the number of messages to send, their lengths */ 3696 /*---------------------------------------------------------*/ 3697 len_s = merge->len_s; 3698 3699 len = 0; /* length of buf_si[] */ 3700 merge->nsend = 0; 3701 for (proc=0; proc<size; proc++){ 3702 len_si[proc] = 0; 3703 if (proc == rank){ 3704 len_s[proc] = 0; 3705 } else { 3706 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3707 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3708 } 3709 if (len_s[proc]) { 3710 merge->nsend++; 3711 nrows = 0; 3712 for (i=owners[proc]; i<owners[proc+1]; i++){ 3713 if (ai[i+1] > ai[i]) nrows++; 3714 } 3715 len_si[proc] = 2*(nrows+1); 3716 len += len_si[proc]; 3717 } 3718 } 3719 3720 /* determine the number and length of messages to receive for ij-structure */ 3721 /*-------------------------------------------------------------------------*/ 3722 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3723 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3724 3725 /* post the Irecv of j-structure */ 3726 /*-------------------------------*/ 3727 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3728 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3729 3730 /* post the Isend of j-structure */ 3731 /*--------------------------------*/ 3732 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3733 sj_waits = si_waits + merge->nsend; 3734 3735 for (proc=0, k=0; proc<size; proc++){ 3736 if (!len_s[proc]) continue; 3737 i = owners[proc]; 3738 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3739 k++; 3740 } 3741 3742 /* receives and sends of j-structure are complete */ 3743 /*------------------------------------------------*/ 3744 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3745 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3746 3747 /* send and recv i-structure */ 3748 /*---------------------------*/ 3749 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3750 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3751 3752 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3753 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3754 for (proc=0,k=0; proc<size; proc++){ 3755 if (!len_s[proc]) continue; 3756 /* form outgoing message for i-structure: 3757 buf_si[0]: nrows to be sent 3758 [1:nrows]: row index (global) 3759 [nrows+1:2*nrows+1]: i-structure index 3760 */ 3761 /*-------------------------------------------*/ 3762 nrows = len_si[proc]/2 - 1; 3763 buf_si_i = buf_si + nrows+1; 3764 buf_si[0] = nrows; 3765 buf_si_i[0] = 0; 3766 nrows = 0; 3767 for (i=owners[proc]; i<owners[proc+1]; i++){ 3768 anzi = ai[i+1] - ai[i]; 3769 if (anzi) { 3770 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3771 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3772 nrows++; 3773 } 3774 } 3775 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3776 k++; 3777 buf_si += len_si[proc]; 3778 } 3779 3780 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3781 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3782 3783 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3784 for (i=0; i<merge->nrecv; i++){ 3785 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3786 } 3787 3788 ierr = PetscFree(len_si);CHKERRQ(ierr); 3789 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3790 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3791 ierr = 
PetscFree(si_waits);CHKERRQ(ierr); 3792 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3793 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3794 ierr = PetscFree(status);CHKERRQ(ierr); 3795 3796 /* compute a local seq matrix in each processor */ 3797 /*----------------------------------------------*/ 3798 /* allocate bi array and free space for accumulating nonzero column info */ 3799 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3800 bi[0] = 0; 3801 3802 /* create and initialize a linked list */ 3803 nlnk = N+1; 3804 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3805 3806 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3807 len = 0; 3808 len = ai[owners[rank+1]] - ai[owners[rank]]; 3809 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3810 current_space = free_space; 3811 3812 /* determine symbolic info for each local row */ 3813 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3814 nextrow = buf_ri_k + merge->nrecv; 3815 nextai = nextrow + merge->nrecv; 3816 for (k=0; k<merge->nrecv; k++){ 3817 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3818 nrows = *buf_ri_k[k]; 3819 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3820 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3821 } 3822 3823 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3824 len = 0; 3825 for (i=0;i<m;i++) { 3826 bnzi = 0; 3827 /* add local non-zero cols of this proc's seqmat into lnk */ 3828 arow = owners[rank] + i; 3829 anzi = ai[arow+1] - ai[arow]; 3830 aj = a->j + ai[arow]; 3831 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3832 bnzi += nlnk; 3833 /* add received col data into lnk */ 3834 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3835 if (i == *nextrow[k]) { /* i-th row */ 3836 anzi = *(nextai[k]+1) - *nextai[k]; 3837 aj = buf_rj[k] + *nextai[k]; 3838 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3839 bnzi += nlnk; 3840 nextrow[k]++; nextai[k]++; 3841 } 3842 } 3843 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3844 3845 /* if free space is not available, make more free space */ 3846 if (current_space->local_remaining<bnzi) { 3847 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3848 nspacedouble++; 3849 } 3850 /* copy data into free space, then initialize lnk */ 3851 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3852 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3853 3854 current_space->array += bnzi; 3855 current_space->local_used += bnzi; 3856 current_space->local_remaining -= bnzi; 3857 3858 bi[i+1] = bi[i] + bnzi; 3859 } 3860 3861 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3862 3863 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3864 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3865 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3866 3867 /* create symbolic parallel matrix B_mpi */ 3868 /*---------------------------------------*/ 3869 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3870 if (n==PETSC_DECIDE) { 3871 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3872 } else { 3873 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3874 } 3875 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3876 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3877 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3878 3879 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3880 B_mpi->assembled = PETSC_FALSE; 3881 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3882 merge->bi = bi; 3883 merge->bj = bj; 3884 merge->buf_ri = buf_ri; 3885 merge->buf_rj = buf_rj; 3886 merge->coi = PETSC_NULL; 3887 merge->coj = PETSC_NULL; 3888 merge->owners_co = PETSC_NULL; 3889 3890 /* attach the supporting struct to B_mpi for reuse */ 3891 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3892 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3893 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3894 *mpimat = B_mpi; 3895 3896 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3897 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3898 PetscFunctionReturn(0); 3899 } 3900 3901 static PetscEvent logkey_seqstompi = 0; 3902 #undef __FUNCT__ 3903 #define __FUNCT__ "MatMerge_SeqsToMPI" 3904 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3905 { 3906 PetscErrorCode ierr; 3907 3908 PetscFunctionBegin; 3909 if (!logkey_seqstompi) { 3910 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3911 } 3912 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3913 if (scall == MAT_INITIAL_MATRIX){ 3914 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3915 } 3916 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3917 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 static PetscEvent logkey_getlocalmat = 0; 3921 #undef __FUNCT__ 3922 #define __FUNCT__ "MatGetLocalMat" 3923 /*@C 3924 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3925 3926 Not Collective 3927 3928 Input Parameters: 3929 + A - the matrix 3930 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3931 3932 Output Parameter: 3933 . 
A_loc - the local sequential matrix generated 3934 3935 Level: developer 3936 3937 @*/ 3938 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3939 { 3940 PetscErrorCode ierr; 3941 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3942 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3943 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3944 PetscScalar *aa=a->a,*ba=b->a,*ca; 3945 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3946 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3947 3948 PetscFunctionBegin; 3949 if (!logkey_getlocalmat) { 3950 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3951 } 3952 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3953 if (scall == MAT_INITIAL_MATRIX){ 3954 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3955 ci[0] = 0; 3956 for (i=0; i<am; i++){ 3957 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3958 } 3959 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3960 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3961 k = 0; 3962 for (i=0; i<am; i++) { 3963 ncols_o = bi[i+1] - bi[i]; 3964 ncols_d = ai[i+1] - ai[i]; 3965 /* off-diagonal portion of A */ 3966 for (jo=0; jo<ncols_o; jo++) { 3967 col = cmap[*bj]; 3968 if (col >= cstart) break; 3969 cj[k] = col; bj++; 3970 ca[k++] = *ba++; 3971 } 3972 /* diagonal portion of A */ 3973 for (j=0; j<ncols_d; j++) { 3974 cj[k] = cstart + *aj++; 3975 ca[k++] = *aa++; 3976 } 3977 /* off-diagonal portion of A */ 3978 for (j=jo; j<ncols_o; j++) { 3979 cj[k] = cmap[*bj++]; 3980 ca[k++] = *ba++; 3981 } 3982 } 3983 /* put together the new matrix */ 3984 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3985 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3986 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3987 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3988 mat->free_a = PETSC_TRUE; 3989 mat->free_ij = PETSC_TRUE; 3990 mat->nonew = 0; 3991 } else if (scall == MAT_REUSE_MATRIX){ 3992 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3993 ci = mat->i; cj = mat->j; ca = mat->a; 3994 for (i=0; i<am; i++) { 3995 /* off-diagonal portion of A */ 3996 ncols_o = bi[i+1] - bi[i]; 3997 for (jo=0; jo<ncols_o; jo++) { 3998 col = cmap[*bj]; 3999 if (col >= cstart) break; 4000 *ca++ = *ba++; bj++; 4001 } 4002 /* diagonal portion of A */ 4003 ncols_d = ai[i+1] - ai[i]; 4004 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 4005 /* off-diagonal portion of A */ 4006 for (j=jo; j<ncols_o; j++) { 4007 *ca++ = *ba++; bj++; 4008 } 4009 } 4010 } else { 4011 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4012 } 4013 4014 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 static PetscEvent logkey_getlocalmatcondensed = 0; 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatGetLocalMatCondensed" 4021 /*@C 4022 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4023 4024 Not Collective 4025 4026 Input Parameters: 4027 + A - the matrix 4028 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4029 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4030 4031 Output Parameter: 4032 . 
A_loc - the local sequential matrix generated 4033 4034 Level: developer 4035 4036 @*/ 4037 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4038 { 4039 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4040 PetscErrorCode ierr; 4041 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4042 IS isrowa,iscola; 4043 Mat *aloc; 4044 4045 PetscFunctionBegin; 4046 if (!logkey_getlocalmatcondensed) { 4047 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4048 } 4049 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4050 if (!row){ 4051 start = A->rmap.rstart; end = A->rmap.rend; 4052 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4053 } else { 4054 isrowa = *row; 4055 } 4056 if (!col){ 4057 start = A->cmap.rstart; 4058 cmap = a->garray; 4059 nzA = a->A->cmap.n; 4060 nzB = a->B->cmap.n; 4061 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4062 ncols = 0; 4063 for (i=0; i<nzB; i++) { 4064 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4065 else break; 4066 } 4067 imark = i; 4068 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4069 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4070 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4071 ierr = PetscFree(idx);CHKERRQ(ierr); 4072 } else { 4073 iscola = *col; 4074 } 4075 if (scall != MAT_INITIAL_MATRIX){ 4076 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4077 aloc[0] = *A_loc; 4078 } 4079 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4080 *A_loc = aloc[0]; 4081 ierr = PetscFree(aloc);CHKERRQ(ierr); 4082 if (!row){ 4083 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4084 } 4085 if (!col){ 4086 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4087 } 4088 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4089 PetscFunctionReturn(0); 4090 } 4091 4092 static PetscEvent logkey_GetBrowsOfAcols = 0; 4093 #undef __FUNCT__ 4094 #define __FUNCT__ "MatGetBrowsOfAcols" 4095 /*@C 4096 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4097 4098 Collective on Mat 4099 4100 Input Parameters: 4101 + A,B - the matrices in mpiaij format 4102 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4103 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4104 4105 Output Parameter: 4106 + rowb, colb - index sets of rows and columns of B to extract 4107 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4108 - B_seq - the sequential matrix generated 4109 4110 Level: developer 4111 4112 @*/ 4113 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4114 { 4115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4116 PetscErrorCode ierr; 4117 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4118 IS isrowb,iscolb; 4119 Mat *bseq; 4120 4121 PetscFunctionBegin; 4122 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4123 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4124 } 4125 if (!logkey_GetBrowsOfAcols) { 4126 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4127 } 4128 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4129 4130 if (scall == MAT_INITIAL_MATRIX){ 4131 start = A->cmap.rstart; 4132 cmap = a->garray; 4133 nzA = a->A->cmap.n; 4134 nzB = a->B->cmap.n; 4135 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4136 ncols = 0; 4137 for (i=0; i<nzB; i++) { /* row < local row index */ 4138 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4139 else break; 4140 } 4141 imark = i; 4142 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4143 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4144 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4145 ierr = PetscFree(idx);CHKERRQ(ierr); 4146 *brstart = imark; 4147 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4148 } else { 4149 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4150 isrowb = *rowb; iscolb = *colb; 4151 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4152 bseq[0] = *B_seq; 4153 } 4154 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4155 *B_seq = bseq[0]; 4156 ierr = PetscFree(bseq);CHKERRQ(ierr); 4157 if (!rowb){ 4158 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4159 } else { 4160 *rowb = isrowb; 4161 } 4162 if (!colb){ 4163 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4164 } else { 4165 *colb = iscolb; 4166 } 4167 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4168 PetscFunctionReturn(0); 4169 } 4170 4171 static PetscEvent logkey_GetBrowsOfAocols = 0; 4172 #undef __FUNCT__ 4173 #define __FUNCT__ "MatGetBrowsOfAoCols" 4174 /*@C 4175 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4176 of the OFF-DIAGONAL portion of local A 4177 4178 Collective on Mat 4179 4180 Input Parameters: 4181 + A,B - the matrices in mpiaij format 4182 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4183 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4184 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4185 4186 Output Parameter: 4187 + B_oth - the sequential matrix generated 4188 4189 Level: developer 4190 4191 @*/ 4192 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4193 { 4194 VecScatter_MPI_General *gen_to,*gen_from; 4195 PetscErrorCode ierr; 4196 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4197 Mat_SeqAIJ *b_oth; 4198 VecScatter ctx=a->Mvctx; 4199 MPI_Comm comm=ctx->comm; 4200 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4201 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4202 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4203 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4204 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4205 MPI_Status *sstatus,rstatus; 4206 PetscInt *cols,sbs,rbs; 4207 PetscScalar *vals; 4208 4209 PetscFunctionBegin; 4210 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4211 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4212 } 4213 if (!logkey_GetBrowsOfAocols) { 4214 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4215 } 4216 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4217 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4218 4219 gen_to = (VecScatter_MPI_General*)ctx->todata; 4220 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4221 rvalues = gen_from->values; /* holds the length of receiving row */ 4222 svalues = gen_to->values; /* holds the length of sending row */ 4223 nrecvs = gen_from->n; 4224 nsends = gen_to->n; 4225 4226 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4227 srow = gen_to->indices; /* local row index to be sent */ 4228 sstarts = gen_to->starts; 4229 sprocs = gen_to->procs; 4230 sstatus = gen_to->sstatus; 4231 sbs = gen_to->bs; 4232 rstarts = gen_from->starts; 4233 rprocs = gen_from->procs; 4234 rbs = gen_from->bs; 4235 4236 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4237 if (scall == MAT_INITIAL_MATRIX){ 4238 /* i-array */ 4239 /*---------*/ 4240 /* post receives */ 4241 for (i=0; i<nrecvs; i++){ 4242 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4243 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4244 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4245 } 4246 4247 /* pack the outgoing message */ 4248 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4249 rstartsj = sstartsj + nsends +1; 4250 sstartsj[0] = 0; rstartsj[0] = 0; 4251 len = 0; /* total length of j or a array to be sent */ 4252 k = 0; 4253 for (i=0; i<nsends; i++){ 4254 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4255 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4256 for (j=0; j<nrows; j++) { 4257 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4258 for (l=0; l<sbs; l++){ 4259 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4260 rowlen[j*sbs+l] = ncols; 4261 len += ncols; 4262 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4263 } 4264 k++; 4265 } 4266 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4267 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4268 } 4269 /* recvs and sends of i-array are completed */ 4270 i = nrecvs; 4271 while (i--) { 4272 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4273 } 4274 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4275 4276 /* allocate buffers for sending j and a arrays */ 4277 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4278 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4279 4280 /* create i-array of B_oth */ 4281 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4282 b_othi[0] = 0; 4283 len = 0; /* total length of j or a array to be received */ 4284 k = 0; 4285 for (i=0; i<nrecvs; i++){ 4286 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4287 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4288 for (j=0; j<nrows; j++) { 4289 b_othi[k+1] = b_othi[k] + rowlen[j]; 4290 len += rowlen[j]; k++; 4291 } 4292 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4293 } 4294 4295 /* allocate space for j and a arrrays of B_oth */ 4296 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4297 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4298 4299 /* j-array */ 4300 /*---------*/ 4301 /* post receives of j-array */ 4302 for (i=0; i<nrecvs; i++){ 4303 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4304 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4305 } 4306 4307 /* pack the outgoing message j-array */ 4308 k = 0; 4309 for (i=0; i<nsends; i++){ 4310 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4311 bufJ = bufj+sstartsj[i]; 4312 for (j=0; j<nrows; j++) { 4313 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4314 for (ll=0; ll<sbs; ll++){ 4315 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4316 for (l=0; l<ncols; l++){ 4317 *bufJ++ = cols[l]; 4318 } 4319 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4320 } 4321 } 4322 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4323 } 4324 4325 /* recvs and sends of j-array are completed */ 4326 i = nrecvs; 4327 while (i--) { 4328 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4329 } 4330 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4331 } else if (scall == MAT_REUSE_MATRIX){ 4332 sstartsj = *startsj; 4333 rstartsj = sstartsj + nsends +1; 4334 bufa = *bufa_ptr; 4335 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4336 b_otha = b_oth->a; 4337 } else { 4338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4339 } 4340 4341 /* a-array */ 4342 /*---------*/ 4343 /* post receives of a-array */ 4344 for (i=0; i<nrecvs; i++){ 4345 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4346 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4347 } 4348 4349 /* pack the outgoing message a-array */ 4350 k = 0; 4351 for (i=0; i<nsends; i++){ 4352 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4353 bufA = bufa+sstartsj[i]; 4354 for (j=0; j<nrows; j++) { 4355 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4356 for (ll=0; ll<sbs; ll++){ 4357 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4358 for (l=0; l<ncols; l++){ 4359 *bufA++ = vals[l]; 4360 } 4361 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4362 } 4363 } 4364 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4365 } 4366 /* recvs and sends of a-array are completed */ 4367 i = nrecvs; 4368 while (i--) { 4369 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4370 } 4371 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4372 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4373 4374 if (scall == MAT_INITIAL_MATRIX){ 4375 /* put together the new matrix */ 4376 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4377 4378 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4379 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4380 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4381 b_oth->free_a = PETSC_TRUE; 4382 b_oth->free_ij = PETSC_TRUE; 4383 b_oth->nonew = 0; 4384 4385 ierr = PetscFree(bufj);CHKERRQ(ierr); 4386 if (!startsj || !bufa_ptr){ 4387 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4388 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4389 } else { 4390 *startsj = sstartsj; 4391 *bufa_ptr = bufa; 4392 } 4393 } 4394 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 #undef __FUNCT__ 4399 #define __FUNCT__ "MatGetCommunicationStructs" 4400 /*@C 4401 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4402 4403 Not Collective 4404 4405 Input Parameters: 4406 . A - The matrix in mpiaij format 4407 4408 Output Parameter: 4409 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4410 . colmap - A map from global column index to local index into lvec 4411 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4412 4413 Level: developer 4414 4415 @*/ 4416 #if defined (PETSC_USE_CTABLE) 4417 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4418 #else 4419 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4420 #endif 4421 { 4422 Mat_MPIAIJ *a; 4423 4424 PetscFunctionBegin; 4425 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4426 PetscValidPointer(lvec, 2) 4427 PetscValidPointer(colmap, 3) 4428 PetscValidPointer(multScatter, 4) 4429 a = (Mat_MPIAIJ *) A->data; 4430 if (lvec) *lvec = a->lvec; 4431 if (colmap) *colmap = a->colmap; 4432 if (multScatter) *multScatter = a->Mvctx; 4433 PetscFunctionReturn(0); 4434 } 4435 4436 EXTERN_C_BEGIN 4437 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4438 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4439 EXTERN_C_END 4440 4441 /*MC 4442 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4443 4444 Options Database Keys: 4445 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4446 4447 Level: beginner 4448 4449 .seealso: MatCreateMPIAIJ 4450 M*/ 4451 4452 EXTERN_C_BEGIN 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatCreate_MPIAIJ" 4455 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4456 { 4457 Mat_MPIAIJ *b; 4458 PetscErrorCode ierr; 4459 PetscMPIInt size; 4460 4461 PetscFunctionBegin; 4462 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4463 4464 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 4465 B->data = (void*)b; 4466 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4467 B->factor = 0; 4468 B->rmap.bs = 1; 4469 B->assembled = PETSC_FALSE; 4470 B->mapping = 0; 4471 4472 B->insertmode = NOT_SET_VALUES; 4473 b->size = size; 4474 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4475 4476 /* build cache for off array entries formed */ 4477 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4478 b->donotstash = PETSC_FALSE; 4479 b->colmap = 0; 4480 b->garray = 0; 4481 b->roworiented = PETSC_TRUE; 4482 4483 /* stuff used for matrix vector multiply */ 4484 b->lvec = PETSC_NULL; 4485 b->Mvctx = PETSC_NULL; 4486 4487 /* stuff for MatGetRow() */ 4488 b->rowindices = 0; 4489 b->rowvalues = 0; 4490 b->getrowactive = PETSC_FALSE; 4491 4492 4493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4494 "MatStoreValues_MPIAIJ", 4495 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4497 "MatRetrieveValues_MPIAIJ", 4498 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4500 "MatGetDiagonalBlock_MPIAIJ", 4501 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4503 "MatIsTranspose_MPIAIJ", 4504 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4505 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4506 "MatMPIAIJSetPreallocation_MPIAIJ", 4507 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4509 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4510 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4512 "MatDiagonalScaleLocal_MPIAIJ", 4513 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4515 "MatConvert_MPIAIJ_MPICSRPERM", 4516 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4518 "MatConvert_MPIAIJ_MPICRL", 4519 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4520 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 EXTERN_C_END 4524 4525 #undef __FUNCT__ 4526 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4527 /*@C 4528 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4529 and "off-diagonal" part of the matrix in CSR format. 4530 4531 Collective on MPI_Comm 4532 4533 Input Parameters: 4534 + comm - MPI communicator 4535 . m - number of local rows (Cannot be PETSC_DECIDE) 4536 . 
n - This value should be the same as the local size used in creating the 4537 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4538 calculated if N is given) For square matrices n is almost always m. 4539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4541 . i - row indices for "diagonal" portion of matrix 4542 . j - column indices 4543 . a - matrix values 4544 . oi - row indices for "off-diagonal" portion of matrix 4545 . oj - column indices 4546 - oa - matrix values 4547 4548 Output Parameter: 4549 . mat - the matrix 4550 4551 Level: advanced 4552 4553 Notes: 4554 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4555 4556 The i and j indices are 0 based 4557 4558 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4559 4560 4561 .keywords: matrix, aij, compressed row, sparse, parallel 4562 4563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4564 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4565 @*/ 4566 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4567 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4568 { 4569 PetscErrorCode ierr; 4570 Mat_MPIAIJ *maij; 4571 4572 PetscFunctionBegin; 4573 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4574 if (i[0]) { 4575 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4576 } 4577 if (oi[0]) { 4578 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4579 } 4580 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4581 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4582 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4583 maij = (Mat_MPIAIJ*) (*mat)->data; 4584 maij->donotstash = PETSC_TRUE; 4585 (*mat)->preallocated = PETSC_TRUE; 4586 4587 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4588 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4589 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4590 4591 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4592 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4593 4594 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4595 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 4599 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4600 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4601 PetscFunctionReturn(0); 4602 } 4603 4604 /* 4605 Special version for direct calls from Fortran 4606 */ 4607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4608 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4610 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4611 #endif 4612 4613 /* Change these macros so can be used in void function */ 4614 #undef CHKERRQ 4615 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4616 #undef SETERRQ2 4617 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4618 #undef SETERRQ 4619 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4620 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by the MatSetValues_SeqAIJ_B_Private() macro;
                 note that ba must be refreshed before ap2 is recomputed because DisAssemble_MPIAIJ()
                 replaced the B block and its arrays */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END
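
/*
   For orientation, a minimal sketch of the standard user-level insertion path that
   MatSetValues_MPIAIJ() and the Fortran binding above implement: entries in locally owned rows
   go directly into the diagonal (A) or off-diagonal (B) sequential blocks, while entries for
   rows owned by another process are stashed and communicated during assembly.  This is an added
   illustration with made-up sizes, preallocation counts and indices, not part of this file's API.

      Mat            Amat;
      PetscInt       row = 10, col = 11;
      PetscScalar    val = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&Amat);CHKERRQ(ierr);
      ierr = MatSetSizes(Amat,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(Amat,MATMPIAIJ);CHKERRQ(ierr);                     or -mat_type mpiaij via MatSetFromOptions()
      ierr = MatMPIAIJSetPreallocation(Amat,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(Amat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);   any process may set any global entry
      ierr = MatAssemblyBegin(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          stashed off-process entries move here
      ierr = MatAssemblyEnd(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(Amat);CHKERRQ(ierr);
*/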