1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetValues_MPIAIJ" 240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 241 { 242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 243 PetscErrorCode ierr; 244 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 245 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 246 247 PetscFunctionBegin; 248 for (i=0; i<m; i++) { 249 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 250 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 251 if (idxm[i] >= rstart && idxm[i] < rend) { 252 row = idxm[i] - rstart; 253 for (j=0; j<n; j++) { 254 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 255 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 256 if (idxn[j] >= cstart && idxn[j] < cend){ 257 col = idxn[j] - cstart; 258 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 259 } else { 260 if (!aij->colmap) { 261 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 262 } 263 #if defined (PETSC_USE_CTABLE) 264 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 265 col --; 266 #else 267 col = aij->colmap[idxn[j]] - 1; 268 #endif 269 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 270 else { 271 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 272 } 273 } 274 } 275 } else { 276 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 277 } 278 } 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 if (aij->donotstash) { 293 PetscFunctionReturn(0); 294 } 295 296 /* make sure all processors are either in INSERTMODE or ADDMODE */ 297 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 298 if (addv == (ADD_VALUES|INSERT_VALUES)) { 299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 300 } 301 mat->insertmode = addv; /* in case this processor had no cache */ 302 303 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 304 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 305 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 312 { 313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 314 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 315 PetscErrorCode ierr; 316 PetscMPIInt n; 317 PetscInt i,j,rstart,ncols,flg; 318 PetscInt *row,*col,other_disassembled; 319 PetscScalar *val; 320 InsertMode addv = mat->insertmode; 321 322 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 358 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 359 } 360 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 361 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 362 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 364 365 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 366 aij->rowvalues = 0; 367 368 /* used by MatAXPY() */ 369 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 370 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 371 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 378 { 379 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 384 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatZeroRows_MPIAIJ" 390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 391 { 392 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 393 PetscErrorCode ierr; 394 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 395 PetscInt i,*owners = A->rmap.range; 396 PetscInt *nprocs,j,idx,nsends,row; 397 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 398 PetscInt *rvalues,count,base,slen,*source; 399 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 400 MPI_Comm comm = A->comm; 401 MPI_Request *send_waits,*recv_waits; 402 MPI_Status recv_status,*send_status; 403 #if defined(PETSC_DEBUG) 404 PetscTruth found = PETSC_FALSE; 405 #endif 406 407 PetscFunctionBegin; 408 /* first count number of contributors to each processor */ 409 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 410 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 412 j = 0; 413 for (i=0; i<N; i++) { 414 if (lastidx > (idx = rows[i])) j = 0; 415 lastidx = idx; 416 for (; j<size; j++) { 417 if (idx >= owners[j] && idx < owners[j+1]) { 418 nprocs[2*j]++; 419 nprocs[2*j+1] = 1; 420 owner[i] = j; 421 #if defined(PETSC_DEBUG) 422 found = PETSC_TRUE; 423 #endif 424 break; 425 } 426 } 427 #if defined(PETSC_DEBUG) 428 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 429 found = PETSC_FALSE; 430 #endif 431 } 432 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 433 434 /* inform other processors of number of messages and max length*/ 435 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 436 437 /* post receives: */ 438 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 439 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 440 for (i=0; i<nrecvs; i++) { 441 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 442 } 443 444 /* do sends: 445 1) starts[i] gives the starting index in svalues for stuff going to 446 the ith processor 447 */ 448 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 449 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 450 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 451 starts[0] = 0; 452 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 453 for (i=0; i<N; i++) { 454 svalues[starts[owner[i]]++] = rows[i]; 455 } 456 457 starts[0] = 0; 458 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 459 count = 0; 460 for (i=0; i<size; i++) { 461 if (nprocs[2*i+1]) { 462 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscFree(starts);CHKERRQ(ierr); 466 467 base = owners[rank]; 468 469 /* wait on receives */ 470 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 471 source = lens + nrecvs; 472 count = nrecvs; slen = 0; 473 while (count) { 474 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 475 /* unpack receives into our local space */ 476 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 477 source[imdex] = recv_status.MPI_SOURCE; 478 lens[imdex] = n; 479 slen += n; 480 count--; 481 } 482 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 483 484 /* move the data into the send scatter */ 485 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 486 count = 0; 487 for (i=0; i<nrecvs; i++) { 488 values = rvalues + i*nmax; 489 for (j=0; j<lens[i]; j++) { 490 lrows[count++] = values[j] - base; 491 } 492 } 493 ierr = PetscFree(rvalues);CHKERRQ(ierr); 494 ierr = PetscFree(lens);CHKERRQ(ierr); 495 ierr = PetscFree(owner);CHKERRQ(ierr); 496 ierr = PetscFree(nprocs);CHKERRQ(ierr); 497 498 /* actually zap the local rows */ 499 /* 500 Zero the required rows. If the "diagonal block" of the matrix 501 is square and the user wishes to set the diagonal we use separate 502 code so that MatSetValues() is not called for each diagonal allocating 503 new memory, thus calling lots of mallocs and slowing things down. 504 505 Contributed by: Matthew Knepley 506 */ 507 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 508 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 509 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 510 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 511 } else if (diag != 0.0) { 512 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 513 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 514 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 516 } 517 for (i = 0; i < slen; i++) { 518 row = lrows[i] + rstart; 519 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 522 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 } else { 524 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 525 } 526 ierr = PetscFree(lrows);CHKERRQ(ierr); 527 528 /* wait on sends */ 529 if (nsends) { 530 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 531 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 532 ierr = PetscFree(send_status);CHKERRQ(ierr); 533 } 534 ierr = PetscFree(send_waits);CHKERRQ(ierr); 535 ierr = PetscFree(svalues);CHKERRQ(ierr); 536 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_MPIAIJ" 542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 543 { 544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt nt; 547 548 PetscFunctionBegin; 549 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 550 if (nt != A->cmap.n) { 551 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 552 } 553 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 554 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 555 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 556 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatMultAdd_MPIAIJ" 562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 567 PetscFunctionBegin; 568 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 569 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 570 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 571 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscTruth merged; 582 583 PetscFunctionBegin; 584 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 585 /* do nondiagonal part */ 586 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 587 if (!merged) { 588 /* send it on its way */ 589 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 590 /* do local part */ 591 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 592 /* receive remote parts: note this assumes the values are not actually */ 593 /* added in yy until the next line, */ 594 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 } else { 596 /* do local part */ 597 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 /* values actually were received in the Begin() but we need to call this nop */ 601 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 EXTERN_C_BEGIN 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 610 { 611 MPI_Comm comm; 612 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 613 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 614 IS Me,Notme; 615 PetscErrorCode ierr; 616 PetscInt M,N,first,last,*notme,i; 617 PetscMPIInt size; 618 619 PetscFunctionBegin; 620 621 /* Easy test: symmetric diagonal block */ 622 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 623 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 624 if (!*f) PetscFunctionReturn(0); 625 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 627 if (size == 1) PetscFunctionReturn(0); 628 629 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 630 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 631 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 632 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 633 for (i=0; i<first; i++) notme[i] = i; 634 for (i=last; i<M; i++) notme[i-last+first] = i; 635 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 636 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 637 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 638 Aoff = Aoffs[0]; 639 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 640 Boff = Boffs[0]; 641 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 642 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 644 ierr = ISDestroy(Me);CHKERRQ(ierr); 645 ierr = ISDestroy(Notme);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 EXTERN_C_END 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 PetscErrorCode ierr; 657 658 PetscFunctionBegin; 659 /* do nondiagonal part */ 660 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 661 /* send it on its way */ 662 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 663 /* do local part */ 664 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 665 /* receive remote parts */ 666 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 /* 671 This only works correctly for square matrices where the subblock A->A is the 672 diagonal block 673 */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 677 { 678 PetscErrorCode ierr; 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 681 PetscFunctionBegin; 682 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 683 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 685 } 686 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatScale_MPIAIJ" 692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 693 { 694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 699 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatDestroy_MPIAIJ" 705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 706 { 707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 #if defined(PETSC_USE_LOG) 712 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 713 #endif 714 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 715 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 717 #if defined (PETSC_USE_CTABLE) 718 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 719 #else 720 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 721 #endif 722 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 723 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 724 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 725 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 726 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatRelax_MPIAIJ" 1015 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1016 { 1017 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1018 PetscErrorCode ierr; 1019 Vec bb1; 1020 1021 PetscFunctionBegin; 1022 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1023 1024 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1025 1026 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1027 if (flag & SOR_ZERO_INITIAL_GUESS) { 1028 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1029 its--; 1030 } 1031 1032 while (its--) { 1033 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 1036 /* update rhs: bb1 = bb - B*x */ 1037 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1038 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1039 1040 /* local sweep */ 1041 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1042 CHKERRQ(ierr); 1043 } 1044 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1047 its--; 1048 } 1049 while (its--) { 1050 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1059 CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1076 CHKERRQ(ierr); 1077 } 1078 } else { 1079 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1080 } 1081 1082 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatPermute_MPIAIJ" 1088 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1089 { 1090 MPI_Comm comm,pcomm; 1091 PetscInt first,local_size,nrows,*rows; 1092 int ntids; 1093 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1098 /* make a collective version of 'rowp' */ 1099 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1100 if (pcomm==comm) { 1101 crowp = rowp; 1102 } else { 1103 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1104 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1106 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1107 } 1108 /* collect the global row permutation and invert it */ 1109 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1110 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1111 if (pcomm!=comm) { 1112 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1113 } 1114 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1115 /* get the local target indices */ 1116 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1117 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1120 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1122 /* the column permutation is so much easier; 1123 make a local version of 'colp' and invert it */ 1124 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1125 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1126 if (ntids==1) { 1127 lcolp = colp; 1128 } else { 1129 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1130 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1132 } 1133 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1134 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1135 if (ntids>1) { 1136 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1137 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1138 } 1139 /* now we just get the submatrix */ 1140 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1141 /* clean up */ 1142 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1143 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1149 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1150 { 1151 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1152 Mat A = mat->A,B = mat->B; 1153 PetscErrorCode ierr; 1154 PetscReal isend[5],irecv[5]; 1155 1156 PetscFunctionBegin; 1157 info->block_size = 1.0; 1158 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1159 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1160 isend[3] = info->memory; isend[4] = info->mallocs; 1161 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1162 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1163 isend[3] += info->memory; isend[4] += info->mallocs; 1164 if (flag == MAT_LOCAL) { 1165 info->nz_used = isend[0]; 1166 info->nz_allocated = isend[1]; 1167 info->nz_unneeded = isend[2]; 1168 info->memory = isend[3]; 1169 info->mallocs = isend[4]; 1170 } else if (flag == MAT_GLOBAL_MAX) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else if (flag == MAT_GLOBAL_SUM) { 1178 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1179 info->nz_used = irecv[0]; 1180 info->nz_allocated = irecv[1]; 1181 info->nz_unneeded = irecv[2]; 1182 info->memory = irecv[3]; 1183 info->mallocs = irecv[4]; 1184 } 1185 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1186 info->fill_ratio_needed = 0; 1187 info->factor_mallocs = 0; 1188 info->rows_global = (double)matin->rmap.N; 1189 info->columns_global = (double)matin->cmap.N; 1190 info->rows_local = (double)matin->rmap.n; 1191 info->columns_local = (double)matin->cmap.N; 1192 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatSetOption_MPIAIJ" 1198 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1199 { 1200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 switch (op) { 1205 case MAT_NO_NEW_NONZERO_LOCATIONS: 1206 case MAT_YES_NEW_NONZERO_LOCATIONS: 1207 case MAT_COLUMNS_UNSORTED: 1208 case MAT_COLUMNS_SORTED: 1209 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1210 case MAT_KEEP_ZEROED_ROWS: 1211 case MAT_NEW_NONZERO_LOCATION_ERR: 1212 case MAT_USE_INODES: 1213 case MAT_DO_NOT_USE_INODES: 1214 case MAT_IGNORE_ZERO_ENTRIES: 1215 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1216 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1217 break; 1218 case MAT_ROW_ORIENTED: 1219 a->roworiented = PETSC_TRUE; 1220 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1221 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1222 break; 1223 case MAT_ROWS_SORTED: 1224 case MAT_ROWS_UNSORTED: 1225 case MAT_YES_NEW_DIAGONALS: 1226 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1227 break; 1228 case MAT_COLUMN_ORIENTED: 1229 a->roworiented = PETSC_FALSE; 1230 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1231 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1232 break; 1233 case MAT_IGNORE_OFF_PROC_ENTRIES: 1234 a->donotstash = PETSC_TRUE; 1235 break; 1236 case MAT_NO_NEW_DIAGONALS: 1237 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1238 case MAT_SYMMETRIC: 1239 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1240 break; 1241 case MAT_STRUCTURALLY_SYMMETRIC: 1242 case MAT_HERMITIAN: 1243 case MAT_SYMMETRY_ETERNAL: 1244 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1245 break; 1246 case MAT_NOT_SYMMETRIC: 1247 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1248 case MAT_NOT_HERMITIAN: 1249 case MAT_NOT_SYMMETRY_ETERNAL: 1250 break; 1251 default: 1252 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetRow_MPIAIJ" 1259 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1260 { 1261 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1262 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1263 PetscErrorCode ierr; 1264 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1265 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1266 PetscInt *cmap,*idx_p; 1267 1268 PetscFunctionBegin; 1269 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1270 mat->getrowactive = PETSC_TRUE; 1271 1272 if (!mat->rowvalues && (idx || v)) { 1273 /* 1274 allocate enough space to hold information from the longest row. 1275 */ 1276 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1277 PetscInt max = 1,tmp; 1278 for (i=0; i<matin->rmap.n; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1280 if (max < tmp) { max = tmp; } 1281 } 1282 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1283 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1284 } 1285 1286 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1287 lrow = row - rstart; 1288 1289 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1290 if (!v) {pvA = 0; pvB = 0;} 1291 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1292 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1294 nztot = nzA + nzB; 1295 1296 cmap = mat->garray; 1297 if (v || idx) { 1298 if (nztot) { 1299 /* Sort by increasing column numbers, assuming A and B already sorted */ 1300 PetscInt imark = -1; 1301 if (v) { 1302 *v = v_p = mat->rowvalues; 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1305 else break; 1306 } 1307 imark = i; 1308 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1309 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1310 } 1311 if (idx) { 1312 *idx = idx_p = mat->rowindices; 1313 if (imark > -1) { 1314 for (i=0; i<imark; i++) { 1315 idx_p[i] = cmap[cworkB[i]]; 1316 } 1317 } else { 1318 for (i=0; i<nzB; i++) { 1319 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1320 else break; 1321 } 1322 imark = i; 1323 } 1324 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1325 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1326 } 1327 } else { 1328 if (idx) *idx = 0; 1329 if (v) *v = 0; 1330 } 1331 } 1332 *nz = nztot; 1333 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1334 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1340 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1343 1344 PetscFunctionBegin; 1345 if (!aij->getrowactive) { 1346 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1347 } 1348 aij->getrowactive = PETSC_FALSE; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatNorm_MPIAIJ" 1354 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1355 { 1356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1357 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1358 PetscErrorCode ierr; 1359 PetscInt i,j,cstart = mat->cmap.rstart; 1360 PetscReal sum = 0.0; 1361 PetscScalar *v; 1362 1363 PetscFunctionBegin; 1364 if (aij->size == 1) { 1365 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1366 } else { 1367 if (type == NORM_FROBENIUS) { 1368 v = amat->a; 1369 for (i=0; i<amat->nz; i++) { 1370 #if defined(PETSC_USE_COMPLEX) 1371 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1372 #else 1373 sum += (*v)*(*v); v++; 1374 #endif 1375 } 1376 v = bmat->a; 1377 for (i=0; i<bmat->nz; i++) { 1378 #if defined(PETSC_USE_COMPLEX) 1379 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1380 #else 1381 sum += (*v)*(*v); v++; 1382 #endif 1383 } 1384 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1385 *norm = sqrt(*norm); 1386 } else if (type == NORM_1) { /* max column norm */ 1387 PetscReal *tmp,*tmp2; 1388 PetscInt *jj,*garray = aij->garray; 1389 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1391 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1392 *norm = 0.0; 1393 v = amat->a; jj = amat->j; 1394 for (j=0; j<amat->nz; j++) { 1395 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1396 } 1397 v = bmat->a; jj = bmat->j; 1398 for (j=0; j<bmat->nz; j++) { 1399 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1400 } 1401 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1402 for (j=0; j<mat->cmap.N; j++) { 1403 if (tmp2[j] > *norm) *norm = tmp2[j]; 1404 } 1405 ierr = PetscFree(tmp);CHKERRQ(ierr); 1406 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1407 } else if (type == NORM_INFINITY) { /* max row norm */ 1408 PetscReal ntemp = 0.0; 1409 for (j=0; j<aij->A->rmap.n; j++) { 1410 v = amat->a + amat->i[j]; 1411 sum = 0.0; 1412 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1413 sum += PetscAbsScalar(*v); v++; 1414 } 1415 v = bmat->a + bmat->i[j]; 1416 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1417 sum += PetscAbsScalar(*v); v++; 1418 } 1419 if (sum > ntemp) ntemp = sum; 1420 } 1421 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1422 } else { 1423 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1424 } 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatTranspose_MPIAIJ" 1431 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1432 { 1433 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1434 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1435 PetscErrorCode ierr; 1436 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1437 Mat B; 1438 PetscScalar *array; 1439 1440 PetscFunctionBegin; 1441 if (!matout && M != N) { 1442 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1443 } 1444 1445 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1446 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1447 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1448 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1449 1450 /* copy over the A part */ 1451 Aloc = (Mat_SeqAIJ*)a->A->data; 1452 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1453 row = A->rmap.rstart; 1454 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1455 for (i=0; i<m; i++) { 1456 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1457 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1458 } 1459 aj = Aloc->j; 1460 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1461 1462 /* copy over the B part */ 1463 Aloc = (Mat_SeqAIJ*)a->B->data; 1464 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1465 row = A->rmap.rstart; 1466 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1467 ct = cols; 1468 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1469 for (i=0; i<m; i++) { 1470 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1471 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1472 } 1473 ierr = PetscFree(ct);CHKERRQ(ierr); 1474 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1475 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 if (matout) { 1477 *matout = B; 1478 } else { 1479 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1486 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1487 { 1488 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1489 Mat a = aij->A,b = aij->B; 1490 PetscErrorCode ierr; 1491 PetscInt s1,s2,s3; 1492 1493 PetscFunctionBegin; 1494 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1495 if (rr) { 1496 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1497 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1498 /* Overlap communication with computation. */ 1499 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1500 } 1501 if (ll) { 1502 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1503 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1504 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1505 } 1506 /* scale the diagonal block */ 1507 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1508 1509 if (rr) { 1510 /* Do a scatter end and then right scale the off-diagonal block */ 1511 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1512 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1513 } 1514 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1520 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1521 { 1522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1527 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1532 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1533 { 1534 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatEqual_MPIAIJ" 1544 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1545 { 1546 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1547 Mat a,b,c,d; 1548 PetscTruth flg; 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 a = matA->A; b = matA->B; 1553 c = matB->A; d = matB->B; 1554 1555 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1556 if (flg) { 1557 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1558 } 1559 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatCopy_MPIAIJ" 1565 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1566 { 1567 PetscErrorCode ierr; 1568 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1569 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1570 1571 PetscFunctionBegin; 1572 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1573 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1574 /* because of the column compression in the off-processor part of the matrix a->B, 1575 the number of columns in a->B and b->B may be different, hence we cannot call 1576 the MatCopy() directly on the two parts. If need be, we can provide a more 1577 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1578 then copying the submatrices */ 1579 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1580 } else { 1581 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1582 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1589 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1590 { 1591 PetscErrorCode ierr; 1592 1593 PetscFunctionBegin; 1594 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #include "petscblaslapack.h" 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatAXPY_MPIAIJ" 1601 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1602 { 1603 PetscErrorCode ierr; 1604 PetscInt i; 1605 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1606 PetscBLASInt bnz,one=1; 1607 Mat_SeqAIJ *x,*y; 1608 1609 PetscFunctionBegin; 1610 if (str == SAME_NONZERO_PATTERN) { 1611 PetscScalar alpha = a; 1612 x = (Mat_SeqAIJ *)xx->A->data; 1613 y = (Mat_SeqAIJ *)yy->A->data; 1614 bnz = (PetscBLASInt)x->nz; 1615 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1616 x = (Mat_SeqAIJ *)xx->B->data; 1617 y = (Mat_SeqAIJ *)yy->B->data; 1618 bnz = (PetscBLASInt)x->nz; 1619 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1620 } else if (str == SUBSET_NONZERO_PATTERN) { 1621 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1622 1623 x = (Mat_SeqAIJ *)xx->B->data; 1624 y = (Mat_SeqAIJ *)yy->B->data; 1625 if (y->xtoy && y->XtoY != xx->B) { 1626 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1627 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1628 } 1629 if (!y->xtoy) { /* get xtoy */ 1630 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1631 y->XtoY = xx->B; 1632 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1633 } 1634 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1635 } else { 1636 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatConjugate_MPIAIJ" 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1646 { 1647 #if defined(PETSC_USE_COMPLEX) 1648 PetscErrorCode ierr; 1649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1650 1651 PetscFunctionBegin; 1652 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1653 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1654 #else 1655 PetscFunctionBegin; 1656 #endif 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatRealPart_MPIAIJ" 1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1663 { 1664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1665 PetscErrorCode ierr; 1666 1667 PetscFunctionBegin; 1668 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1669 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1682 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #ifdef PETSC_HAVE_PBGL 1687 1688 #include <boost/parallel/mpi/bsp_process_group.hpp> 1689 #include <boost/graph/distributed/ilu_default_graph.hpp> 1690 #include <boost/graph/distributed/ilu_0_block.hpp> 1691 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1692 #include <boost/graph/distributed/petsc/interface.hpp> 1693 #include <boost/multi_array.hpp> 1694 #include <boost/parallel/distributed_property_map.hpp> 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1698 /* 1699 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1700 */ 1701 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1702 { 1703 namespace petsc = boost::distributed::petsc; 1704 1705 namespace graph_dist = boost::graph::distributed; 1706 using boost::graph::distributed::ilu_default::process_group_type; 1707 using boost::graph::ilu_permuted; 1708 1709 PetscTruth row_identity, col_identity; 1710 PetscContainer c; 1711 PetscInt m, n, M, N; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1716 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1717 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1718 if (!row_identity || !col_identity) { 1719 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1720 } 1721 1722 process_group_type pg; 1723 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1724 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1725 lgraph_type& level_graph = *lgraph_p; 1726 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1727 1728 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1729 ilu_permuted(level_graph); 1730 1731 /* put together the new matrix */ 1732 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1733 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1734 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1735 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1736 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1737 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1738 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 (*fact)->factor = FACTOR_LU; 1740 1741 ierr = PetscContainerCreate(A->comm, &c); 1742 ierr = PetscContainerSetPointer(c, lgraph_p); 1743 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1749 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1750 { 1751 PetscFunctionBegin; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSolve_MPIAIJ" 1757 /* 1758 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1759 */ 1760 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1761 { 1762 namespace graph_dist = boost::graph::distributed; 1763 1764 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1765 lgraph_type* lgraph_p; 1766 PetscContainer c; 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1771 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1772 ierr = VecCopy(b, x); CHKERRQ(ierr); 1773 1774 PetscScalar* array_x; 1775 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1776 PetscInt sx; 1777 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1778 1779 PetscScalar* array_b; 1780 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1781 PetscInt sb; 1782 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1783 1784 lgraph_type& level_graph = *lgraph_p; 1785 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1786 1787 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1788 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1789 ref_x(array_x, boost::extents[num_vertices(graph)]); 1790 1791 typedef boost::iterator_property_map<array_ref_type::iterator, 1792 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1793 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1794 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1795 1796 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1797 1798 PetscFunctionReturn(0); 1799 } 1800 #endif 1801 1802 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1803 PetscInt nzlocal,nsends,nrecvs; 1804 PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; 1805 PetscScalar *sbuf_a,**rbuf_a; 1806 PetscErrorCode (*MatDestroy)(Mat); 1807 } Mat_Redundant; 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1811 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1812 { 1813 PetscErrorCode ierr; 1814 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1815 PetscInt i; 1816 1817 PetscFunctionBegin; 1818 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1819 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1820 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1821 for (i=0; i<redund->nrecvs; i++){ 1822 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1823 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1824 } 1825 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1826 ierr = PetscFree(redund);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatDestroy_MatRedundant" 1832 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1833 { 1834 PetscErrorCode ierr; 1835 PetscContainer container; 1836 Mat_Redundant *redund=PETSC_NULL; 1837 1838 PetscFunctionBegin; 1839 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1840 if (container) { 1841 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1842 } else { 1843 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1844 } 1845 A->ops->destroy = redund->MatDestroy; 1846 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1847 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1848 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 1854 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 1855 { 1856 PetscMPIInt rank,size; 1857 MPI_Comm comm=mat->comm; 1858 PetscErrorCode ierr; 1859 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 1860 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 1861 PetscInt *rowrange=mat->rmap.range; 1862 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1863 Mat A=aij->A,B=aij->B,C=*matredundant; 1864 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1865 PetscScalar *sbuf_a; 1866 PetscInt nzlocal=a->nz+b->nz; 1867 PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 1868 PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N; 1869 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 1870 PetscScalar *vals,*aworkA,*aworkB; 1871 PetscMPIInt tag1,tag2,tag3,imdex; 1872 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 1873 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 1874 MPI_Status recv_status,*send_status; 1875 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 1876 PetscInt **rbuf_j=PETSC_NULL; 1877 PetscScalar **rbuf_a=PETSC_NULL; 1878 Mat_Redundant *redund=PETSC_NULL; 1879 PetscContainer container; 1880 1881 PetscFunctionBegin; 1882 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1884 1885 if (reuse == MAT_REUSE_MATRIX) { 1886 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 1887 if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 1888 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 1889 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 1890 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1891 if (container) { 1892 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1893 } else { 1894 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1895 } 1896 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 1897 1898 nsends = redund->nsends; 1899 nrecvs = redund->nrecvs; 1900 send_rank = redund->send_rank; recv_rank = send_rank + size; 1901 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 1902 sbuf_j = redund->sbuf_j; 1903 sbuf_a = redund->sbuf_a; 1904 rbuf_j = redund->rbuf_j; 1905 rbuf_a = redund->rbuf_a; 1906 } 1907 1908 if (reuse == MAT_INITIAL_MATRIX){ 1909 PetscMPIInt subrank,subsize; 1910 PetscInt nleftover,np_subcomm; 1911 /* get the destination processors' id send_rank, nsends and nrecvs */ 1912 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 1913 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 1914 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 1915 recv_rank = send_rank + size; 1916 np_subcomm = size/nsubcomm; 1917 nleftover = size - nsubcomm*np_subcomm; 1918 nsends = 0; nrecvs = 0; 1919 for (i=0; i<size; i++){ /* i=rank*/ 1920 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 1921 send_rank[nsends] = i; nsends++; 1922 recv_rank[nrecvs++] = i; 1923 } 1924 } 1925 if (rank >= size - nleftover){/* this proc is a leftover processor */ 1926 i = size-nleftover-1; 1927 j = 0; 1928 while (j < nsubcomm - nleftover){ 1929 send_rank[nsends++] = i; 1930 i--; j++; 1931 } 1932 } 1933 1934 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 1935 for (i=0; i<nleftover; i++){ 1936 recv_rank[nrecvs++] = size-nleftover+i; 1937 } 1938 } 1939 1940 /* allocate sbuf_j, sbuf_a */ 1941 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 1942 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 1943 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 1944 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 1945 1946 /* copy mat's local entries into the buffers */ 1947 if (reuse == MAT_INITIAL_MATRIX){ 1948 rownz_max = 0; 1949 rptr = sbuf_j; 1950 cols = sbuf_j + rend-rstart + 1; 1951 vals = sbuf_a; 1952 rptr[0] = 0; 1953 for (i=0; i<rend-rstart; i++){ 1954 row = i + rstart; 1955 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1956 ncols = nzA + nzB; 1957 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1958 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1959 /* load the column indices for this row into cols */ 1960 lwrite = 0; 1961 for (l=0; l<nzB; l++) { 1962 if ((ctmp = bmap[cworkB[l]]) < cstart){ 1963 vals[lwrite] = aworkB[l]; 1964 cols[lwrite++] = ctmp; 1965 } 1966 } 1967 for (l=0; l<nzA; l++){ 1968 vals[lwrite] = aworkA[l]; 1969 cols[lwrite++] = cstart + cworkA[l]; 1970 } 1971 for (l=0; l<nzB; l++) { 1972 if ((ctmp = bmap[cworkB[l]]) >= cend){ 1973 vals[lwrite] = aworkB[l]; 1974 cols[lwrite++] = ctmp; 1975 } 1976 } 1977 vals += ncols; 1978 cols += ncols; 1979 rptr[i+1] = rptr[i] + ncols; 1980 if (rownz_max < ncols) rownz_max = ncols; 1981 } 1982 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 1983 } else { /* only copy matrix values into sbuf_a */ 1984 rptr = sbuf_j; 1985 vals = sbuf_a; 1986 rptr[0] = 0; 1987 for (i=0; i<rend-rstart; i++){ 1988 row = i + rstart; 1989 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 1990 ncols = nzA + nzB; 1991 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 1992 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 1993 lwrite = 0; 1994 for (l=0; l<nzB; l++) { 1995 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 1996 } 1997 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 1998 for (l=0; l<nzB; l++) { 1999 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2000 } 2001 vals += ncols; 2002 rptr[i+1] = rptr[i] + ncols; 2003 } 2004 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2005 2006 /* send nzlocal to others, and recv other's nzlocal */ 2007 /*--------------------------------------------------*/ 2008 if (reuse == MAT_INITIAL_MATRIX){ 2009 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2010 s_waits2 = s_waits3 + nsends; 2011 s_waits1 = s_waits2 + nsends; 2012 r_waits1 = s_waits1 + nsends; 2013 r_waits2 = r_waits1 + nrecvs; 2014 r_waits3 = r_waits2 + nrecvs; 2015 } else { 2016 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2017 r_waits3 = s_waits3 + nsends; 2018 } 2019 2020 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2021 if (reuse == MAT_INITIAL_MATRIX){ 2022 /* get new tags to keep the communication clean */ 2023 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2024 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2025 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2026 rbuf_nz = sbuf_nz + nsends; 2027 2028 /* post receives of other's nzlocal */ 2029 for (i=0; i<nrecvs; i++){ 2030 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2031 } 2032 /* send nzlocal to others */ 2033 for (i=0; i<nsends; i++){ 2034 sbuf_nz[i] = nzlocal; 2035 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2036 } 2037 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2038 count = nrecvs; 2039 while (count) { 2040 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2041 recv_rank[imdex] = recv_status.MPI_SOURCE; 2042 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2043 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2044 2045 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2046 rbuf_nz[imdex] += i + 2; 2047 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2048 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2049 count--; 2050 } 2051 /* wait on sends of nzlocal */ 2052 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2053 /* send mat->i,j to others, and recv from other's */ 2054 /*------------------------------------------------*/ 2055 for (i=0; i<nsends; i++){ 2056 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2057 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2058 } 2059 /* wait on receives of mat->i,j */ 2060 /*------------------------------*/ 2061 count = nrecvs; 2062 while (count) { 2063 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2064 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2065 count--; 2066 } 2067 /* wait on sends of mat->i,j */ 2068 /*---------------------------*/ 2069 if (nsends) { 2070 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2071 } 2072 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2073 2074 /* post receives, send and receive mat->a */ 2075 /*----------------------------------------*/ 2076 for (imdex=0; imdex<nrecvs; imdex++) { 2077 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2078 } 2079 for (i=0; i<nsends; i++){ 2080 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2081 } 2082 count = nrecvs; 2083 while (count) { 2084 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2085 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2086 count--; 2087 } 2088 if (nsends) { 2089 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2090 } 2091 2092 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2093 2094 /* create redundant matrix */ 2095 /*-------------------------*/ 2096 if (reuse == MAT_INITIAL_MATRIX){ 2097 /* compute rownz_max for preallocation */ 2098 for (imdex=0; imdex<nrecvs; imdex++){ 2099 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2100 rptr = rbuf_j[imdex]; 2101 for (i=0; i<j; i++){ 2102 ncols = rptr[i+1] - rptr[i]; 2103 if (rownz_max < ncols) rownz_max = ncols; 2104 } 2105 } 2106 2107 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2108 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2109 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2110 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2111 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2112 } else { 2113 C = *matredundant; 2114 } 2115 2116 /* insert local matrix entries */ 2117 rptr = sbuf_j; 2118 cols = sbuf_j + rend-rstart + 1; 2119 vals = sbuf_a; 2120 for (i=0; i<rend-rstart; i++){ 2121 row = i + rstart; 2122 ncols = rptr[i+1] - rptr[i]; 2123 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2124 vals += ncols; 2125 cols += ncols; 2126 } 2127 /* insert received matrix entries */ 2128 for (imdex=0; imdex<nrecvs; imdex++){ 2129 rstart = rowrange[recv_rank[imdex]]; 2130 rend = rowrange[recv_rank[imdex]+1]; 2131 rptr = rbuf_j[imdex]; 2132 cols = rbuf_j[imdex] + rend-rstart + 1; 2133 vals = rbuf_a[imdex]; 2134 for (i=0; i<rend-rstart; i++){ 2135 row = i + rstart; 2136 ncols = rptr[i+1] - rptr[i]; 2137 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2138 vals += ncols; 2139 cols += ncols; 2140 } 2141 } 2142 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2145 if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N); 2146 if (reuse == MAT_INITIAL_MATRIX){ 2147 PetscContainer container; 2148 *matredundant = C; 2149 /* create a supporting struct and attach it to C for reuse */ 2150 ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr); 2151 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2152 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2153 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2154 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2155 2156 redund->nzlocal = nzlocal; 2157 redund->nsends = nsends; 2158 redund->nrecvs = nrecvs; 2159 redund->send_rank = send_rank; 2160 redund->sbuf_nz = sbuf_nz; 2161 redund->sbuf_j = sbuf_j; 2162 redund->sbuf_a = sbuf_a; 2163 redund->rbuf_j = rbuf_j; 2164 redund->rbuf_a = rbuf_a; 2165 2166 redund->MatDestroy = C->ops->destroy; 2167 C->ops->destroy = MatDestroy_MatRedundant; 2168 } 2169 PetscFunctionReturn(0); 2170 } 2171 2172 /* -------------------------------------------------------------------*/ 2173 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2174 MatGetRow_MPIAIJ, 2175 MatRestoreRow_MPIAIJ, 2176 MatMult_MPIAIJ, 2177 /* 4*/ MatMultAdd_MPIAIJ, 2178 MatMultTranspose_MPIAIJ, 2179 MatMultTransposeAdd_MPIAIJ, 2180 #ifdef PETSC_HAVE_PBGL 2181 MatSolve_MPIAIJ, 2182 #else 2183 0, 2184 #endif 2185 0, 2186 0, 2187 /*10*/ 0, 2188 0, 2189 0, 2190 MatRelax_MPIAIJ, 2191 MatTranspose_MPIAIJ, 2192 /*15*/ MatGetInfo_MPIAIJ, 2193 MatEqual_MPIAIJ, 2194 MatGetDiagonal_MPIAIJ, 2195 MatDiagonalScale_MPIAIJ, 2196 MatNorm_MPIAIJ, 2197 /*20*/ MatAssemblyBegin_MPIAIJ, 2198 MatAssemblyEnd_MPIAIJ, 2199 0, 2200 MatSetOption_MPIAIJ, 2201 MatZeroEntries_MPIAIJ, 2202 /*25*/ MatZeroRows_MPIAIJ, 2203 0, 2204 #ifdef PETSC_HAVE_PBGL 2205 MatLUFactorNumeric_MPIAIJ, 2206 #else 2207 0, 2208 #endif 2209 0, 2210 0, 2211 /*30*/ MatSetUpPreallocation_MPIAIJ, 2212 #ifdef PETSC_HAVE_PBGL 2213 MatILUFactorSymbolic_MPIAIJ, 2214 #else 2215 0, 2216 #endif 2217 0, 2218 0, 2219 0, 2220 /*35*/ MatDuplicate_MPIAIJ, 2221 0, 2222 0, 2223 0, 2224 0, 2225 /*40*/ MatAXPY_MPIAIJ, 2226 MatGetSubMatrices_MPIAIJ, 2227 MatIncreaseOverlap_MPIAIJ, 2228 MatGetValues_MPIAIJ, 2229 MatCopy_MPIAIJ, 2230 /*45*/ 0, 2231 MatScale_MPIAIJ, 2232 0, 2233 0, 2234 0, 2235 /*50*/ MatSetBlockSize_MPIAIJ, 2236 0, 2237 0, 2238 0, 2239 0, 2240 /*55*/ MatFDColoringCreate_MPIAIJ, 2241 0, 2242 MatSetUnfactored_MPIAIJ, 2243 MatPermute_MPIAIJ, 2244 0, 2245 /*60*/ MatGetSubMatrix_MPIAIJ, 2246 MatDestroy_MPIAIJ, 2247 MatView_MPIAIJ, 2248 0, 2249 0, 2250 /*65*/ 0, 2251 0, 2252 0, 2253 0, 2254 0, 2255 /*70*/ 0, 2256 0, 2257 MatSetColoring_MPIAIJ, 2258 #if defined(PETSC_HAVE_ADIC) 2259 MatSetValuesAdic_MPIAIJ, 2260 #else 2261 0, 2262 #endif 2263 MatSetValuesAdifor_MPIAIJ, 2264 /*75*/ 0, 2265 0, 2266 0, 2267 0, 2268 0, 2269 /*80*/ 0, 2270 0, 2271 0, 2272 0, 2273 /*84*/ MatLoad_MPIAIJ, 2274 0, 2275 0, 2276 0, 2277 0, 2278 0, 2279 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 2280 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2281 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2282 MatPtAP_Basic, 2283 MatPtAPSymbolic_MPIAIJ, 2284 /*95*/ MatPtAPNumeric_MPIAIJ, 2285 0, 2286 0, 2287 0, 2288 0, 2289 /*100*/0, 2290 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2291 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2292 MatConjugate_MPIAIJ, 2293 0, 2294 /*105*/MatSetValuesRow_MPIAIJ, 2295 MatRealPart_MPIAIJ, 2296 MatImaginaryPart_MPIAIJ, 2297 0, 2298 0, 2299 /*110*/0, 2300 MatGetRedundantMatrix_MPIAIJ}; 2301 2302 /* ----------------------------------------------------------------------------------------*/ 2303 2304 EXTERN_C_BEGIN 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2307 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2308 { 2309 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2314 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 EXTERN_C_END 2318 2319 EXTERN_C_BEGIN 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2322 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2323 { 2324 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2325 PetscErrorCode ierr; 2326 2327 PetscFunctionBegin; 2328 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2329 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2330 PetscFunctionReturn(0); 2331 } 2332 EXTERN_C_END 2333 2334 #include "petscpc.h" 2335 EXTERN_C_BEGIN 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2338 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2339 { 2340 Mat_MPIAIJ *b; 2341 PetscErrorCode ierr; 2342 PetscInt i; 2343 2344 PetscFunctionBegin; 2345 B->preallocated = PETSC_TRUE; 2346 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2347 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2348 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2349 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2350 2351 B->rmap.bs = B->cmap.bs = 1; 2352 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2353 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2354 if (d_nnz) { 2355 for (i=0; i<B->rmap.n; i++) { 2356 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2357 } 2358 } 2359 if (o_nnz) { 2360 for (i=0; i<B->rmap.n; i++) { 2361 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2362 } 2363 } 2364 b = (Mat_MPIAIJ*)B->data; 2365 2366 /* Explicitly create 2 MATSEQAIJ matrices. */ 2367 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2368 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2369 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2370 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2371 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2372 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2373 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2374 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2375 2376 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2377 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2378 2379 PetscFunctionReturn(0); 2380 } 2381 EXTERN_C_END 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2385 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2386 { 2387 Mat mat; 2388 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 *newmat = 0; 2393 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2394 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2395 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2396 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2397 a = (Mat_MPIAIJ*)mat->data; 2398 2399 mat->factor = matin->factor; 2400 mat->rmap.bs = matin->rmap.bs; 2401 mat->assembled = PETSC_TRUE; 2402 mat->insertmode = NOT_SET_VALUES; 2403 mat->preallocated = PETSC_TRUE; 2404 2405 a->size = oldmat->size; 2406 a->rank = oldmat->rank; 2407 a->donotstash = oldmat->donotstash; 2408 a->roworiented = oldmat->roworiented; 2409 a->rowindices = 0; 2410 a->rowvalues = 0; 2411 a->getrowactive = PETSC_FALSE; 2412 2413 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2414 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2415 2416 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2417 if (oldmat->colmap) { 2418 #if defined (PETSC_USE_CTABLE) 2419 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2420 #else 2421 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2422 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2423 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2424 #endif 2425 } else a->colmap = 0; 2426 if (oldmat->garray) { 2427 PetscInt len; 2428 len = oldmat->B->cmap.n; 2429 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2430 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2431 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2432 } else a->garray = 0; 2433 2434 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2435 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2436 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2437 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2438 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2439 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2440 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2441 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2442 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2443 *newmat = mat; 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #include "petscsys.h" 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatLoad_MPIAIJ" 2451 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2452 { 2453 Mat A; 2454 PetscScalar *vals,*svals; 2455 MPI_Comm comm = ((PetscObject)viewer)->comm; 2456 MPI_Status status; 2457 PetscErrorCode ierr; 2458 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2459 PetscInt i,nz,j,rstart,rend,mmax; 2460 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2461 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2462 PetscInt cend,cstart,n,*rowners; 2463 int fd; 2464 2465 PetscFunctionBegin; 2466 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2467 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2468 if (!rank) { 2469 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2470 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2471 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2472 } 2473 2474 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2475 M = header[1]; N = header[2]; 2476 /* determine ownership of all rows */ 2477 m = M/size + ((M % size) > rank); 2478 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2479 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2480 2481 /* First process needs enough room for process with most rows */ 2482 if (!rank) { 2483 mmax = rowners[1]; 2484 for (i=2; i<size; i++) { 2485 mmax = PetscMax(mmax,rowners[i]); 2486 } 2487 } else mmax = m; 2488 2489 rowners[0] = 0; 2490 for (i=2; i<=size; i++) { 2491 rowners[i] += rowners[i-1]; 2492 } 2493 rstart = rowners[rank]; 2494 rend = rowners[rank+1]; 2495 2496 /* distribute row lengths to all processors */ 2497 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2498 if (!rank) { 2499 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2500 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2501 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2502 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2503 for (j=0; j<m; j++) { 2504 procsnz[0] += ourlens[j]; 2505 } 2506 for (i=1; i<size; i++) { 2507 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2508 /* calculate the number of nonzeros on each processor */ 2509 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2510 procsnz[i] += rowlengths[j]; 2511 } 2512 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2513 } 2514 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2515 } else { 2516 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2517 } 2518 2519 if (!rank) { 2520 /* determine max buffer needed and allocate it */ 2521 maxnz = 0; 2522 for (i=0; i<size; i++) { 2523 maxnz = PetscMax(maxnz,procsnz[i]); 2524 } 2525 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2526 2527 /* read in my part of the matrix column indices */ 2528 nz = procsnz[0]; 2529 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2530 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2531 2532 /* read in every one elses and ship off */ 2533 for (i=1; i<size; i++) { 2534 nz = procsnz[i]; 2535 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2536 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2537 } 2538 ierr = PetscFree(cols);CHKERRQ(ierr); 2539 } else { 2540 /* determine buffer space needed for message */ 2541 nz = 0; 2542 for (i=0; i<m; i++) { 2543 nz += ourlens[i]; 2544 } 2545 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2546 2547 /* receive message of column indices*/ 2548 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2549 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2550 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2551 } 2552 2553 /* determine column ownership if matrix is not square */ 2554 if (N != M) { 2555 n = N/size + ((N % size) > rank); 2556 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2557 cstart = cend - n; 2558 } else { 2559 cstart = rstart; 2560 cend = rend; 2561 n = cend - cstart; 2562 } 2563 2564 /* loop over local rows, determining number of off diagonal entries */ 2565 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2566 jj = 0; 2567 for (i=0; i<m; i++) { 2568 for (j=0; j<ourlens[i]; j++) { 2569 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2570 jj++; 2571 } 2572 } 2573 2574 /* create our matrix */ 2575 for (i=0; i<m; i++) { 2576 ourlens[i] -= offlens[i]; 2577 } 2578 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2579 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2580 ierr = MatSetType(A,type);CHKERRQ(ierr); 2581 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2582 2583 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2584 for (i=0; i<m; i++) { 2585 ourlens[i] += offlens[i]; 2586 } 2587 2588 if (!rank) { 2589 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2590 2591 /* read in my part of the matrix numerical values */ 2592 nz = procsnz[0]; 2593 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2594 2595 /* insert into matrix */ 2596 jj = rstart; 2597 smycols = mycols; 2598 svals = vals; 2599 for (i=0; i<m; i++) { 2600 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2601 smycols += ourlens[i]; 2602 svals += ourlens[i]; 2603 jj++; 2604 } 2605 2606 /* read in other processors and ship out */ 2607 for (i=1; i<size; i++) { 2608 nz = procsnz[i]; 2609 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2610 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2611 } 2612 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2613 } else { 2614 /* receive numeric values */ 2615 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2616 2617 /* receive message of values*/ 2618 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2619 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2620 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2621 2622 /* insert into matrix */ 2623 jj = rstart; 2624 smycols = mycols; 2625 svals = vals; 2626 for (i=0; i<m; i++) { 2627 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2628 smycols += ourlens[i]; 2629 svals += ourlens[i]; 2630 jj++; 2631 } 2632 } 2633 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2634 ierr = PetscFree(vals);CHKERRQ(ierr); 2635 ierr = PetscFree(mycols);CHKERRQ(ierr); 2636 ierr = PetscFree(rowners);CHKERRQ(ierr); 2637 2638 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2639 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2640 *newmat = A; 2641 PetscFunctionReturn(0); 2642 } 2643 2644 #undef __FUNCT__ 2645 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2646 /* 2647 Not great since it makes two copies of the submatrix, first an SeqAIJ 2648 in local and then by concatenating the local matrices the end result. 2649 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2650 */ 2651 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2652 { 2653 PetscErrorCode ierr; 2654 PetscMPIInt rank,size; 2655 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2656 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2657 Mat *local,M,Mreuse; 2658 PetscScalar *vwork,*aa; 2659 MPI_Comm comm = mat->comm; 2660 Mat_SeqAIJ *aij; 2661 2662 2663 PetscFunctionBegin; 2664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2666 2667 if (call == MAT_REUSE_MATRIX) { 2668 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2669 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2670 local = &Mreuse; 2671 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2672 } else { 2673 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2674 Mreuse = *local; 2675 ierr = PetscFree(local);CHKERRQ(ierr); 2676 } 2677 2678 /* 2679 m - number of local rows 2680 n - number of columns (same on all processors) 2681 rstart - first row in new global matrix generated 2682 */ 2683 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2684 if (call == MAT_INITIAL_MATRIX) { 2685 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2686 ii = aij->i; 2687 jj = aij->j; 2688 2689 /* 2690 Determine the number of non-zeros in the diagonal and off-diagonal 2691 portions of the matrix in order to do correct preallocation 2692 */ 2693 2694 /* first get start and end of "diagonal" columns */ 2695 if (csize == PETSC_DECIDE) { 2696 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2697 if (mglobal == n) { /* square matrix */ 2698 nlocal = m; 2699 } else { 2700 nlocal = n/size + ((n % size) > rank); 2701 } 2702 } else { 2703 nlocal = csize; 2704 } 2705 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2706 rstart = rend - nlocal; 2707 if (rank == size - 1 && rend != n) { 2708 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2709 } 2710 2711 /* next, compute all the lengths */ 2712 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2713 olens = dlens + m; 2714 for (i=0; i<m; i++) { 2715 jend = ii[i+1] - ii[i]; 2716 olen = 0; 2717 dlen = 0; 2718 for (j=0; j<jend; j++) { 2719 if (*jj < rstart || *jj >= rend) olen++; 2720 else dlen++; 2721 jj++; 2722 } 2723 olens[i] = olen; 2724 dlens[i] = dlen; 2725 } 2726 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2727 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2728 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2729 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2730 ierr = PetscFree(dlens);CHKERRQ(ierr); 2731 } else { 2732 PetscInt ml,nl; 2733 2734 M = *newmat; 2735 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2736 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2737 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2738 /* 2739 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2740 rather than the slower MatSetValues(). 2741 */ 2742 M->was_assembled = PETSC_TRUE; 2743 M->assembled = PETSC_FALSE; 2744 } 2745 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2746 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2747 ii = aij->i; 2748 jj = aij->j; 2749 aa = aij->a; 2750 for (i=0; i<m; i++) { 2751 row = rstart + i; 2752 nz = ii[i+1] - ii[i]; 2753 cwork = jj; jj += nz; 2754 vwork = aa; aa += nz; 2755 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2756 } 2757 2758 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 *newmat = M; 2761 2762 /* save submatrix used in processor for next request */ 2763 if (call == MAT_INITIAL_MATRIX) { 2764 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2765 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2766 } 2767 2768 PetscFunctionReturn(0); 2769 } 2770 2771 EXTERN_C_BEGIN 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2774 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2775 { 2776 PetscInt m,cstart, cend,j,nnz,i,d; 2777 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2778 const PetscInt *JJ; 2779 PetscScalar *values; 2780 PetscErrorCode ierr; 2781 2782 PetscFunctionBegin; 2783 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2784 2785 B->rmap.bs = B->cmap.bs = 1; 2786 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2787 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2788 m = B->rmap.n; 2789 cstart = B->cmap.rstart; 2790 cend = B->cmap.rend; 2791 rstart = B->rmap.rstart; 2792 2793 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2794 o_nnz = d_nnz + m; 2795 2796 for (i=0; i<m; i++) { 2797 nnz = Ii[i+1]- Ii[i]; 2798 JJ = J + Ii[i]; 2799 nnz_max = PetscMax(nnz_max,nnz); 2800 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2801 for (j=0; j<nnz; j++) { 2802 if (*JJ >= cstart) break; 2803 JJ++; 2804 } 2805 d = 0; 2806 for (; j<nnz; j++) { 2807 if (*JJ++ >= cend) break; 2808 d++; 2809 } 2810 d_nnz[i] = d; 2811 o_nnz[i] = nnz - d; 2812 } 2813 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2814 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2815 2816 if (v) values = (PetscScalar*)v; 2817 else { 2818 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2819 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2820 } 2821 2822 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2823 for (i=0; i<m; i++) { 2824 ii = i + rstart; 2825 nnz = Ii[i+1]- Ii[i]; 2826 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2827 } 2828 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2829 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2830 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2831 2832 if (!v) { 2833 ierr = PetscFree(values);CHKERRQ(ierr); 2834 } 2835 PetscFunctionReturn(0); 2836 } 2837 EXTERN_C_END 2838 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2841 /*@ 2842 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2843 (the default parallel PETSc format). 2844 2845 Collective on MPI_Comm 2846 2847 Input Parameters: 2848 + B - the matrix 2849 . i - the indices into j for the start of each local row (starts with zero) 2850 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2851 - v - optional values in the matrix 2852 2853 Level: developer 2854 2855 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2856 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2857 2858 .keywords: matrix, aij, compressed row, sparse, parallel 2859 2860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2861 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2862 @*/ 2863 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2864 { 2865 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2866 2867 PetscFunctionBegin; 2868 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2869 if (f) { 2870 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2871 } 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2877 /*@C 2878 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2879 (the default parallel PETSc format). For good matrix assembly performance 2880 the user should preallocate the matrix storage by setting the parameters 2881 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2882 performance can be increased by more than a factor of 50. 2883 2884 Collective on MPI_Comm 2885 2886 Input Parameters: 2887 + A - the matrix 2888 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2889 (same value is used for all local rows) 2890 . d_nnz - array containing the number of nonzeros in the various rows of the 2891 DIAGONAL portion of the local submatrix (possibly different for each row) 2892 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2893 The size of this array is equal to the number of local rows, i.e 'm'. 2894 You must leave room for the diagonal entry even if it is zero. 2895 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2896 submatrix (same value is used for all local rows). 2897 - o_nnz - array containing the number of nonzeros in the various rows of the 2898 OFF-DIAGONAL portion of the local submatrix (possibly different for 2899 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2900 structure. The size of this array is equal to the number 2901 of local rows, i.e 'm'. 2902 2903 If the *_nnz parameter is given then the *_nz parameter is ignored 2904 2905 The AIJ format (also called the Yale sparse matrix format or 2906 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2907 storage. The stored row and column indices begin with zero. See the users manual for details. 2908 2909 The parallel matrix is partitioned such that the first m0 rows belong to 2910 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2911 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2912 2913 The DIAGONAL portion of the local submatrix of a processor can be defined 2914 as the submatrix which is obtained by extraction the part corresponding 2915 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2916 first row that belongs to the processor, and r2 is the last row belonging 2917 to the this processor. This is a square mxm matrix. The remaining portion 2918 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2919 2920 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2921 2922 Example usage: 2923 2924 Consider the following 8x8 matrix with 34 non-zero values, that is 2925 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2926 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2927 as follows: 2928 2929 .vb 2930 1 2 0 | 0 3 0 | 0 4 2931 Proc0 0 5 6 | 7 0 0 | 8 0 2932 9 0 10 | 11 0 0 | 12 0 2933 ------------------------------------- 2934 13 0 14 | 15 16 17 | 0 0 2935 Proc1 0 18 0 | 19 20 21 | 0 0 2936 0 0 0 | 22 23 0 | 24 0 2937 ------------------------------------- 2938 Proc2 25 26 27 | 0 0 28 | 29 0 2939 30 0 0 | 31 32 33 | 0 34 2940 .ve 2941 2942 This can be represented as a collection of submatrices as: 2943 2944 .vb 2945 A B C 2946 D E F 2947 G H I 2948 .ve 2949 2950 Where the submatrices A,B,C are owned by proc0, D,E,F are 2951 owned by proc1, G,H,I are owned by proc2. 2952 2953 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2954 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2955 The 'M','N' parameters are 8,8, and have the same values on all procs. 2956 2957 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2958 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2959 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2960 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2961 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2962 matrix, ans [DF] as another SeqAIJ matrix. 2963 2964 When d_nz, o_nz parameters are specified, d_nz storage elements are 2965 allocated for every row of the local diagonal submatrix, and o_nz 2966 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2967 One way to choose d_nz and o_nz is to use the max nonzerors per local 2968 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2969 In this case, the values of d_nz,o_nz are: 2970 .vb 2971 proc0 : dnz = 2, o_nz = 2 2972 proc1 : dnz = 3, o_nz = 2 2973 proc2 : dnz = 1, o_nz = 4 2974 .ve 2975 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2976 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2977 for proc3. i.e we are using 12+15+10=37 storage locations to store 2978 34 values. 2979 2980 When d_nnz, o_nnz parameters are specified, the storage is specified 2981 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2982 In the above case the values for d_nnz,o_nnz are: 2983 .vb 2984 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2985 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2986 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2987 .ve 2988 Here the space allocated is sum of all the above values i.e 34, and 2989 hence pre-allocation is perfect. 2990 2991 Level: intermediate 2992 2993 .keywords: matrix, aij, compressed row, sparse, parallel 2994 2995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2996 MPIAIJ 2997 @*/ 2998 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2999 { 3000 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3001 3002 PetscFunctionBegin; 3003 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3004 if (f) { 3005 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3006 } 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3012 /*@C 3013 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3014 CSR format the local rows. 3015 3016 Collective on MPI_Comm 3017 3018 Input Parameters: 3019 + comm - MPI communicator 3020 . m - number of local rows (Cannot be PETSC_DECIDE) 3021 . n - This value should be the same as the local size used in creating the 3022 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3023 calculated if N is given) For square matrices n is almost always m. 3024 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3025 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3026 . i - row indices 3027 . j - column indices 3028 - a - matrix values 3029 3030 Output Parameter: 3031 . mat - the matrix 3032 3033 Level: intermediate 3034 3035 Notes: 3036 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3037 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3038 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3039 3040 The i and j indices are 0 based 3041 3042 .keywords: matrix, aij, compressed row, sparse, parallel 3043 3044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3045 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3046 @*/ 3047 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3048 { 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 if (i[0]) { 3053 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3054 } 3055 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3056 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3057 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3058 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3059 PetscFunctionReturn(0); 3060 } 3061 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatCreateMPIAIJ" 3064 /*@C 3065 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3066 (the default parallel PETSc format). For good matrix assembly performance 3067 the user should preallocate the matrix storage by setting the parameters 3068 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3069 performance can be increased by more than a factor of 50. 3070 3071 Collective on MPI_Comm 3072 3073 Input Parameters: 3074 + comm - MPI communicator 3075 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3076 This value should be the same as the local size used in creating the 3077 y vector for the matrix-vector product y = Ax. 3078 . n - This value should be the same as the local size used in creating the 3079 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3080 calculated if N is given) For square matrices n is almost always m. 3081 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3082 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3083 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3084 (same value is used for all local rows) 3085 . d_nnz - array containing the number of nonzeros in the various rows of the 3086 DIAGONAL portion of the local submatrix (possibly different for each row) 3087 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3088 The size of this array is equal to the number of local rows, i.e 'm'. 3089 You must leave room for the diagonal entry even if it is zero. 3090 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3091 submatrix (same value is used for all local rows). 3092 - o_nnz - array containing the number of nonzeros in the various rows of the 3093 OFF-DIAGONAL portion of the local submatrix (possibly different for 3094 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3095 structure. The size of this array is equal to the number 3096 of local rows, i.e 'm'. 3097 3098 Output Parameter: 3099 . A - the matrix 3100 3101 Notes: 3102 If the *_nnz parameter is given then the *_nz parameter is ignored 3103 3104 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3105 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3106 storage requirements for this matrix. 3107 3108 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3109 processor than it must be used on all processors that share the object for 3110 that argument. 3111 3112 The user MUST specify either the local or global matrix dimensions 3113 (possibly both). 3114 3115 The parallel matrix is partitioned such that the first m0 rows belong to 3116 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3117 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3118 3119 The DIAGONAL portion of the local submatrix of a processor can be defined 3120 as the submatrix which is obtained by extraction the part corresponding 3121 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3122 first row that belongs to the processor, and r2 is the last row belonging 3123 to the this processor. This is a square mxm matrix. The remaining portion 3124 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3125 3126 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3127 3128 When calling this routine with a single process communicator, a matrix of 3129 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3130 type of communicator, use the construction mechanism: 3131 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3132 3133 By default, this format uses inodes (identical nodes) when possible. 3134 We search for consecutive rows with the same nonzero structure, thereby 3135 reusing matrix information to achieve increased efficiency. 3136 3137 Options Database Keys: 3138 + -mat_no_inode - Do not use inodes 3139 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3140 - -mat_aij_oneindex - Internally use indexing starting at 1 3141 rather than 0. Note that when calling MatSetValues(), 3142 the user still MUST index entries starting at 0! 3143 3144 3145 Example usage: 3146 3147 Consider the following 8x8 matrix with 34 non-zero values, that is 3148 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3149 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3150 as follows: 3151 3152 .vb 3153 1 2 0 | 0 3 0 | 0 4 3154 Proc0 0 5 6 | 7 0 0 | 8 0 3155 9 0 10 | 11 0 0 | 12 0 3156 ------------------------------------- 3157 13 0 14 | 15 16 17 | 0 0 3158 Proc1 0 18 0 | 19 20 21 | 0 0 3159 0 0 0 | 22 23 0 | 24 0 3160 ------------------------------------- 3161 Proc2 25 26 27 | 0 0 28 | 29 0 3162 30 0 0 | 31 32 33 | 0 34 3163 .ve 3164 3165 This can be represented as a collection of submatrices as: 3166 3167 .vb 3168 A B C 3169 D E F 3170 G H I 3171 .ve 3172 3173 Where the submatrices A,B,C are owned by proc0, D,E,F are 3174 owned by proc1, G,H,I are owned by proc2. 3175 3176 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3177 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3178 The 'M','N' parameters are 8,8, and have the same values on all procs. 3179 3180 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3181 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3182 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3183 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3184 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3185 matrix, ans [DF] as another SeqAIJ matrix. 3186 3187 When d_nz, o_nz parameters are specified, d_nz storage elements are 3188 allocated for every row of the local diagonal submatrix, and o_nz 3189 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3190 One way to choose d_nz and o_nz is to use the max nonzerors per local 3191 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3192 In this case, the values of d_nz,o_nz are: 3193 .vb 3194 proc0 : dnz = 2, o_nz = 2 3195 proc1 : dnz = 3, o_nz = 2 3196 proc2 : dnz = 1, o_nz = 4 3197 .ve 3198 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3199 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3200 for proc3. i.e we are using 12+15+10=37 storage locations to store 3201 34 values. 3202 3203 When d_nnz, o_nnz parameters are specified, the storage is specified 3204 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3205 In the above case the values for d_nnz,o_nnz are: 3206 .vb 3207 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3208 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3209 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3210 .ve 3211 Here the space allocated is sum of all the above values i.e 34, and 3212 hence pre-allocation is perfect. 3213 3214 Level: intermediate 3215 3216 .keywords: matrix, aij, compressed row, sparse, parallel 3217 3218 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3219 MPIAIJ, MatCreateMPIAIJWithArrays() 3220 @*/ 3221 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3222 { 3223 PetscErrorCode ierr; 3224 PetscMPIInt size; 3225 3226 PetscFunctionBegin; 3227 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3228 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3229 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3230 if (size > 1) { 3231 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3232 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3233 } else { 3234 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3235 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3236 } 3237 PetscFunctionReturn(0); 3238 } 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3243 { 3244 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3245 3246 PetscFunctionBegin; 3247 *Ad = a->A; 3248 *Ao = a->B; 3249 *colmap = a->garray; 3250 PetscFunctionReturn(0); 3251 } 3252 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3255 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3256 { 3257 PetscErrorCode ierr; 3258 PetscInt i; 3259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3260 3261 PetscFunctionBegin; 3262 if (coloring->ctype == IS_COLORING_GLOBAL) { 3263 ISColoringValue *allcolors,*colors; 3264 ISColoring ocoloring; 3265 3266 /* set coloring for diagonal portion */ 3267 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3268 3269 /* set coloring for off-diagonal portion */ 3270 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3271 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3272 for (i=0; i<a->B->cmap.n; i++) { 3273 colors[i] = allcolors[a->garray[i]]; 3274 } 3275 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3276 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3277 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3278 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3279 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3280 ISColoringValue *colors; 3281 PetscInt *larray; 3282 ISColoring ocoloring; 3283 3284 /* set coloring for diagonal portion */ 3285 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3286 for (i=0; i<a->A->cmap.n; i++) { 3287 larray[i] = i + A->cmap.rstart; 3288 } 3289 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3290 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3291 for (i=0; i<a->A->cmap.n; i++) { 3292 colors[i] = coloring->colors[larray[i]]; 3293 } 3294 ierr = PetscFree(larray);CHKERRQ(ierr); 3295 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3296 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3297 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3298 3299 /* set coloring for off-diagonal portion */ 3300 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3301 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3302 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3303 for (i=0; i<a->B->cmap.n; i++) { 3304 colors[i] = coloring->colors[larray[i]]; 3305 } 3306 ierr = PetscFree(larray);CHKERRQ(ierr); 3307 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3308 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3309 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3310 } else { 3311 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3312 } 3313 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #if defined(PETSC_HAVE_ADIC) 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3320 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3321 { 3322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3327 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 #endif 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3334 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3335 { 3336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3337 PetscErrorCode ierr; 3338 3339 PetscFunctionBegin; 3340 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3341 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 #undef __FUNCT__ 3346 #define __FUNCT__ "MatMerge" 3347 /*@C 3348 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3349 matrices from each processor 3350 3351 Collective on MPI_Comm 3352 3353 Input Parameters: 3354 + comm - the communicators the parallel matrix will live on 3355 . inmat - the input sequential matrices 3356 . n - number of local columns (or PETSC_DECIDE) 3357 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3358 3359 Output Parameter: 3360 . outmat - the parallel matrix generated 3361 3362 Level: advanced 3363 3364 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3365 3366 @*/ 3367 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3368 { 3369 PetscErrorCode ierr; 3370 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3371 PetscInt *indx; 3372 PetscScalar *values; 3373 3374 PetscFunctionBegin; 3375 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3376 if (scall == MAT_INITIAL_MATRIX){ 3377 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3378 if (n == PETSC_DECIDE){ 3379 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3380 } 3381 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3382 rstart -= m; 3383 3384 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3385 for (i=0;i<m;i++) { 3386 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3387 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3388 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3389 } 3390 /* This routine will ONLY return MPIAIJ type matrix */ 3391 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3392 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3393 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3394 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3395 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3396 3397 } else if (scall == MAT_REUSE_MATRIX){ 3398 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3399 } else { 3400 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3401 } 3402 3403 for (i=0;i<m;i++) { 3404 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3405 Ii = i + rstart; 3406 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3407 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3408 } 3409 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3410 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3411 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 3413 PetscFunctionReturn(0); 3414 } 3415 3416 #undef __FUNCT__ 3417 #define __FUNCT__ "MatFileSplit" 3418 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3419 { 3420 PetscErrorCode ierr; 3421 PetscMPIInt rank; 3422 PetscInt m,N,i,rstart,nnz; 3423 size_t len; 3424 const PetscInt *indx; 3425 PetscViewer out; 3426 char *name; 3427 Mat B; 3428 const PetscScalar *values; 3429 3430 PetscFunctionBegin; 3431 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3432 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3433 /* Should this be the type of the diagonal block of A? */ 3434 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3435 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3436 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3437 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3438 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3439 for (i=0;i<m;i++) { 3440 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3441 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3442 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3443 } 3444 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3445 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3446 3447 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3448 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3449 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3450 sprintf(name,"%s.%d",outfile,rank); 3451 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3452 ierr = PetscFree(name); 3453 ierr = MatView(B,out);CHKERRQ(ierr); 3454 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3455 ierr = MatDestroy(B);CHKERRQ(ierr); 3456 PetscFunctionReturn(0); 3457 } 3458 3459 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3460 #undef __FUNCT__ 3461 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3462 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3463 { 3464 PetscErrorCode ierr; 3465 Mat_Merge_SeqsToMPI *merge; 3466 PetscContainer container; 3467 3468 PetscFunctionBegin; 3469 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3470 if (container) { 3471 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3472 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3473 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3474 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3475 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3476 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3479 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3480 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3481 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3482 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3483 3484 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3485 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3486 } 3487 ierr = PetscFree(merge);CHKERRQ(ierr); 3488 3489 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #include "src/mat/utils/freespace.h" 3494 #include "petscbt.h" 3495 static PetscEvent logkey_seqstompinum = 0; 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3498 /*@C 3499 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3500 matrices from each processor 3501 3502 Collective on MPI_Comm 3503 3504 Input Parameters: 3505 + comm - the communicators the parallel matrix will live on 3506 . seqmat - the input sequential matrices 3507 . m - number of local rows (or PETSC_DECIDE) 3508 . n - number of local columns (or PETSC_DECIDE) 3509 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3510 3511 Output Parameter: 3512 . mpimat - the parallel matrix generated 3513 3514 Level: advanced 3515 3516 Notes: 3517 The dimensions of the sequential matrix in each processor MUST be the same. 3518 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3519 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3520 @*/ 3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3522 { 3523 PetscErrorCode ierr; 3524 MPI_Comm comm=mpimat->comm; 3525 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3526 PetscMPIInt size,rank,taga,*len_s; 3527 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3528 PetscInt proc,m; 3529 PetscInt **buf_ri,**buf_rj; 3530 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3531 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3532 MPI_Request *s_waits,*r_waits; 3533 MPI_Status *status; 3534 MatScalar *aa=a->a,**abuf_r,*ba_i; 3535 Mat_Merge_SeqsToMPI *merge; 3536 PetscContainer container; 3537 3538 PetscFunctionBegin; 3539 if (!logkey_seqstompinum) { 3540 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3541 } 3542 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3543 3544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3545 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3546 3547 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3548 if (container) { 3549 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3550 } 3551 bi = merge->bi; 3552 bj = merge->bj; 3553 buf_ri = merge->buf_ri; 3554 buf_rj = merge->buf_rj; 3555 3556 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3557 owners = merge->rowmap.range; 3558 len_s = merge->len_s; 3559 3560 /* send and recv matrix values */ 3561 /*-----------------------------*/ 3562 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3563 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3564 3565 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3566 for (proc=0,k=0; proc<size; proc++){ 3567 if (!len_s[proc]) continue; 3568 i = owners[proc]; 3569 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3570 k++; 3571 } 3572 3573 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3574 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3575 ierr = PetscFree(status);CHKERRQ(ierr); 3576 3577 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3578 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3579 3580 /* insert mat values of mpimat */ 3581 /*----------------------------*/ 3582 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3583 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3584 nextrow = buf_ri_k + merge->nrecv; 3585 nextai = nextrow + merge->nrecv; 3586 3587 for (k=0; k<merge->nrecv; k++){ 3588 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3589 nrows = *(buf_ri_k[k]); 3590 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3591 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3592 } 3593 3594 /* set values of ba */ 3595 m = merge->rowmap.n; 3596 for (i=0; i<m; i++) { 3597 arow = owners[rank] + i; 3598 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3599 bnzi = bi[i+1] - bi[i]; 3600 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3601 3602 /* add local non-zero vals of this proc's seqmat into ba */ 3603 anzi = ai[arow+1] - ai[arow]; 3604 aj = a->j + ai[arow]; 3605 aa = a->a + ai[arow]; 3606 nextaj = 0; 3607 for (j=0; nextaj<anzi; j++){ 3608 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3609 ba_i[j] += aa[nextaj++]; 3610 } 3611 } 3612 3613 /* add received vals into ba */ 3614 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3615 /* i-th row */ 3616 if (i == *nextrow[k]) { 3617 anzi = *(nextai[k]+1) - *nextai[k]; 3618 aj = buf_rj[k] + *(nextai[k]); 3619 aa = abuf_r[k] + *(nextai[k]); 3620 nextaj = 0; 3621 for (j=0; nextaj<anzi; j++){ 3622 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3623 ba_i[j] += aa[nextaj++]; 3624 } 3625 } 3626 nextrow[k]++; nextai[k]++; 3627 } 3628 } 3629 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3630 } 3631 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3633 3634 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3635 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3636 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3637 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 static PetscEvent logkey_seqstompisym = 0; 3642 #undef __FUNCT__ 3643 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3645 { 3646 PetscErrorCode ierr; 3647 Mat B_mpi; 3648 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3649 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3650 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3651 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3652 PetscInt len,proc,*dnz,*onz; 3653 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3654 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3655 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3656 MPI_Status *status; 3657 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3658 PetscBT lnkbt; 3659 Mat_Merge_SeqsToMPI *merge; 3660 PetscContainer container; 3661 3662 PetscFunctionBegin; 3663 if (!logkey_seqstompisym) { 3664 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3665 } 3666 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3667 3668 /* make sure it is a PETSc comm */ 3669 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3670 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3672 3673 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3674 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3675 3676 /* determine row ownership */ 3677 /*---------------------------------------------------------*/ 3678 merge->rowmap.n = m; 3679 merge->rowmap.N = M; 3680 merge->rowmap.bs = 1; 3681 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3682 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3683 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3684 3685 m = merge->rowmap.n; 3686 M = merge->rowmap.N; 3687 owners = merge->rowmap.range; 3688 3689 /* determine the number of messages to send, their lengths */ 3690 /*---------------------------------------------------------*/ 3691 len_s = merge->len_s; 3692 3693 len = 0; /* length of buf_si[] */ 3694 merge->nsend = 0; 3695 for (proc=0; proc<size; proc++){ 3696 len_si[proc] = 0; 3697 if (proc == rank){ 3698 len_s[proc] = 0; 3699 } else { 3700 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3701 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3702 } 3703 if (len_s[proc]) { 3704 merge->nsend++; 3705 nrows = 0; 3706 for (i=owners[proc]; i<owners[proc+1]; i++){ 3707 if (ai[i+1] > ai[i]) nrows++; 3708 } 3709 len_si[proc] = 2*(nrows+1); 3710 len += len_si[proc]; 3711 } 3712 } 3713 3714 /* determine the number and length of messages to receive for ij-structure */ 3715 /*-------------------------------------------------------------------------*/ 3716 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3717 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3718 3719 /* post the Irecv of j-structure */ 3720 /*-------------------------------*/ 3721 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3722 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3723 3724 /* post the Isend of j-structure */ 3725 /*--------------------------------*/ 3726 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3727 sj_waits = si_waits + merge->nsend; 3728 3729 for (proc=0, k=0; proc<size; proc++){ 3730 if (!len_s[proc]) continue; 3731 i = owners[proc]; 3732 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3733 k++; 3734 } 3735 3736 /* receives and sends of j-structure are complete */ 3737 /*------------------------------------------------*/ 3738 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3739 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3740 3741 /* send and recv i-structure */ 3742 /*---------------------------*/ 3743 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3744 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3745 3746 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3747 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3748 for (proc=0,k=0; proc<size; proc++){ 3749 if (!len_s[proc]) continue; 3750 /* form outgoing message for i-structure: 3751 buf_si[0]: nrows to be sent 3752 [1:nrows]: row index (global) 3753 [nrows+1:2*nrows+1]: i-structure index 3754 */ 3755 /*-------------------------------------------*/ 3756 nrows = len_si[proc]/2 - 1; 3757 buf_si_i = buf_si + nrows+1; 3758 buf_si[0] = nrows; 3759 buf_si_i[0] = 0; 3760 nrows = 0; 3761 for (i=owners[proc]; i<owners[proc+1]; i++){ 3762 anzi = ai[i+1] - ai[i]; 3763 if (anzi) { 3764 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3765 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3766 nrows++; 3767 } 3768 } 3769 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3770 k++; 3771 buf_si += len_si[proc]; 3772 } 3773 3774 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3775 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3776 3777 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3778 for (i=0; i<merge->nrecv; i++){ 3779 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3780 } 3781 3782 ierr = PetscFree(len_si);CHKERRQ(ierr); 3783 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3784 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3785 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3786 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3787 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3788 ierr = PetscFree(status);CHKERRQ(ierr); 3789 3790 /* compute a local seq matrix in each processor */ 3791 /*----------------------------------------------*/ 3792 /* allocate bi array and free space for accumulating nonzero column info */ 3793 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3794 bi[0] = 0; 3795 3796 /* create and initialize a linked list */ 3797 nlnk = N+1; 3798 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3799 3800 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3801 len = 0; 3802 len = ai[owners[rank+1]] - ai[owners[rank]]; 3803 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3804 current_space = free_space; 3805 3806 /* determine symbolic info for each local row */ 3807 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3808 nextrow = buf_ri_k + merge->nrecv; 3809 nextai = nextrow + merge->nrecv; 3810 for (k=0; k<merge->nrecv; k++){ 3811 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3812 nrows = *buf_ri_k[k]; 3813 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3814 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3815 } 3816 3817 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3818 len = 0; 3819 for (i=0;i<m;i++) { 3820 bnzi = 0; 3821 /* add local non-zero cols of this proc's seqmat into lnk */ 3822 arow = owners[rank] + i; 3823 anzi = ai[arow+1] - ai[arow]; 3824 aj = a->j + ai[arow]; 3825 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3826 bnzi += nlnk; 3827 /* add received col data into lnk */ 3828 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3829 if (i == *nextrow[k]) { /* i-th row */ 3830 anzi = *(nextai[k]+1) - *nextai[k]; 3831 aj = buf_rj[k] + *nextai[k]; 3832 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3833 bnzi += nlnk; 3834 nextrow[k]++; nextai[k]++; 3835 } 3836 } 3837 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3838 3839 /* if free space is not available, make more free space */ 3840 if (current_space->local_remaining<bnzi) { 3841 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3842 nspacedouble++; 3843 } 3844 /* copy data into free space, then initialize lnk */ 3845 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3846 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3847 3848 current_space->array += bnzi; 3849 current_space->local_used += bnzi; 3850 current_space->local_remaining -= bnzi; 3851 3852 bi[i+1] = bi[i] + bnzi; 3853 } 3854 3855 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3856 3857 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3858 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3859 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3860 3861 /* create symbolic parallel matrix B_mpi */ 3862 /*---------------------------------------*/ 3863 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3864 if (n==PETSC_DECIDE) { 3865 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3866 } else { 3867 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3868 } 3869 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3870 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3871 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3872 3873 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3874 B_mpi->assembled = PETSC_FALSE; 3875 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3876 merge->bi = bi; 3877 merge->bj = bj; 3878 merge->buf_ri = buf_ri; 3879 merge->buf_rj = buf_rj; 3880 merge->coi = PETSC_NULL; 3881 merge->coj = PETSC_NULL; 3882 merge->owners_co = PETSC_NULL; 3883 3884 /* attach the supporting struct to B_mpi for reuse */ 3885 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3886 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3887 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3888 *mpimat = B_mpi; 3889 3890 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3891 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3892 PetscFunctionReturn(0); 3893 } 3894 3895 static PetscEvent logkey_seqstompi = 0; 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatMerge_SeqsToMPI" 3898 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3899 { 3900 PetscErrorCode ierr; 3901 3902 PetscFunctionBegin; 3903 if (!logkey_seqstompi) { 3904 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3905 } 3906 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3907 if (scall == MAT_INITIAL_MATRIX){ 3908 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3909 } 3910 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3911 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 static PetscEvent logkey_getlocalmat = 0; 3915 #undef __FUNCT__ 3916 #define __FUNCT__ "MatGetLocalMat" 3917 /*@C 3918 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3919 3920 Not Collective 3921 3922 Input Parameters: 3923 + A - the matrix 3924 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3925 3926 Output Parameter: 3927 . A_loc - the local sequential matrix generated 3928 3929 Level: developer 3930 3931 @*/ 3932 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3933 { 3934 PetscErrorCode ierr; 3935 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3936 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3937 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3938 PetscScalar *aa=a->a,*ba=b->a,*ca; 3939 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3940 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3941 3942 PetscFunctionBegin; 3943 if (!logkey_getlocalmat) { 3944 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3945 } 3946 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3947 if (scall == MAT_INITIAL_MATRIX){ 3948 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3949 ci[0] = 0; 3950 for (i=0; i<am; i++){ 3951 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3952 } 3953 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3954 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3955 k = 0; 3956 for (i=0; i<am; i++) { 3957 ncols_o = bi[i+1] - bi[i]; 3958 ncols_d = ai[i+1] - ai[i]; 3959 /* off-diagonal portion of A */ 3960 for (jo=0; jo<ncols_o; jo++) { 3961 col = cmap[*bj]; 3962 if (col >= cstart) break; 3963 cj[k] = col; bj++; 3964 ca[k++] = *ba++; 3965 } 3966 /* diagonal portion of A */ 3967 for (j=0; j<ncols_d; j++) { 3968 cj[k] = cstart + *aj++; 3969 ca[k++] = *aa++; 3970 } 3971 /* off-diagonal portion of A */ 3972 for (j=jo; j<ncols_o; j++) { 3973 cj[k] = cmap[*bj++]; 3974 ca[k++] = *ba++; 3975 } 3976 } 3977 /* put together the new matrix */ 3978 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3979 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3980 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3981 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3982 mat->free_a = PETSC_TRUE; 3983 mat->free_ij = PETSC_TRUE; 3984 mat->nonew = 0; 3985 } else if (scall == MAT_REUSE_MATRIX){ 3986 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3987 ci = mat->i; cj = mat->j; ca = mat->a; 3988 for (i=0; i<am; i++) { 3989 /* off-diagonal portion of A */ 3990 ncols_o = bi[i+1] - bi[i]; 3991 for (jo=0; jo<ncols_o; jo++) { 3992 col = cmap[*bj]; 3993 if (col >= cstart) break; 3994 *ca++ = *ba++; bj++; 3995 } 3996 /* diagonal portion of A */ 3997 ncols_d = ai[i+1] - ai[i]; 3998 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3999 /* off-diagonal portion of A */ 4000 for (j=jo; j<ncols_o; j++) { 4001 *ca++ = *ba++; bj++; 4002 } 4003 } 4004 } else { 4005 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4006 } 4007 4008 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4009 PetscFunctionReturn(0); 4010 } 4011 4012 static PetscEvent logkey_getlocalmatcondensed = 0; 4013 #undef __FUNCT__ 4014 #define __FUNCT__ "MatGetLocalMatCondensed" 4015 /*@C 4016 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4017 4018 Not Collective 4019 4020 Input Parameters: 4021 + A - the matrix 4022 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4023 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4024 4025 Output Parameter: 4026 . A_loc - the local sequential matrix generated 4027 4028 Level: developer 4029 4030 @*/ 4031 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4032 { 4033 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4034 PetscErrorCode ierr; 4035 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4036 IS isrowa,iscola; 4037 Mat *aloc; 4038 4039 PetscFunctionBegin; 4040 if (!logkey_getlocalmatcondensed) { 4041 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4042 } 4043 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4044 if (!row){ 4045 start = A->rmap.rstart; end = A->rmap.rend; 4046 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4047 } else { 4048 isrowa = *row; 4049 } 4050 if (!col){ 4051 start = A->cmap.rstart; 4052 cmap = a->garray; 4053 nzA = a->A->cmap.n; 4054 nzB = a->B->cmap.n; 4055 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4056 ncols = 0; 4057 for (i=0; i<nzB; i++) { 4058 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4059 else break; 4060 } 4061 imark = i; 4062 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4063 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4064 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4065 ierr = PetscFree(idx);CHKERRQ(ierr); 4066 } else { 4067 iscola = *col; 4068 } 4069 if (scall != MAT_INITIAL_MATRIX){ 4070 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4071 aloc[0] = *A_loc; 4072 } 4073 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4074 *A_loc = aloc[0]; 4075 ierr = PetscFree(aloc);CHKERRQ(ierr); 4076 if (!row){ 4077 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4078 } 4079 if (!col){ 4080 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4081 } 4082 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4083 PetscFunctionReturn(0); 4084 } 4085 4086 static PetscEvent logkey_GetBrowsOfAcols = 0; 4087 #undef __FUNCT__ 4088 #define __FUNCT__ "MatGetBrowsOfAcols" 4089 /*@C 4090 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4091 4092 Collective on Mat 4093 4094 Input Parameters: 4095 + A,B - the matrices in mpiaij format 4096 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4097 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4098 4099 Output Parameter: 4100 + rowb, colb - index sets of rows and columns of B to extract 4101 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4102 - B_seq - the sequential matrix generated 4103 4104 Level: developer 4105 4106 @*/ 4107 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4108 { 4109 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4110 PetscErrorCode ierr; 4111 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4112 IS isrowb,iscolb; 4113 Mat *bseq; 4114 4115 PetscFunctionBegin; 4116 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4117 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4118 } 4119 if (!logkey_GetBrowsOfAcols) { 4120 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4121 } 4122 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4123 4124 if (scall == MAT_INITIAL_MATRIX){ 4125 start = A->cmap.rstart; 4126 cmap = a->garray; 4127 nzA = a->A->cmap.n; 4128 nzB = a->B->cmap.n; 4129 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4130 ncols = 0; 4131 for (i=0; i<nzB; i++) { /* row < local row index */ 4132 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4133 else break; 4134 } 4135 imark = i; 4136 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4137 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4138 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4139 ierr = PetscFree(idx);CHKERRQ(ierr); 4140 *brstart = imark; 4141 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4142 } else { 4143 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4144 isrowb = *rowb; iscolb = *colb; 4145 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4146 bseq[0] = *B_seq; 4147 } 4148 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4149 *B_seq = bseq[0]; 4150 ierr = PetscFree(bseq);CHKERRQ(ierr); 4151 if (!rowb){ 4152 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4153 } else { 4154 *rowb = isrowb; 4155 } 4156 if (!colb){ 4157 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4158 } else { 4159 *colb = iscolb; 4160 } 4161 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4162 PetscFunctionReturn(0); 4163 } 4164 4165 static PetscEvent logkey_GetBrowsOfAocols = 0; 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "MatGetBrowsOfAoCols" 4168 /*@C 4169 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4170 of the OFF-DIAGONAL portion of local A 4171 4172 Collective on Mat 4173 4174 Input Parameters: 4175 + A,B - the matrices in mpiaij format 4176 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4177 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4178 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4179 4180 Output Parameter: 4181 + B_oth - the sequential matrix generated 4182 4183 Level: developer 4184 4185 @*/ 4186 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4187 { 4188 VecScatter_MPI_General *gen_to,*gen_from; 4189 PetscErrorCode ierr; 4190 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4191 Mat_SeqAIJ *b_oth; 4192 VecScatter ctx=a->Mvctx; 4193 MPI_Comm comm=ctx->comm; 4194 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4195 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4196 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4197 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4198 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4199 MPI_Status *sstatus,rstatus; 4200 PetscInt *cols,sbs,rbs; 4201 PetscScalar *vals; 4202 4203 PetscFunctionBegin; 4204 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4205 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4206 } 4207 if (!logkey_GetBrowsOfAocols) { 4208 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4209 } 4210 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4211 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4212 4213 gen_to = (VecScatter_MPI_General*)ctx->todata; 4214 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4215 rvalues = gen_from->values; /* holds the length of receiving row */ 4216 svalues = gen_to->values; /* holds the length of sending row */ 4217 nrecvs = gen_from->n; 4218 nsends = gen_to->n; 4219 4220 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4221 srow = gen_to->indices; /* local row index to be sent */ 4222 sstarts = gen_to->starts; 4223 sprocs = gen_to->procs; 4224 sstatus = gen_to->sstatus; 4225 sbs = gen_to->bs; 4226 rstarts = gen_from->starts; 4227 rprocs = gen_from->procs; 4228 rbs = gen_from->bs; 4229 4230 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4231 if (scall == MAT_INITIAL_MATRIX){ 4232 /* i-array */ 4233 /*---------*/ 4234 /* post receives */ 4235 for (i=0; i<nrecvs; i++){ 4236 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4237 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4238 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4239 } 4240 4241 /* pack the outgoing message */ 4242 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4243 rstartsj = sstartsj + nsends +1; 4244 sstartsj[0] = 0; rstartsj[0] = 0; 4245 len = 0; /* total length of j or a array to be sent */ 4246 k = 0; 4247 for (i=0; i<nsends; i++){ 4248 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4249 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4250 for (j=0; j<nrows; j++) { 4251 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4252 for (l=0; l<sbs; l++){ 4253 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4254 rowlen[j*sbs+l] = ncols; 4255 len += ncols; 4256 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4257 } 4258 k++; 4259 } 4260 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4261 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4262 } 4263 /* recvs and sends of i-array are completed */ 4264 i = nrecvs; 4265 while (i--) { 4266 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4267 } 4268 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4269 4270 /* allocate buffers for sending j and a arrays */ 4271 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4272 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4273 4274 /* create i-array of B_oth */ 4275 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4276 b_othi[0] = 0; 4277 len = 0; /* total length of j or a array to be received */ 4278 k = 0; 4279 for (i=0; i<nrecvs; i++){ 4280 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4281 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4282 for (j=0; j<nrows; j++) { 4283 b_othi[k+1] = b_othi[k] + rowlen[j]; 4284 len += rowlen[j]; k++; 4285 } 4286 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4287 } 4288 4289 /* allocate space for j and a arrrays of B_oth */ 4290 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4291 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4292 4293 /* j-array */ 4294 /*---------*/ 4295 /* post receives of j-array */ 4296 for (i=0; i<nrecvs; i++){ 4297 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4298 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4299 } 4300 4301 /* pack the outgoing message j-array */ 4302 k = 0; 4303 for (i=0; i<nsends; i++){ 4304 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4305 bufJ = bufj+sstartsj[i]; 4306 for (j=0; j<nrows; j++) { 4307 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4308 for (ll=0; ll<sbs; ll++){ 4309 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4310 for (l=0; l<ncols; l++){ 4311 *bufJ++ = cols[l]; 4312 } 4313 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4314 } 4315 } 4316 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4317 } 4318 4319 /* recvs and sends of j-array are completed */ 4320 i = nrecvs; 4321 while (i--) { 4322 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4323 } 4324 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4325 } else if (scall == MAT_REUSE_MATRIX){ 4326 sstartsj = *startsj; 4327 rstartsj = sstartsj + nsends +1; 4328 bufa = *bufa_ptr; 4329 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4330 b_otha = b_oth->a; 4331 } else { 4332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4333 } 4334 4335 /* a-array */ 4336 /*---------*/ 4337 /* post receives of a-array */ 4338 for (i=0; i<nrecvs; i++){ 4339 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4340 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4341 } 4342 4343 /* pack the outgoing message a-array */ 4344 k = 0; 4345 for (i=0; i<nsends; i++){ 4346 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4347 bufA = bufa+sstartsj[i]; 4348 for (j=0; j<nrows; j++) { 4349 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4350 for (ll=0; ll<sbs; ll++){ 4351 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4352 for (l=0; l<ncols; l++){ 4353 *bufA++ = vals[l]; 4354 } 4355 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4356 } 4357 } 4358 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4359 } 4360 /* recvs and sends of a-array are completed */ 4361 i = nrecvs; 4362 while (i--) { 4363 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 4364 } 4365 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4366 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4367 4368 if (scall == MAT_INITIAL_MATRIX){ 4369 /* put together the new matrix */ 4370 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4371 4372 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4373 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4374 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4375 b_oth->free_a = PETSC_TRUE; 4376 b_oth->free_ij = PETSC_TRUE; 4377 b_oth->nonew = 0; 4378 4379 ierr = PetscFree(bufj);CHKERRQ(ierr); 4380 if (!startsj || !bufa_ptr){ 4381 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4382 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4383 } else { 4384 *startsj = sstartsj; 4385 *bufa_ptr = bufa; 4386 } 4387 } 4388 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatGetCommunicationStructs" 4394 /*@C 4395 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4396 4397 Not Collective 4398 4399 Input Parameters: 4400 . A - The matrix in mpiaij format 4401 4402 Output Parameter: 4403 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4404 . colmap - A map from global column index to local index into lvec 4405 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4406 4407 Level: developer 4408 4409 @*/ 4410 #if defined (PETSC_USE_CTABLE) 4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4412 #else 4413 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4414 #endif 4415 { 4416 Mat_MPIAIJ *a; 4417 4418 PetscFunctionBegin; 4419 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4420 PetscValidPointer(lvec, 2) 4421 PetscValidPointer(colmap, 3) 4422 PetscValidPointer(multScatter, 4) 4423 a = (Mat_MPIAIJ *) A->data; 4424 if (lvec) *lvec = a->lvec; 4425 if (colmap) *colmap = a->colmap; 4426 if (multScatter) *multScatter = a->Mvctx; 4427 PetscFunctionReturn(0); 4428 } 4429 4430 EXTERN_C_BEGIN 4431 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4432 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4433 EXTERN_C_END 4434 4435 /*MC 4436 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4437 4438 Options Database Keys: 4439 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4440 4441 Level: beginner 4442 4443 .seealso: MatCreateMPIAIJ 4444 M*/ 4445 4446 EXTERN_C_BEGIN 4447 #undef __FUNCT__ 4448 #define __FUNCT__ "MatCreate_MPIAIJ" 4449 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4450 { 4451 Mat_MPIAIJ *b; 4452 PetscErrorCode ierr; 4453 PetscMPIInt size; 4454 4455 PetscFunctionBegin; 4456 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4457 4458 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4459 B->data = (void*)b; 4460 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4461 B->factor = 0; 4462 B->rmap.bs = 1; 4463 B->assembled = PETSC_FALSE; 4464 B->mapping = 0; 4465 4466 B->insertmode = NOT_SET_VALUES; 4467 b->size = size; 4468 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4469 4470 /* build cache for off array entries formed */ 4471 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4472 b->donotstash = PETSC_FALSE; 4473 b->colmap = 0; 4474 b->garray = 0; 4475 b->roworiented = PETSC_TRUE; 4476 4477 /* stuff used for matrix vector multiply */ 4478 b->lvec = PETSC_NULL; 4479 b->Mvctx = PETSC_NULL; 4480 4481 /* stuff for MatGetRow() */ 4482 b->rowindices = 0; 4483 b->rowvalues = 0; 4484 b->getrowactive = PETSC_FALSE; 4485 4486 4487 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4488 "MatStoreValues_MPIAIJ", 4489 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4491 "MatRetrieveValues_MPIAIJ", 4492 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4494 "MatGetDiagonalBlock_MPIAIJ", 4495 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4497 "MatIsTranspose_MPIAIJ", 4498 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4500 "MatMPIAIJSetPreallocation_MPIAIJ", 4501 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4503 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4504 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4505 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4506 "MatDiagonalScaleLocal_MPIAIJ", 4507 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4509 "MatConvert_MPIAIJ_MPICSRPERM", 4510 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4512 "MatConvert_MPIAIJ_MPICRL", 4513 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4514 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4515 PetscFunctionReturn(0); 4516 } 4517 EXTERN_C_END 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4521 /*@C 4522 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4523 and "off-diagonal" part of the matrix in CSR format. 4524 4525 Collective on MPI_Comm 4526 4527 Input Parameters: 4528 + comm - MPI communicator 4529 . m - number of local rows (Cannot be PETSC_DECIDE) 4530 . n - This value should be the same as the local size used in creating the 4531 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4532 calculated if N is given) For square matrices n is almost always m. 4533 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4534 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4535 . i - row indices for "diagonal" portion of matrix 4536 . j - column indices 4537 . a - matrix values 4538 . oi - row indices for "off-diagonal" portion of matrix 4539 . oj - column indices 4540 - oa - matrix values 4541 4542 Output Parameter: 4543 . mat - the matrix 4544 4545 Level: advanced 4546 4547 Notes: 4548 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4549 4550 The i and j indices are 0 based 4551 4552 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4553 4554 4555 .keywords: matrix, aij, compressed row, sparse, parallel 4556 4557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4558 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4559 @*/ 4560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4561 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4562 { 4563 PetscErrorCode ierr; 4564 Mat_MPIAIJ *maij; 4565 4566 PetscFunctionBegin; 4567 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4568 if (i[0]) { 4569 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4570 } 4571 if (oi[0]) { 4572 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4573 } 4574 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4575 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4576 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4577 maij = (Mat_MPIAIJ*) (*mat)->data; 4578 maij->donotstash = PETSC_TRUE; 4579 (*mat)->preallocated = PETSC_TRUE; 4580 4581 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4582 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4583 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4584 4585 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4586 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4587 4588 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4591 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 4593 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4594 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4595 PetscFunctionReturn(0); 4596 } 4597 4598 /* 4599 Special version for direct calls from Fortran 4600 */ 4601 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4602 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4603 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4604 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4605 #endif 4606 4607 /* Change these macros so can be used in void function */ 4608 #undef CHKERRQ 4609 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4610 #undef SETERRQ2 4611 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4612 #undef SETERRQ 4613 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4614 4615 EXTERN_C_BEGIN 4616 #undef __FUNCT__ 4617 #define __FUNCT__ "matsetvaluesmpiaij_" 4618 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4619 { 4620 Mat mat = *mmat; 4621 PetscInt m = *mm, n = *mn; 4622 InsertMode addv = *maddv; 4623 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4624 PetscScalar value; 4625 PetscErrorCode ierr; 4626 4627 MatPreallocated(mat); 4628 if (mat->insertmode == NOT_SET_VALUES) { 4629 mat->insertmode = addv; 4630 } 4631 #if defined(PETSC_USE_DEBUG) 4632 else if (mat->insertmode != addv) { 4633 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4634 } 4635 #endif 4636 { 4637 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4638 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4639 PetscTruth roworiented = aij->roworiented; 4640 4641 /* Some Variables required in the macro */ 4642 Mat A = aij->A; 4643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4644 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4645 PetscScalar *aa = a->a; 4646 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4647 Mat B = aij->B; 4648 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4649 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4650 PetscScalar *ba = b->a; 4651 4652 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4653 PetscInt nonew = a->nonew; 4654 PetscScalar *ap1,*ap2; 4655 4656 PetscFunctionBegin; 4657 for (i=0; i<m; i++) { 4658 if (im[i] < 0) continue; 4659 #if defined(PETSC_USE_DEBUG) 4660 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4661 #endif 4662 if (im[i] >= rstart && im[i] < rend) { 4663 row = im[i] - rstart; 4664 lastcol1 = -1; 4665 rp1 = aj + ai[row]; 4666 ap1 = aa + ai[row]; 4667 rmax1 = aimax[row]; 4668 nrow1 = ailen[row]; 4669 low1 = 0; 4670 high1 = nrow1; 4671 lastcol2 = -1; 4672 rp2 = bj + bi[row]; 4673 ap2 = ba + bi[row]; 4674 rmax2 = bimax[row]; 4675 nrow2 = bilen[row]; 4676 low2 = 0; 4677 high2 = nrow2; 4678 4679 for (j=0; j<n; j++) { 4680 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4681 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4682 if (in[j] >= cstart && in[j] < cend){ 4683 col = in[j] - cstart; 4684 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4685 } else if (in[j] < 0) continue; 4686 #if defined(PETSC_USE_DEBUG) 4687 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4688 #endif 4689 else { 4690 if (mat->was_assembled) { 4691 if (!aij->colmap) { 4692 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4693 } 4694 #if defined (PETSC_USE_CTABLE) 4695 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4696 col--; 4697 #else 4698 col = aij->colmap[in[j]] - 1; 4699 #endif 4700 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4701 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4702 col = in[j]; 4703 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4704 B = aij->B; 4705 b = (Mat_SeqAIJ*)B->data; 4706 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4707 rp2 = bj + bi[row]; 4708 ap2 = ba + bi[row]; 4709 rmax2 = bimax[row]; 4710 nrow2 = bilen[row]; 4711 low2 = 0; 4712 high2 = nrow2; 4713 bm = aij->B->rmap.n; 4714 ba = b->a; 4715 } 4716 } else col = in[j]; 4717 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4718 } 4719 } 4720 } else { 4721 if (!aij->donotstash) { 4722 if (roworiented) { 4723 if (ignorezeroentries && v[i*n] == 0.0) continue; 4724 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4725 } else { 4726 if (ignorezeroentries && v[i] == 0.0) continue; 4727 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4728 } 4729 } 4730 } 4731 }} 4732 PetscFunctionReturnVoid(); 4733 } 4734 EXTERN_C_END 4735 4736