#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;  /* number of off-diagonal columns in use */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table: key = global column + 1, value = local column + 1
     (both shifted by one so that 0 can mean "not present") */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length cmap.N: 0 means "column not present",
     otherwise entry is local column + 1 */
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE   15

/*
  Insert or add one (row,col,value) entry into the "diagonal" block A.
  The row is kept sorted by column: a bounded binary search narrows the
  window, then a linear scan finds the column.  If the column is absent
  and insertion is allowed, the tail of the row is shifted up to make
  room (reallocating via MatSeqXAIJReallocateAIJ when the row is full).
  Relies on the local variables set up by MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row,
col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


/*
  Same insert-or-add logic as MatSetValues_SeqAIJ_A_Private, but for the
  "off-diagonal" block B (rp2/ap2/nrow2/... variables).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

/*
  Overwrite the values of one (global) row with v, assuming v supplies the
  values for every stored nonzero of the row in global column order:
  first the off-diagonal entries left of the diagonal block, then the
  diagonal block, then the off-diagonal entries to the right.
  Note the comment below: only valid for square matrices A.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row = row - diag;   /* convert global row to local row */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatSetValues for MPIAIJ: locally-owned rows are routed to the diagonal
  block A (columns in [cstart,cend)) or the off-diagonal block B (via the
  colmap global->local translation); rows owned by other processes are
  cached in the stash for communication during assembly.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state for the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj
+ ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column lands in the diagonal block A */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          /* column lands in the off-diagonal block B: translate the
             global column to B's local numbering via colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;   /* colmap stores local column + 1; 0 means absent */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* a new off-diagonal column appears after assembly:
                 disassemble (which rebuilds B) and use global numbering */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              /* NOTE(review): ap2 below is computed from the pre-disassembly
                 ba; ba is only refreshed to b->a afterwards, so ap2 appears
                 to point into the old (freed) value array — verify whether
                 ba = b->a should precede ap2 = ba + bi[row]. */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];   /* before first assembly B uses global columns */
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatGetValues for MPIAIJ: only locally owned rows are supported.  Columns
  in [cstart,cend) are read from the diagonal block A; other columns are
  translated through colmap and read from B (0.0 is returned for entries
  that are not stored in B).
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* not stored in B => treated as an explicit zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Begin assembly: verify all ranks agree on INSERT vs ADD mode and start
  scattering the stashed off-process values to their owners.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  End assembly: drain the stash (inserting received values with
  MatSetValues_MPIAIJ), assemble the diagonal block A, reconcile
  disassembly state across ranks, set up the scatter for matrix-vector
  products on first assembly, then assemble the off-diagonal block B.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use =
PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

/* Zero all stored values (structure retained) in both local blocks. */
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Zero the given (global) rows, optionally setting the diagonal entry to
  'diag'.  Each rank may list rows owned by any rank, so the row indices
  are first routed to their owners with a point-to-point exchange; each
  rank then zeroes the rows it owns in its local blocks.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size =
l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  /* NOTE(review): guard is PETSC_DEBUG here while the rest of this file
     tests PETSC_USE_DEBUG — confirm which macro is actually defined by
     the build, otherwise this range check is never compiled in. */
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor;
     nprocs[2*j] = #rows destined for rank j, nprocs[2*j+1] = 1 iff any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search resumes where it left off while rows are ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts (the packing loop above advanced them) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal must be set entry-by-entry */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  y = A*x: overlap the scatter of the off-process part of x with the
  local (diagonal block) multiply, then add the off-diagonal contribution.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr =
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x, same overlap pattern as MatMult_MPIAIJ. */
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  y = A'*x: B'*x produces contributions for other ranks which are sent
  with a reverse scatter and ADDed into y; the order of the local multiply
  relative to the scatter depends on whether the scatter begin/end are
  merged (see the in-line comments).
*/
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  Test whether Bmat equals the transpose of Amat (to tolerance tol).
  Cheap test first (diagonal blocks), then the off-diagonal blocks via
  MatGetSubMatrices on complementary index sets.
*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix.
  */
  /* NOTE(review): notme is allocated with length N-last+first but the
     second fill loop runs to M; this is consistent only when M == N
     (square matrix) — confirm that callers guarantee this. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/* z = y + A'*x, overlapping the reverse scatter with the local multiply. */
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a =
(Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale both local blocks by the scalar aa. */
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Destroy the MPIAIJ matrix: free the blocks, the colmap, the scatter
  machinery, and unregister the composed functions.
*/
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Write the matrix to a binary viewer: rank 0 writes the header, then the
  per-row nonzero counts, column indices, and values for every rank, each
  gathered in turn with point-to-point messages.  Per-row data is emitted
  in global column order: B entries left of the diagonal block, then A,
  then B entries to the right.
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    /* off-diagonal entries with global column <= cstart come first */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* then the diagonal block, shifted to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* then the remaining off-diagonal entries */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  /* NOTE(review): this SETERRQ2 uses PETSC_ERR_LIB while the matching
     check in the values pass below uses PETSC_ERR_PLIB — likely meant to
     be the same code. */
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the message below reports nz although the guard
         tests rnz — it likely should report rnz. */
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz
= %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 
Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* determine which kind of viewer we were handed */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics for the whole matrix and for the
         diagonal (A) and off-diagonal (B) blocks, plus the scatter used in MatMult() */
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscTruth* is passed through a PetscInt** cast here; only
         the NULL/non-NULL outcome is used below -- confirm the sizes are compatible */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
              rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
              rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to report for an unfactored MPIAIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor case: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part; column indices are shifted to global numbering
       in place and shifted back afterwards */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part; garray maps compressed local columns to global columns */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;  /* keep original pointer: 'cols' is advanced in the loop below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Viewer dispatch for MPIAIJ: ASCII, draw, binary and socket viewers are all
   handled by MatView_MPIAIJ_ASCIIorDraworSocket(); any other type errors out. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/* Local SOR/relaxation: each outer iteration scatters the current solution
   into mat->lvec, forms bb1 = bb - B*x (removing the off-diagonal coupling)
   and then runs the sequential relaxation on the diagonal block A. */
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the off-diagonal product B*x is zero, so the
         first sweep can use bb directly without a scatter.
         NOTE(review): 'lits' is passed for the 'its' argument here -- confirm intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr =
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero guess: first sweep works on bb directly, no scatter needed */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/* Permute rows and columns of A: gathers a collective/global row permutation,
   inverts it, builds a local column permutation, and extracts the permuted
   matrix via MatGetSubMatrix(). */
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
  ierr = ISSetPermutation(growp); CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp); CHKERRQ(ierr);
  }
  /* NOTE(review): growp does not appear to be destroyed anywhere below --
     possible IS leak; verify against ISInvertPermutation semantics */
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISDestroy(irowp); CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);
CHKERRQ(ierr); 1127 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1128 } 1129 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1130 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1131 if (ntids>1) { 1132 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1133 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1134 } 1135 /* now we just get the submatrix */ 1136 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1137 /* clean up */ 1138 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1139 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1145 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1146 { 1147 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1148 Mat A = mat->A,B = mat->B; 1149 PetscErrorCode ierr; 1150 PetscReal isend[5],irecv[5]; 1151 1152 PetscFunctionBegin; 1153 info->block_size = 1.0; 1154 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1156 isend[3] = info->memory; isend[4] = info->mallocs; 1157 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1159 isend[3] += info->memory; isend[4] += info->mallocs; 1160 if (flag == MAT_LOCAL) { 1161 info->nz_used = isend[0]; 1162 info->nz_allocated = isend[1]; 1163 info->nz_unneeded = isend[2]; 1164 info->memory = isend[3]; 1165 info->mallocs = isend[4]; 1166 } else if (flag == MAT_GLOBAL_MAX) { 1167 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1168 info->nz_used = irecv[0]; 1169 info->nz_allocated = irecv[1]; 1170 info->nz_unneeded = irecv[2]; 1171 info->memory = irecv[3]; 1172 info->mallocs = irecv[4]; 1173 } else if (flag == MAT_GLOBAL_SUM) { 1174 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1175 info->nz_used = irecv[0]; 1176 info->nz_allocated = irecv[1]; 1177 info->nz_unneeded = irecv[2]; 1178 info->memory = irecv[3]; 1179 info->mallocs = irecv[4]; 1180 } 1181 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1182 info->fill_ratio_needed = 0; 1183 info->factor_mallocs = 0; 1184 info->rows_global = (double)matin->rmap.N; 1185 info->columns_global = (double)matin->cmap.N; 1186 info->rows_local = (double)matin->rmap.n; 1187 info->columns_local = (double)matin->cmap.N; 1188 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatSetOption_MPIAIJ" 1194 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1195 { 1196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 switch (op) { 1201 case MAT_NEW_NONZERO_LOCATIONS: 1202 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1203 case MAT_KEEP_ZEROED_ROWS: 1204 case MAT_NEW_NONZERO_LOCATION_ERR: 1205 case MAT_USE_INODES: 1206 case MAT_IGNORE_ZERO_ENTRIES: 1207 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1208 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1209 break; 1210 case MAT_ROW_ORIENTED: 1211 a->roworiented = flg; 1212 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1213 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1214 break; 1215 case MAT_NEW_DIAGONALS: 1216 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1217 break; 1218 case MAT_IGNORE_OFF_PROC_ENTRIES: 1219 a->donotstash = PETSC_TRUE; 1220 break; 1221 case MAT_SYMMETRIC: 1222 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1223 break; 1224 case MAT_STRUCTURALLY_SYMMETRIC: 1225 case MAT_HERMITIAN: 1226 case MAT_SYMMETRY_ETERNAL: 1227 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1228 break; 1229 default: 1230 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define 
__FUNCT__ "MatGetRow_MPIAIJ"
/* Return one locally-owned row of the parallel matrix, with the entries of
   the diagonal (A) and off-diagonal (B) blocks merged into increasing global
   column order.  The merged copies live in mat->rowvalues/rowindices, sized
   once for the longest local row and reused until MatRestoreRow(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both values and (after them) the indices */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request the work arrays actually needed by the caller */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first ... */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all A entries, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* End a MatGetRow() access; the merged-row work space is kept for reuse. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/* Frobenius, 1- and infinity-norms of the parallel matrix; the 2-norm is not
   supported.  Local contributions from both blocks are reduced with MPI. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* accumulate per-global-column absolute sums, then reduce and take max */
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are not split across processes, so a local max + MPI_MAX suffices */
      for (j=0; j<aij->A->rmap.n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/* Transpose: rows of A become columns of B, inserted column-by-column with
   MatSetValues().  In-place (matout == PETSC_NULL) is only allowed for square
   matrices and is completed with MatHeaderCopy(). */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]] ++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part: each source row becomes one column of B */
  array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part, translating compressed columns through garray */
  array = Bloc->a;
  row = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/* mat = diag(ll) * mat * diag(rr); the scatter of rr into lvec is overlapped
   with the left-scaling of the off-diagonal block. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/* Forward the block size to both sequential blocks. */
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__
"MatSetUnfactored_MPIAIJ"
/* Clear the factored state; only the diagonal block carries factor data. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/* Two MPIAIJ matrices are equal iff both local blocks agree on every process;
   the local results are combined with a logical-AND reduction. */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare the off-diagonal blocks when the diagonal blocks match */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/* Copy A into B, block-wise when the nonzero structure allows it, otherwise
   falling back to the generic entry-by-entry MatCopy_Basic(). */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation used when the caller never called
   MatMPIAIJSetPreallocation() explicitly. */
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/* Y += a*X.  SAME_NONZERO_PATTERN uses BLAS axpy directly on the value
   arrays; SUBSET_NONZERO_PATTERN maps X's entries into Y via a cached
   xtoy index map; otherwise fall back to MatAXPY_Basic(). */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* invalidate a cached map built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr =
PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1619 } 1620 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1621 } else { 1622 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatConjugate_MPIAIJ" 1631 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1632 { 1633 #if defined(PETSC_USE_COMPLEX) 1634 PetscErrorCode ierr; 1635 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1636 1637 PetscFunctionBegin; 1638 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1639 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1640 #else 1641 PetscFunctionBegin; 1642 #endif 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatRealPart_MPIAIJ" 1648 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1655 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1656 PetscFunctionReturn(0); 1657 } 1658 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1661 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1662 { 1663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1664 PetscErrorCode ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1668 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #ifdef PETSC_HAVE_PBGL 1673 1674 #include <boost/parallel/mpi/bsp_process_group.hpp> 1675 #include <boost/graph/distributed/ilu_default_graph.hpp> 1676 #include <boost/graph/distributed/ilu_0_block.hpp> 1677 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1678 #include <boost/graph/distributed/petsc/interface.hpp> 1679 #include <boost/multi_array.hpp> 1680 #include <boost/parallel/distributed_property_map.hpp> 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ 
"MatILUFactorSymbolic_MPIAIJ" 1684 /* 1685 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1686 */ 1687 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1688 { 1689 namespace petsc = boost::distributed::petsc; 1690 1691 namespace graph_dist = boost::graph::distributed; 1692 using boost::graph::distributed::ilu_default::process_group_type; 1693 using boost::graph::ilu_permuted; 1694 1695 PetscTruth row_identity, col_identity; 1696 PetscContainer c; 1697 PetscInt m, n, M, N; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1702 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1703 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1704 if (!row_identity || !col_identity) { 1705 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1706 } 1707 1708 process_group_type pg; 1709 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1710 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1711 lgraph_type& level_graph = *lgraph_p; 1712 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1713 1714 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1715 ilu_permuted(level_graph); 1716 1717 /* put together the new matrix */ 1718 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1719 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1720 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1721 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1722 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1723 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1725 (*fact)->factor = FACTOR_LU; 1726 1727 ierr = PetscContainerCreate(A->comm, &c); 1728 ierr = 
PetscContainerSetPointer(c, lgraph_p); 1729 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1735 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1736 { 1737 PetscFunctionBegin; 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatSolve_MPIAIJ" 1743 /* 1744 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1745 */ 1746 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1747 { 1748 namespace graph_dist = boost::graph::distributed; 1749 1750 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1751 lgraph_type* lgraph_p; 1752 PetscContainer c; 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1757 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1758 ierr = VecCopy(b, x); CHKERRQ(ierr); 1759 1760 PetscScalar* array_x; 1761 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1762 PetscInt sx; 1763 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1764 1765 PetscScalar* array_b; 1766 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1767 PetscInt sb; 1768 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1769 1770 lgraph_type& level_graph = *lgraph_p; 1771 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1772 1773 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1774 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1775 ref_x(array_x, boost::extents[num_vertices(graph)]); 1776 1777 typedef boost::iterator_property_map<array_ref_type::iterator, 1778 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1779 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1780 vector_x(ref_x.begin(), get(boost::vertex_index, 
graph)); 1781 1782 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1783 1784 PetscFunctionReturn(0); 1785 } 1786 #endif 1787 1788 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 1789 PetscInt nzlocal,nsends,nrecvs; 1790 PetscMPIInt *send_rank; 1791 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 1792 PetscScalar *sbuf_a,**rbuf_a; 1793 PetscErrorCode (*MatDestroy)(Mat); 1794 } Mat_Redundant; 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 1798 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 1799 { 1800 PetscErrorCode ierr; 1801 Mat_Redundant *redund=(Mat_Redundant*)ptr; 1802 PetscInt i; 1803 1804 PetscFunctionBegin; 1805 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 1806 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1807 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1808 for (i=0; i<redund->nrecvs; i++){ 1809 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1810 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1811 } 1812 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1813 ierr = PetscFree(redund);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatDestroy_MatRedundant" 1819 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1820 { 1821 PetscErrorCode ierr; 1822 PetscContainer container; 1823 Mat_Redundant *redund=PETSC_NULL; 1824 1825 PetscFunctionBegin; 1826 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 1827 if (container) { 1828 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 1829 } else { 1830 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 1831 } 1832 A->ops->destroy = redund->MatDestroy; 1833 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 1834 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 1835 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   Builds, on each sub-communicator, a redundant copy of the whole parallel
   matrix: every process packs its local rows (diagonal block A plus
   off-diagonal block B, columns mapped back to global numbering) into flat
   buffers, exchanges them point-to-point with the processes of the other
   sub-communicators, and assembles the received rows into C.  With
   MAT_INITIAL_MATRIX all communication metadata and buffers are saved in a
   Mat_Redundant struct attached to C so MAT_REUSE_MATRIX only re-sends the
   numerical values.
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=mat->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;   /* total nonzeros stored on this process */
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    /* sanity-check the passed-in matrix, then recover the saved buffers */
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      /* NOTE(review): "exit" is a typo for "exist" in this error message */
      SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;  /* recv ranks live in the same allocation */
    sbuf_nz   = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    /* NOTE(review): this PetscMalloc's return code is not wrapped in CHKERRQ */
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
    recv_rank  = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    /* pair up with the processes holding the same subrank in the other subcomms */
    for (i=0; i<size; i++){ /* i=rank*/
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){/* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    /* layout of sbuf_j: row-pointer array (nrows+1) followed by all column indices */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      /* three passes keep the global column indices sorted:
         B-entries left of the diagonal block, then A, then B-entries right of it */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite] = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite] = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite] = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    /* NOTE(review): the message prints rptr[rend-rstart+1] although the test
       checked rptr[rend-rstart] — looks like an off-by-one in the diagnostic */
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      /* same three-pass order as the initial fill so values line up with the saved sbuf_j */
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* one allocation carved into the three send and three recv request arrays */
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    /* reuse path only exchanges values (tag3) */
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      /* record who actually sent slot imdex (receives used MPI_ANY_SOURCE) */
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    /* both preallocations are called; only the one matching C's type takes effect */
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    /* the struct takes ownership of all communication buffers */
    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    /* hook our destroy routine in front of the type's destroy */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* function table for the MATMPIAIJ type; indices follow struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       MatLUFactorNumeric_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatILUFactorSymbolic_MPIAIJ,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ 0,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves a copy of the numerical values of both local blocks (see MatStoreValues). */
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues_MPIAIJ(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Type-specific preallocation: validates d_nz/o_nz/d_nnz/o_nnz, sets up the
   row/column maps, then creates and preallocates the two sequential blocks —
   b->A (local-rows x local-cols diagonal block) and b->B (local-rows x
   global-cols off-diagonal block, later compacted to the referenced columns).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* default per-row estimates when caller gave none */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates a parallel AIJ matrix: copies maps, flags, colmap/garray,
   the local-vector/scatter machinery, and duplicates both sequential
   blocks (values copied or not according to cpvalues).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap.bs      = matin->rmap.bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix MatGetRow() workspace is rebuilt lazily, not copied */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap.n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads a matrix from a PETSc binary viewer: rank 0 reads the header, row
   lengths, column indices and values, and ships each process its slice;
   every process then preallocates and assembles its local part.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    /* NOTE(review): loop stops at i<size so rowners[size] (last process's
       row count, still un-summed here) is never considered — confirm */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn per-process counts into a prefix-sum ownership range */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n = N/size + ((N % size) > rank);
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend = rend;
    n = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* ourlens temporarily becomes the diagonal-block lengths for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* recover the per-process sequential submatrix cached on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;   /* olens shares the allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
   Preallocates (and optionally fills) the matrix from local CSR arrays:
   Ii is the local row-pointer array (Ii[0] must be 0), J the global column
   indices, v the optional values.  (Continues beyond this chunk.)
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
MatSetValues_MPIAIJ() below directly, 2726 rather than the slower MatSetValues(). 2727 */ 2728 M->was_assembled = PETSC_TRUE; 2729 M->assembled = PETSC_FALSE; 2730 } 2731 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2732 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2733 ii = aij->i; 2734 jj = aij->j; 2735 aa = aij->a; 2736 for (i=0; i<m; i++) { 2737 row = rstart + i; 2738 nz = ii[i+1] - ii[i]; 2739 cwork = jj; jj += nz; 2740 vwork = aa; aa += nz; 2741 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2742 } 2743 2744 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 *newmat = M; 2747 2748 /* save submatrix used in processor for next request */ 2749 if (call == MAT_INITIAL_MATRIX) { 2750 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2751 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2752 } 2753 2754 PetscFunctionReturn(0); 2755 } 2756 2757 EXTERN_C_BEGIN 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2761 { 2762 PetscInt m,cstart, cend,j,nnz,i,d; 2763 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2764 const PetscInt *JJ; 2765 PetscScalar *values; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2770 2771 B->rmap.bs = B->cmap.bs = 1; 2772 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2773 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2774 m = B->rmap.n; 2775 cstart = B->cmap.rstart; 2776 cend = B->cmap.rend; 2777 rstart = B->rmap.rstart; 2778 2779 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2780 o_nnz = d_nnz + m; 2781 2782 for (i=0; i<m; i++) { 2783 nnz = Ii[i+1]- Ii[i]; 2784 JJ = J + Ii[i]; 2785 
nnz_max = PetscMax(nnz_max,nnz); 2786 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2787 for (j=0; j<nnz; j++) { 2788 if (*JJ >= cstart) break; 2789 JJ++; 2790 } 2791 d = 0; 2792 for (; j<nnz; j++) { 2793 if (*JJ++ >= cend) break; 2794 d++; 2795 } 2796 d_nnz[i] = d; 2797 o_nnz[i] = nnz - d; 2798 } 2799 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2800 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2801 2802 if (v) values = (PetscScalar*)v; 2803 else { 2804 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2805 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2806 } 2807 2808 for (i=0; i<m; i++) { 2809 ii = i + rstart; 2810 nnz = Ii[i+1]- Ii[i]; 2811 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 2816 if (!v) { 2817 ierr = PetscFree(values);CHKERRQ(ierr); 2818 } 2819 PetscFunctionReturn(0); 2820 } 2821 EXTERN_C_END 2822 2823 #undef __FUNCT__ 2824 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2825 /*@ 2826 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2827 (the default parallel PETSc format). 2828 2829 Collective on MPI_Comm 2830 2831 Input Parameters: 2832 + B - the matrix 2833 . i - the indices into j for the start of each local row (starts with zero) 2834 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2835 - v - optional values in the matrix 2836 2837 Level: developer 2838 2839 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2840 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 
2841 2842 .keywords: matrix, aij, compressed row, sparse, parallel 2843 2844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2845 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2846 @*/ 2847 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2848 { 2849 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2850 2851 PetscFunctionBegin; 2852 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2853 if (f) { 2854 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2855 } 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2861 /*@C 2862 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2863 (the default parallel PETSc format). For good matrix assembly performance 2864 the user should preallocate the matrix storage by setting the parameters 2865 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2866 performance can be increased by more than a factor of 50. 2867 2868 Collective on MPI_Comm 2869 2870 Input Parameters: 2871 + A - the matrix 2872 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2873 (same value is used for all local rows) 2874 . d_nnz - array containing the number of nonzeros in the various rows of the 2875 DIAGONAL portion of the local submatrix (possibly different for each row) 2876 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2877 The size of this array is equal to the number of local rows, i.e 'm'. 2878 You must leave room for the diagonal entry even if it is zero. 2879 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2880 submatrix (same value is used for all local rows). 
2881 - o_nnz - array containing the number of nonzeros in the various rows of the 2882 OFF-DIAGONAL portion of the local submatrix (possibly different for 2883 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2884 structure. The size of this array is equal to the number 2885 of local rows, i.e 'm'. 2886 2887 If the *_nnz parameter is given then the *_nz parameter is ignored 2888 2889 The AIJ format (also called the Yale sparse matrix format or 2890 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2891 storage. The stored row and column indices begin with zero. See the users manual for details. 2892 2893 The parallel matrix is partitioned such that the first m0 rows belong to 2894 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2895 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2896 2897 The DIAGONAL portion of the local submatrix of a processor can be defined 2898 as the submatrix which is obtained by extraction the part corresponding 2899 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2900 first row that belongs to the processor, and r2 is the last row belonging 2901 to the this processor. This is a square mxm matrix. The remaining portion 2902 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2903 2904 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2905 2906 Example usage: 2907 2908 Consider the following 8x8 matrix with 34 non-zero values, that is 2909 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2910 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 2911 as follows: 2912 2913 .vb 2914 1 2 0 | 0 3 0 | 0 4 2915 Proc0 0 5 6 | 7 0 0 | 8 0 2916 9 0 10 | 11 0 0 | 12 0 2917 ------------------------------------- 2918 13 0 14 | 15 16 17 | 0 0 2919 Proc1 0 18 0 | 19 20 21 | 0 0 2920 0 0 0 | 22 23 0 | 24 0 2921 ------------------------------------- 2922 Proc2 25 26 27 | 0 0 28 | 29 0 2923 30 0 0 | 31 32 33 | 0 34 2924 .ve 2925 2926 This can be represented as a collection of submatrices as: 2927 2928 .vb 2929 A B C 2930 D E F 2931 G H I 2932 .ve 2933 2934 Where the submatrices A,B,C are owned by proc0, D,E,F are 2935 owned by proc1, G,H,I are owned by proc2. 2936 2937 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2938 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2939 The 'M','N' parameters are 8,8, and have the same values on all procs. 2940 2941 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2942 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2943 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2944 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2945 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2946 matrix, ans [DF] as another SeqAIJ matrix. 2947 2948 When d_nz, o_nz parameters are specified, d_nz storage elements are 2949 allocated for every row of the local diagonal submatrix, and o_nz 2950 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2951 One way to choose d_nz and o_nz is to use the max nonzerors per local 2952 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2953 In this case, the values of d_nz,o_nz are: 2954 .vb 2955 proc0 : dnz = 2, o_nz = 2 2956 proc1 : dnz = 3, o_nz = 2 2957 proc2 : dnz = 1, o_nz = 4 2958 .ve 2959 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2960 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2961 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 2962 34 values. 2963 2964 When d_nnz, o_nnz parameters are specified, the storage is specified 2965 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2966 In the above case the values for d_nnz,o_nnz are: 2967 .vb 2968 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2969 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2970 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2971 .ve 2972 Here the space allocated is sum of all the above values i.e 34, and 2973 hence pre-allocation is perfect. 2974 2975 Level: intermediate 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2980 MPIAIJ 2981 @*/ 2982 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2983 { 2984 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2985 2986 PetscFunctionBegin; 2987 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2988 if (f) { 2989 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2990 } 2991 PetscFunctionReturn(0); 2992 } 2993 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2996 /*@C 2997 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2998 CSR format the local rows. 2999 3000 Collective on MPI_Comm 3001 3002 Input Parameters: 3003 + comm - MPI communicator 3004 . m - number of local rows (Cannot be PETSC_DECIDE) 3005 . n - This value should be the same as the local size used in creating the 3006 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3007 calculated if N is given) For square matrices n is almost always m. 3008 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3009 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3010 . i - row indices 3011 . j - column indices 3012 - a - matrix values 3013 3014 Output Parameter: 3015 . mat - the matrix 3016 3017 Level: intermediate 3018 3019 Notes: 3020 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3021 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3022 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3023 3024 The i and j indices are 0 based 3025 3026 .keywords: matrix, aij, compressed row, sparse, parallel 3027 3028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3029 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3030 @*/ 3031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3032 { 3033 PetscErrorCode ierr; 3034 3035 PetscFunctionBegin; 3036 if (i[0]) { 3037 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3038 } 3039 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3040 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3041 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3042 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatCreateMPIAIJ" 3048 /*@C 3049 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3050 (the default parallel PETSc format). For good matrix assembly performance 3051 the user should preallocate the matrix storage by setting the parameters 3052 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3053 performance can be increased by more than a factor of 50. 
3054 3055 Collective on MPI_Comm 3056 3057 Input Parameters: 3058 + comm - MPI communicator 3059 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3060 This value should be the same as the local size used in creating the 3061 y vector for the matrix-vector product y = Ax. 3062 . n - This value should be the same as the local size used in creating the 3063 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3064 calculated if N is given) For square matrices n is almost always m. 3065 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3066 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3067 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3068 (same value is used for all local rows) 3069 . d_nnz - array containing the number of nonzeros in the various rows of the 3070 DIAGONAL portion of the local submatrix (possibly different for each row) 3071 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3072 The size of this array is equal to the number of local rows, i.e 'm'. 3073 You must leave room for the diagonal entry even if it is zero. 3074 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3075 submatrix (same value is used for all local rows). 3076 - o_nnz - array containing the number of nonzeros in the various rows of the 3077 OFF-DIAGONAL portion of the local submatrix (possibly different for 3078 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3079 structure. The size of this array is equal to the number 3080 of local rows, i.e 'm'. 3081 3082 Output Parameter: 3083 . 
A - the matrix 3084 3085 Notes: 3086 If the *_nnz parameter is given then the *_nz parameter is ignored 3087 3088 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3089 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3090 storage requirements for this matrix. 3091 3092 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3093 processor than it must be used on all processors that share the object for 3094 that argument. 3095 3096 The user MUST specify either the local or global matrix dimensions 3097 (possibly both). 3098 3099 The parallel matrix is partitioned across processors such that the 3100 first m0 rows belong to process 0, the next m1 rows belong to 3101 process 1, the next m2 rows belong to process 2 etc.. where 3102 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3103 values corresponding to [m x N] submatrix. 3104 3105 The columns are logically partitioned with the n0 columns belonging 3106 to 0th partition, the next n1 columns belonging to the next 3107 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3108 3109 The DIAGONAL portion of the local submatrix on any given processor 3110 is the submatrix corresponding to the rows and columns m,n 3111 corresponding to the given processor. i.e diagonal matrix on 3112 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3113 etc. The remaining portion of the local submatrix [m x (N-n)] 3114 constitute the OFF-DIAGONAL portion. The example below better 3115 illustrates this concept. 3116 3117 For a square global matrix we define each processor's diagonal portion 3118 to be its local rows and the corresponding columns (a square submatrix); 3119 each processor's off-diagonal portion encompasses the remainder of the 3120 local matrix (a rectangular submatrix). 3121 3122 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
3123 3124 When calling this routine with a single process communicator, a matrix of 3125 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3126 type of communicator, use the construction mechanism: 3127 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 3128 3129 By default, this format uses inodes (identical nodes) when possible. 3130 We search for consecutive rows with the same nonzero structure, thereby 3131 reusing matrix information to achieve increased efficiency. 3132 3133 Options Database Keys: 3134 + -mat_no_inode - Do not use inodes 3135 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3136 - -mat_aij_oneindex - Internally use indexing starting at 1 3137 rather than 0. Note that when calling MatSetValues(), 3138 the user still MUST index entries starting at 0! 3139 3140 3141 Example usage: 3142 3143 Consider the following 8x8 matrix with 34 non-zero values, that is 3144 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3145 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3146 as follows: 3147 3148 .vb 3149 1 2 0 | 0 3 0 | 0 4 3150 Proc0 0 5 6 | 7 0 0 | 8 0 3151 9 0 10 | 11 0 0 | 12 0 3152 ------------------------------------- 3153 13 0 14 | 15 16 17 | 0 0 3154 Proc1 0 18 0 | 19 20 21 | 0 0 3155 0 0 0 | 22 23 0 | 24 0 3156 ------------------------------------- 3157 Proc2 25 26 27 | 0 0 28 | 29 0 3158 30 0 0 | 31 32 33 | 0 34 3159 .ve 3160 3161 This can be represented as a collection of submatrices as: 3162 3163 .vb 3164 A B C 3165 D E F 3166 G H I 3167 .ve 3168 3169 Where the submatrices A,B,C are owned by proc0, D,E,F are 3170 owned by proc1, G,H,I are owned by proc2. 3171 3172 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3173 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3174 The 'M','N' parameters are 8,8, and have the same values on all procs. 
3175 3176 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3177 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3178 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3179 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3180 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3181 matrix, ans [DF] as another SeqAIJ matrix. 3182 3183 When d_nz, o_nz parameters are specified, d_nz storage elements are 3184 allocated for every row of the local diagonal submatrix, and o_nz 3185 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3186 One way to choose d_nz and o_nz is to use the max nonzerors per local 3187 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3188 In this case, the values of d_nz,o_nz are: 3189 .vb 3190 proc0 : dnz = 2, o_nz = 2 3191 proc1 : dnz = 3, o_nz = 2 3192 proc2 : dnz = 1, o_nz = 4 3193 .ve 3194 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3195 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3196 for proc3. i.e we are using 12+15+10=37 storage locations to store 3197 34 values. 3198 3199 When d_nnz, o_nnz parameters are specified, the storage is specified 3200 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3201 In the above case the values for d_nnz,o_nnz are: 3202 .vb 3203 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3204 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3205 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3206 .ve 3207 Here the space allocated is sum of all the above values i.e 34, and 3208 hence pre-allocation is perfect. 
3209 3210 Level: intermediate 3211 3212 .keywords: matrix, aij, compressed row, sparse, parallel 3213 3214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3215 MPIAIJ, MatCreateMPIAIJWithArrays() 3216 @*/ 3217 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3218 { 3219 PetscErrorCode ierr; 3220 PetscMPIInt size; 3221 3222 PetscFunctionBegin; 3223 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3224 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3225 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3226 if (size > 1) { 3227 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3228 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3229 } else { 3230 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3231 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3232 } 3233 PetscFunctionReturn(0); 3234 } 3235 3236 #undef __FUNCT__ 3237 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3238 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3239 { 3240 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3241 3242 PetscFunctionBegin; 3243 *Ad = a->A; 3244 *Ao = a->B; 3245 *colmap = a->garray; 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3251 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3252 { 3253 PetscErrorCode ierr; 3254 PetscInt i; 3255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3256 3257 PetscFunctionBegin; 3258 if (coloring->ctype == IS_COLORING_GLOBAL) { 3259 ISColoringValue *allcolors,*colors; 3260 ISColoring ocoloring; 3261 3262 /* set coloring for diagonal portion */ 3263 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3264 3265 /* set coloring for off-diagonal portion */ 3266 ierr = 
ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3267 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3268 for (i=0; i<a->B->cmap.n; i++) { 3269 colors[i] = allcolors[a->garray[i]]; 3270 } 3271 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3272 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3273 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3274 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3275 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3276 ISColoringValue *colors; 3277 PetscInt *larray; 3278 ISColoring ocoloring; 3279 3280 /* set coloring for diagonal portion */ 3281 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3282 for (i=0; i<a->A->cmap.n; i++) { 3283 larray[i] = i + A->cmap.rstart; 3284 } 3285 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3286 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3287 for (i=0; i<a->A->cmap.n; i++) { 3288 colors[i] = coloring->colors[larray[i]]; 3289 } 3290 ierr = PetscFree(larray);CHKERRQ(ierr); 3291 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3292 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3293 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3294 3295 /* set coloring for off-diagonal portion */ 3296 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3297 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3298 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3299 for (i=0; i<a->B->cmap.n; i++) { 3300 colors[i] = coloring->colors[larray[i]]; 3301 } 3302 ierr = PetscFree(larray);CHKERRQ(ierr); 3303 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 3304 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3305 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3306 } else { 3307 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3308 } 3309 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #if defined(PETSC_HAVE_ADIC) 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3316 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3317 { 3318 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3319 PetscErrorCode ierr; 3320 3321 PetscFunctionBegin; 3322 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3323 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3324 PetscFunctionReturn(0); 3325 } 3326 #endif 3327 3328 #undef __FUNCT__ 3329 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3330 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3331 { 3332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3333 PetscErrorCode ierr; 3334 3335 PetscFunctionBegin; 3336 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3337 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatMerge" 3343 /*@C 3344 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3345 matrices from each processor 3346 3347 Collective on MPI_Comm 3348 3349 Input Parameters: 3350 + comm - the communicators the parallel matrix will live on 3351 . inmat - the input sequential matrices 3352 . n - number of local columns (or PETSC_DECIDE) 3353 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3354 3355 Output Parameter: 3356 . outmat - the parallel matrix generated 3357 3358 Level: advanced 3359 3360 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
3361 3362 @*/ 3363 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3364 { 3365 PetscErrorCode ierr; 3366 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3367 PetscInt *indx; 3368 PetscScalar *values; 3369 3370 PetscFunctionBegin; 3371 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3372 if (scall == MAT_INITIAL_MATRIX){ 3373 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3374 if (n == PETSC_DECIDE){ 3375 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3376 } 3377 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3378 rstart -= m; 3379 3380 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3381 for (i=0;i<m;i++) { 3382 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3383 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3384 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3385 } 3386 /* This routine will ONLY return MPIAIJ type matrix */ 3387 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3388 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3389 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3390 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3391 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3392 3393 } else if (scall == MAT_REUSE_MATRIX){ 3394 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3395 } else { 3396 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3397 } 3398 3399 for (i=0;i<m;i++) { 3400 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3401 Ii = i + rstart; 3402 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3403 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3404 } 3405 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3406 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3407 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3408 3409 PetscFunctionReturn(0); 3410 } 3411 3412 #undef __FUNCT__ 3413 #define __FUNCT__ "MatFileSplit" 3414 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3415 { 3416 PetscErrorCode ierr; 3417 PetscMPIInt rank; 3418 PetscInt m,N,i,rstart,nnz; 3419 size_t len; 3420 const PetscInt *indx; 3421 PetscViewer out; 3422 char *name; 3423 Mat B; 3424 const PetscScalar *values; 3425 3426 PetscFunctionBegin; 3427 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3428 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3429 /* Should this be the type of the diagonal block of A? */ 3430 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3431 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3432 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3433 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3434 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3435 for (i=0;i<m;i++) { 3436 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3437 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3438 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3439 } 3440 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 3443 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3444 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3445 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3446 sprintf(name,"%s.%d",outfile,rank); 3447 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3448 ierr = PetscFree(name); 3449 ierr = MatView(B,out);CHKERRQ(ierr); 3450 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3451 ierr = MatDestroy(B);CHKERRQ(ierr); 3452 PetscFunctionReturn(0); 3453 } 3454 3455 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3456 #undef __FUNCT__ 3457 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3458 PetscErrorCode PETSCMAT_DLLEXPORT 
MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3459 { 3460 PetscErrorCode ierr; 3461 Mat_Merge_SeqsToMPI *merge; 3462 PetscContainer container; 3463 3464 PetscFunctionBegin; 3465 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3466 if (container) { 3467 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3468 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3469 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3470 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3471 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3472 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3473 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3474 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3475 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3476 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3477 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3478 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3479 3480 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3481 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3482 } 3483 ierr = PetscFree(merge);CHKERRQ(ierr); 3484 3485 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 3489 #include "src/mat/utils/freespace.h" 3490 #include "petscbt.h" 3491 static PetscEvent logkey_seqstompinum = 0; 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3494 /*@C 3495 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3496 matrices from each processor 3497 3498 Collective on MPI_Comm 3499 3500 Input Parameters: 3501 + comm - the communicators the parallel matrix will live on 3502 . seqmat - the input sequential matrices 3503 . m - number of local rows (or PETSC_DECIDE) 3504 . n - number of local columns (or PETSC_DECIDE) 3505 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3506 3507 Output Parameter: 3508 . 
mpimat - the parallel matrix generated 3509 3510 Level: advanced 3511 3512 Notes: 3513 The dimensions of the sequential matrix in each processor MUST be the same. 3514 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3515 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3516 @*/ 3517 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3518 { 3519 PetscErrorCode ierr; 3520 MPI_Comm comm=mpimat->comm; 3521 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3522 PetscMPIInt size,rank,taga,*len_s; 3523 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3524 PetscInt proc,m; 3525 PetscInt **buf_ri,**buf_rj; 3526 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3527 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3528 MPI_Request *s_waits,*r_waits; 3529 MPI_Status *status; 3530 MatScalar *aa=a->a,**abuf_r,*ba_i; 3531 Mat_Merge_SeqsToMPI *merge; 3532 PetscContainer container; 3533 3534 PetscFunctionBegin; 3535 if (!logkey_seqstompinum) { 3536 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3537 } 3538 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3539 3540 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3541 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3542 3543 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3544 if (container) { 3545 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3546 } 3547 bi = merge->bi; 3548 bj = merge->bj; 3549 buf_ri = merge->buf_ri; 3550 buf_rj = merge->buf_rj; 3551 3552 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3553 owners = merge->rowmap.range; 3554 len_s = merge->len_s; 3555 3556 /* send and recv matrix values */ 3557 /*-----------------------------*/ 3558 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3559 ierr = 
PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3560 3561 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3562 for (proc=0,k=0; proc<size; proc++){ 3563 if (!len_s[proc]) continue; 3564 i = owners[proc]; 3565 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3566 k++; 3567 } 3568 3569 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3570 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3571 ierr = PetscFree(status);CHKERRQ(ierr); 3572 3573 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3574 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3575 3576 /* insert mat values of mpimat */ 3577 /*----------------------------*/ 3578 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3579 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3580 nextrow = buf_ri_k + merge->nrecv; 3581 nextai = nextrow + merge->nrecv; 3582 3583 for (k=0; k<merge->nrecv; k++){ 3584 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3585 nrows = *(buf_ri_k[k]); 3586 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3587 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3588 } 3589 3590 /* set values of ba */ 3591 m = merge->rowmap.n; 3592 for (i=0; i<m; i++) { 3593 arow = owners[rank] + i; 3594 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3595 bnzi = bi[i+1] - bi[i]; 3596 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3597 3598 /* add local non-zero vals of this proc's seqmat into ba */ 3599 anzi = ai[arow+1] - ai[arow]; 3600 aj = a->j + ai[arow]; 3601 aa = a->a + ai[arow]; 3602 nextaj = 0; 3603 for (j=0; nextaj<anzi; j++){ 3604 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3605 ba_i[j] += aa[nextaj++]; 3606 } 3607 } 3608 3609 /* add received 
vals into ba */ 3610 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3611 /* i-th row */ 3612 if (i == *nextrow[k]) { 3613 anzi = *(nextai[k]+1) - *nextai[k]; 3614 aj = buf_rj[k] + *(nextai[k]); 3615 aa = abuf_r[k] + *(nextai[k]); 3616 nextaj = 0; 3617 for (j=0; nextaj<anzi; j++){ 3618 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3619 ba_i[j] += aa[nextaj++]; 3620 } 3621 } 3622 nextrow[k]++; nextai[k]++; 3623 } 3624 } 3625 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3626 } 3627 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3628 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3629 3630 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3631 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3632 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3633 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3634 PetscFunctionReturn(0); 3635 } 3636 3637 static PetscEvent logkey_seqstompisym = 0; 3638 #undef __FUNCT__ 3639 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3640 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3641 { 3642 PetscErrorCode ierr; 3643 Mat B_mpi; 3644 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3645 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3646 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3647 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3648 PetscInt len,proc,*dnz,*onz; 3649 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3650 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3651 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3652 MPI_Status *status; 3653 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3654 PetscBT lnkbt; 3655 Mat_Merge_SeqsToMPI *merge; 3656 PetscContainer container; 3657 3658 PetscFunctionBegin; 3659 if (!logkey_seqstompisym) { 3660 ierr = 
PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3661 } 3662 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3663 3664 /* make sure it is a PETSc comm */ 3665 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3666 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3667 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3668 3669 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3670 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3671 3672 /* determine row ownership */ 3673 /*---------------------------------------------------------*/ 3674 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3675 merge->rowmap.n = m; 3676 merge->rowmap.N = M; 3677 merge->rowmap.bs = 1; 3678 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 3679 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3680 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3681 3682 m = merge->rowmap.n; 3683 M = merge->rowmap.N; 3684 owners = merge->rowmap.range; 3685 3686 /* determine the number of messages to send, their lengths */ 3687 /*---------------------------------------------------------*/ 3688 len_s = merge->len_s; 3689 3690 len = 0; /* length of buf_si[] */ 3691 merge->nsend = 0; 3692 for (proc=0; proc<size; proc++){ 3693 len_si[proc] = 0; 3694 if (proc == rank){ 3695 len_s[proc] = 0; 3696 } else { 3697 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3698 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3699 } 3700 if (len_s[proc]) { 3701 merge->nsend++; 3702 nrows = 0; 3703 for (i=owners[proc]; i<owners[proc+1]; i++){ 3704 if (ai[i+1] > ai[i]) nrows++; 3705 } 3706 len_si[proc] = 2*(nrows+1); 3707 len += len_si[proc]; 3708 } 3709 } 3710 3711 /* determine the number and length of messages to receive for ij-structure */ 3712 /*-------------------------------------------------------------------------*/ 
3713 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3714 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3715 3716 /* post the Irecv of j-structure */ 3717 /*-------------------------------*/ 3718 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3719 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3720 3721 /* post the Isend of j-structure */ 3722 /*--------------------------------*/ 3723 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3724 sj_waits = si_waits + merge->nsend; 3725 3726 for (proc=0, k=0; proc<size; proc++){ 3727 if (!len_s[proc]) continue; 3728 i = owners[proc]; 3729 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3730 k++; 3731 } 3732 3733 /* receives and sends of j-structure are complete */ 3734 /*------------------------------------------------*/ 3735 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3736 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3737 3738 /* send and recv i-structure */ 3739 /*---------------------------*/ 3740 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3741 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3742 3743 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3744 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3745 for (proc=0,k=0; proc<size; proc++){ 3746 if (!len_s[proc]) continue; 3747 /* form outgoing message for i-structure: 3748 buf_si[0]: nrows to be sent 3749 [1:nrows]: row index (global) 3750 [nrows+1:2*nrows+1]: i-structure index 3751 */ 3752 /*-------------------------------------------*/ 3753 nrows = len_si[proc]/2 - 1; 3754 buf_si_i = buf_si + nrows+1; 3755 buf_si[0] = nrows; 3756 
buf_si_i[0] = 0; 3757 nrows = 0; 3758 for (i=owners[proc]; i<owners[proc+1]; i++){ 3759 anzi = ai[i+1] - ai[i]; 3760 if (anzi) { 3761 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3762 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3763 nrows++; 3764 } 3765 } 3766 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3767 k++; 3768 buf_si += len_si[proc]; 3769 } 3770 3771 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3772 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3773 3774 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3775 for (i=0; i<merge->nrecv; i++){ 3776 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3777 } 3778 3779 ierr = PetscFree(len_si);CHKERRQ(ierr); 3780 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3781 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3782 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3783 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3784 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3785 ierr = PetscFree(status);CHKERRQ(ierr); 3786 3787 /* compute a local seq matrix in each processor */ 3788 /*----------------------------------------------*/ 3789 /* allocate bi array and free space for accumulating nonzero column info */ 3790 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3791 bi[0] = 0; 3792 3793 /* create and initialize a linked list */ 3794 nlnk = N+1; 3795 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3796 3797 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3798 len = 0; 3799 len = ai[owners[rank+1]] - ai[owners[rank]]; 3800 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3801 current_space = free_space; 3802 3803 /* determine symbolic info for each local row */ 3804 ierr = 
PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3805 nextrow = buf_ri_k + merge->nrecv; 3806 nextai = nextrow + merge->nrecv; 3807 for (k=0; k<merge->nrecv; k++){ 3808 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3809 nrows = *buf_ri_k[k]; 3810 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3811 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3812 } 3813 3814 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3815 len = 0; 3816 for (i=0;i<m;i++) { 3817 bnzi = 0; 3818 /* add local non-zero cols of this proc's seqmat into lnk */ 3819 arow = owners[rank] + i; 3820 anzi = ai[arow+1] - ai[arow]; 3821 aj = a->j + ai[arow]; 3822 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3823 bnzi += nlnk; 3824 /* add received col data into lnk */ 3825 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3826 if (i == *nextrow[k]) { /* i-th row */ 3827 anzi = *(nextai[k]+1) - *nextai[k]; 3828 aj = buf_rj[k] + *nextai[k]; 3829 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3830 bnzi += nlnk; 3831 nextrow[k]++; nextai[k]++; 3832 } 3833 } 3834 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3835 3836 /* if free space is not available, make more free space */ 3837 if (current_space->local_remaining<bnzi) { 3838 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3839 nspacedouble++; 3840 } 3841 /* copy data into free space, then initialize lnk */ 3842 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3843 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3844 3845 current_space->array += bnzi; 3846 current_space->local_used += bnzi; 3847 current_space->local_remaining -= bnzi; 3848 3849 bi[i+1] = bi[i] + bnzi; 3850 } 3851 3852 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3853 3854 ierr = 
PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3855 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3856 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3857 3858 /* create symbolic parallel matrix B_mpi */ 3859 /*---------------------------------------*/ 3860 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3861 if (n==PETSC_DECIDE) { 3862 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3863 } else { 3864 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3865 } 3866 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3867 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3868 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3869 3870 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3871 B_mpi->assembled = PETSC_FALSE; 3872 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3873 merge->bi = bi; 3874 merge->bj = bj; 3875 merge->buf_ri = buf_ri; 3876 merge->buf_rj = buf_rj; 3877 merge->coi = PETSC_NULL; 3878 merge->coj = PETSC_NULL; 3879 merge->owners_co = PETSC_NULL; 3880 3881 /* attach the supporting struct to B_mpi for reuse */ 3882 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3883 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3884 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3885 *mpimat = B_mpi; 3886 3887 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3888 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3889 PetscFunctionReturn(0); 3890 } 3891 3892 static PetscEvent logkey_seqstompi = 0; 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatMerge_SeqsToMPI" 3895 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3896 { 3897 PetscErrorCode ierr; 3898 3899 PetscFunctionBegin; 3900 if (!logkey_seqstompi) { 3901 ierr = 
PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3902 } 3903 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3904 if (scall == MAT_INITIAL_MATRIX){ 3905 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3906 } 3907 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3908 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 static PetscEvent logkey_getlocalmat = 0; 3912 #undef __FUNCT__ 3913 #define __FUNCT__ "MatGetLocalMat" 3914 /*@C 3915 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3916 3917 Not Collective 3918 3919 Input Parameters: 3920 + A - the matrix 3921 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3922 3923 Output Parameter: 3924 . A_loc - the local sequential matrix generated 3925 3926 Level: developer 3927 3928 @*/ 3929 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3930 { 3931 PetscErrorCode ierr; 3932 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3933 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3934 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3935 PetscScalar *aa=a->a,*ba=b->a,*ca; 3936 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3937 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3938 3939 PetscFunctionBegin; 3940 if (!logkey_getlocalmat) { 3941 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3942 } 3943 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3944 if (scall == MAT_INITIAL_MATRIX){ 3945 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3946 ci[0] = 0; 3947 for (i=0; i<am; i++){ 3948 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3949 } 3950 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3951 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3952 k = 0; 3953 for 
(i=0; i<am; i++) { 3954 ncols_o = bi[i+1] - bi[i]; 3955 ncols_d = ai[i+1] - ai[i]; 3956 /* off-diagonal portion of A */ 3957 for (jo=0; jo<ncols_o; jo++) { 3958 col = cmap[*bj]; 3959 if (col >= cstart) break; 3960 cj[k] = col; bj++; 3961 ca[k++] = *ba++; 3962 } 3963 /* diagonal portion of A */ 3964 for (j=0; j<ncols_d; j++) { 3965 cj[k] = cstart + *aj++; 3966 ca[k++] = *aa++; 3967 } 3968 /* off-diagonal portion of A */ 3969 for (j=jo; j<ncols_o; j++) { 3970 cj[k] = cmap[*bj++]; 3971 ca[k++] = *ba++; 3972 } 3973 } 3974 /* put together the new matrix */ 3975 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3976 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3977 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3978 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3979 mat->free_a = PETSC_TRUE; 3980 mat->free_ij = PETSC_TRUE; 3981 mat->nonew = 0; 3982 } else if (scall == MAT_REUSE_MATRIX){ 3983 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3984 ci = mat->i; cj = mat->j; ca = mat->a; 3985 for (i=0; i<am; i++) { 3986 /* off-diagonal portion of A */ 3987 ncols_o = bi[i+1] - bi[i]; 3988 for (jo=0; jo<ncols_o; jo++) { 3989 col = cmap[*bj]; 3990 if (col >= cstart) break; 3991 *ca++ = *ba++; bj++; 3992 } 3993 /* diagonal portion of A */ 3994 ncols_d = ai[i+1] - ai[i]; 3995 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3996 /* off-diagonal portion of A */ 3997 for (j=jo; j<ncols_o; j++) { 3998 *ca++ = *ba++; bj++; 3999 } 4000 } 4001 } else { 4002 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4003 } 4004 4005 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 static PetscEvent logkey_getlocalmatcondensed = 0; 4010 #undef __FUNCT__ 4011 #define __FUNCT__ "MatGetLocalMatCondensed" 4012 /*@C 4013 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4014 4015 Not Collective 4016 
4017 Input Parameters: 4018 + A - the matrix 4019 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4020 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4021 4022 Output Parameter: 4023 . A_loc - the local sequential matrix generated 4024 4025 Level: developer 4026 4027 @*/ 4028 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4029 { 4030 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4031 PetscErrorCode ierr; 4032 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4033 IS isrowa,iscola; 4034 Mat *aloc; 4035 4036 PetscFunctionBegin; 4037 if (!logkey_getlocalmatcondensed) { 4038 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 4039 } 4040 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4041 if (!row){ 4042 start = A->rmap.rstart; end = A->rmap.rend; 4043 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4044 } else { 4045 isrowa = *row; 4046 } 4047 if (!col){ 4048 start = A->cmap.rstart; 4049 cmap = a->garray; 4050 nzA = a->A->cmap.n; 4051 nzB = a->B->cmap.n; 4052 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4053 ncols = 0; 4054 for (i=0; i<nzB; i++) { 4055 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4056 else break; 4057 } 4058 imark = i; 4059 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4060 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4061 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4062 ierr = PetscFree(idx);CHKERRQ(ierr); 4063 } else { 4064 iscola = *col; 4065 } 4066 if (scall != MAT_INITIAL_MATRIX){ 4067 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4068 aloc[0] = *A_loc; 4069 } 4070 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4071 *A_loc = aloc[0]; 4072 ierr = PetscFree(aloc);CHKERRQ(ierr); 4073 if (!row){ 4074 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4075 } 4076 if (!col){ 4077 
ierr = ISDestroy(iscola);CHKERRQ(ierr); 4078 } 4079 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4080 PetscFunctionReturn(0); 4081 } 4082 4083 static PetscEvent logkey_GetBrowsOfAcols = 0; 4084 #undef __FUNCT__ 4085 #define __FUNCT__ "MatGetBrowsOfAcols" 4086 /*@C 4087 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4088 4089 Collective on Mat 4090 4091 Input Parameters: 4092 + A,B - the matrices in mpiaij format 4093 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4094 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4095 4096 Output Parameter: 4097 + rowb, colb - index sets of rows and columns of B to extract 4098 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 4099 - B_seq - the sequential matrix generated 4100 4101 Level: developer 4102 4103 @*/ 4104 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4105 { 4106 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4107 PetscErrorCode ierr; 4108 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4109 IS isrowb,iscolb; 4110 Mat *bseq; 4111 4112 PetscFunctionBegin; 4113 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4114 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4115 } 4116 if (!logkey_GetBrowsOfAcols) { 4117 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 4118 } 4119 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4120 4121 if (scall == MAT_INITIAL_MATRIX){ 4122 start = A->cmap.rstart; 4123 cmap = a->garray; 4124 nzA = a->A->cmap.n; 4125 nzB = a->B->cmap.n; 4126 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4127 ncols = 0; 4128 for (i=0; i<nzB; i++) { /* row < local row index 
*/ 4129 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4130 else break; 4131 } 4132 imark = i; 4133 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4134 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4135 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4136 ierr = PetscFree(idx);CHKERRQ(ierr); 4137 *brstart = imark; 4138 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 4139 } else { 4140 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4141 isrowb = *rowb; iscolb = *colb; 4142 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4143 bseq[0] = *B_seq; 4144 } 4145 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4146 *B_seq = bseq[0]; 4147 ierr = PetscFree(bseq);CHKERRQ(ierr); 4148 if (!rowb){ 4149 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4150 } else { 4151 *rowb = isrowb; 4152 } 4153 if (!colb){ 4154 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4155 } else { 4156 *colb = iscolb; 4157 } 4158 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4159 PetscFunctionReturn(0); 4160 } 4161 4162 static PetscEvent logkey_GetBrowsOfAocols = 0; 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "MatGetBrowsOfAoCols" 4165 /*@C 4166 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4167 of the OFF-DIAGONAL portion of local A 4168 4169 Collective on Mat 4170 4171 Input Parameters: 4172 + A,B - the matrices in mpiaij format 4173 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4174 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4175 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4176 4177 Output Parameter: 4178 + B_oth - the sequential matrix generated 4179 4180 Level: developer 4181 4182 @*/ 4183 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 4184 { 4185 VecScatter_MPI_General *gen_to,*gen_from; 4186 PetscErrorCode ierr; 4187 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4188 Mat_SeqAIJ *b_oth; 4189 VecScatter ctx=a->Mvctx; 4190 MPI_Comm comm=ctx->comm; 4191 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 4192 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 4193 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 4194 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4195 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4196 MPI_Status *sstatus,rstatus; 4197 PetscMPIInt jj; 4198 PetscInt *cols,sbs,rbs; 4199 PetscScalar *vals; 4200 4201 PetscFunctionBegin; 4202 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 4203 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 4204 } 4205 if (!logkey_GetBrowsOfAocols) { 4206 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 4207 } 4208 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4209 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4210 4211 gen_to = (VecScatter_MPI_General*)ctx->todata; 4212 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4213 rvalues = gen_from->values; /* holds the length of receiving row */ 4214 svalues = gen_to->values; /* holds the length of sending row */ 4215 nrecvs = gen_from->n; 4216 nsends = gen_to->n; 4217 4218 ierr = 
PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4219 srow = gen_to->indices; /* local row index to be sent */ 4220 sstarts = gen_to->starts; 4221 sprocs = gen_to->procs; 4222 sstatus = gen_to->sstatus; 4223 sbs = gen_to->bs; 4224 rstarts = gen_from->starts; 4225 rprocs = gen_from->procs; 4226 rbs = gen_from->bs; 4227 4228 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4229 if (scall == MAT_INITIAL_MATRIX){ 4230 /* i-array */ 4231 /*---------*/ 4232 /* post receives */ 4233 for (i=0; i<nrecvs; i++){ 4234 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4235 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4236 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4237 } 4238 4239 /* pack the outgoing message */ 4240 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4241 rstartsj = sstartsj + nsends +1; 4242 sstartsj[0] = 0; rstartsj[0] = 0; 4243 len = 0; /* total length of j or a array to be sent */ 4244 k = 0; 4245 for (i=0; i<nsends; i++){ 4246 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4247 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4248 for (j=0; j<nrows; j++) { 4249 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 4250 for (l=0; l<sbs; l++){ 4251 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4252 rowlen[j*sbs+l] = ncols; 4253 len += ncols; 4254 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4255 } 4256 k++; 4257 } 4258 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4259 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4260 } 4261 /* recvs and sends of i-array are completed */ 4262 i = nrecvs; 4263 while (i--) { 4264 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4265 } 4266 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 
4267 4268 /* allocate buffers for sending j and a arrays */ 4269 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4270 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4271 4272 /* create i-array of B_oth */ 4273 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4274 b_othi[0] = 0; 4275 len = 0; /* total length of j or a array to be received */ 4276 k = 0; 4277 for (i=0; i<nrecvs; i++){ 4278 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4279 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4280 for (j=0; j<nrows; j++) { 4281 b_othi[k+1] = b_othi[k] + rowlen[j]; 4282 len += rowlen[j]; k++; 4283 } 4284 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4285 } 4286 4287 /* allocate space for j and a arrrays of B_oth */ 4288 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4289 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 4290 4291 /* j-array */ 4292 /*---------*/ 4293 /* post receives of j-array */ 4294 for (i=0; i<nrecvs; i++){ 4295 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4296 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4297 } 4298 4299 /* pack the outgoing message j-array */ 4300 k = 0; 4301 for (i=0; i<nsends; i++){ 4302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4303 bufJ = bufj+sstartsj[i]; 4304 for (j=0; j<nrows; j++) { 4305 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4306 for (ll=0; ll<sbs; ll++){ 4307 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4308 for (l=0; l<ncols; l++){ 4309 *bufJ++ = cols[l]; 4310 } 4311 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4312 } 4313 } 4314 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4315 } 4316 4317 /* recvs and sends of 
j-array are completed */ 4318 i = nrecvs; 4319 while (i--) { 4320 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4321 } 4322 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4323 } else if (scall == MAT_REUSE_MATRIX){ 4324 sstartsj = *startsj; 4325 rstartsj = sstartsj + nsends +1; 4326 bufa = *bufa_ptr; 4327 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4328 b_otha = b_oth->a; 4329 } else { 4330 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4331 } 4332 4333 /* a-array */ 4334 /*---------*/ 4335 /* post receives of a-array */ 4336 for (i=0; i<nrecvs; i++){ 4337 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4338 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4339 } 4340 4341 /* pack the outgoing message a-array */ 4342 k = 0; 4343 for (i=0; i<nsends; i++){ 4344 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4345 bufA = bufa+sstartsj[i]; 4346 for (j=0; j<nrows; j++) { 4347 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 4348 for (ll=0; ll<sbs; ll++){ 4349 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4350 for (l=0; l<ncols; l++){ 4351 *bufA++ = vals[l]; 4352 } 4353 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4354 } 4355 } 4356 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4357 } 4358 /* recvs and sends of a-array are completed */ 4359 i = nrecvs; 4360 while (i--) { 4361 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4362 } 4363 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4364 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4365 4366 if (scall == MAT_INITIAL_MATRIX){ 4367 /* put together the new matrix */ 4368 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4369 4370 /* 
MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4371 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4372 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4373 b_oth->free_a = PETSC_TRUE; 4374 b_oth->free_ij = PETSC_TRUE; 4375 b_oth->nonew = 0; 4376 4377 ierr = PetscFree(bufj);CHKERRQ(ierr); 4378 if (!startsj || !bufa_ptr){ 4379 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4380 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4381 } else { 4382 *startsj = sstartsj; 4383 *bufa_ptr = bufa; 4384 } 4385 } 4386 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 #undef __FUNCT__ 4391 #define __FUNCT__ "MatGetCommunicationStructs" 4392 /*@C 4393 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4394 4395 Not Collective 4396 4397 Input Parameters: 4398 . A - The matrix in mpiaij format 4399 4400 Output Parameter: 4401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4402 . 
colmap - A map from global column index to local index into lvec 4403 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4404 4405 Level: developer 4406 4407 @*/ 4408 #if defined (PETSC_USE_CTABLE) 4409 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4410 #else 4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4412 #endif 4413 { 4414 Mat_MPIAIJ *a; 4415 4416 PetscFunctionBegin; 4417 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4418 PetscValidPointer(lvec, 2) 4419 PetscValidPointer(colmap, 3) 4420 PetscValidPointer(multScatter, 4) 4421 a = (Mat_MPIAIJ *) A->data; 4422 if (lvec) *lvec = a->lvec; 4423 if (colmap) *colmap = a->colmap; 4424 if (multScatter) *multScatter = a->Mvctx; 4425 PetscFunctionReturn(0); 4426 } 4427 4428 EXTERN_C_BEGIN 4429 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4431 EXTERN_C_END 4432 4433 /*MC 4434 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4435 4436 Options Database Keys: 4437 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Registered constructor for the MATMPIAIJ type: installs
   the MPIAIJ function table on B, initializes the Mat_MPIAIJ context to an
   empty (unpreallocated, unassembled) state, and composes the type-specific
   operations that generic Mat code dispatches to by string name.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  /* allocate the type-specific context and install the MPIAIJ op table */
  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor      = 0;
  B->rmap.bs     = 1;
  B->assembled   = PETSC_FALSE;
  B->mapping     = 0;

  B->insertmode  = NOT_SET_VALUES;
  b->size        = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed (values destined for other
     processes are stashed here until assembly) */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;   /* global-to-local column map; built lazily by CreateColmap_MPIAIJ_Private() */
  b->garray      = 0;   /* global indices of the off-diagonal columns; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;   /* holds off-process vector entries gathered for MatMult */
  b->Mvctx = PETSC_NULL;   /* scatter that fills lvec */

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* compose type-specific operations under their dispatch-string names so
     generic code (e.g. MatStoreValues()) can find them via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.
oi - row indices for "off-diagonal" portion of matrix 4537 . oj - column indices 4538 - oa - matrix values 4539 4540 Output Parameter: 4541 . mat - the matrix 4542 4543 Level: advanced 4544 4545 Notes: 4546 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4547 4548 The i and j indices are 0 based 4549 4550 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4551 4552 4553 .keywords: matrix, aij, compressed row, sparse, parallel 4554 4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4556 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4557 @*/ 4558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4559 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4560 { 4561 PetscErrorCode ierr; 4562 Mat_MPIAIJ *maij; 4563 4564 PetscFunctionBegin; 4565 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4566 if (i[0]) { 4567 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4568 } 4569 if (oi[0]) { 4570 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4571 } 4572 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4573 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4574 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4575 maij = (Mat_MPIAIJ*) (*mat)->data; 4576 maij->donotstash = PETSC_TRUE; 4577 (*mat)->preallocated = PETSC_TRUE; 4578 4579 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4580 ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr); 4581 ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr); 4582 4583 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4584 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4585 4586 ierr = 
MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4587 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4588 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 4591 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4593 PetscFunctionReturn(0); 4594 } 4595 4596 /* 4597 Special version for direct calls from Fortran 4598 */ 4599 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4600 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4601 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4602 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4603 #endif 4604 4605 /* Change these macros so can be used in void function */ 4606 #undef CHKERRQ 4607 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4608 #undef SETERRQ2 4609 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4610 #undef SETERRQ 4611 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4612 4613 EXTERN_C_BEGIN 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "matsetvaluesmpiaij_" 4616 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4617 { 4618 Mat mat = *mmat; 4619 PetscInt m = *mm, n = *mn; 4620 InsertMode addv = *maddv; 4621 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4622 PetscScalar value; 4623 PetscErrorCode ierr; 4624 4625 MatPreallocated(mat); 4626 if (mat->insertmode == NOT_SET_VALUES) { 4627 mat->insertmode = addv; 4628 } 4629 #if defined(PETSC_USE_DEBUG) 4630 else if (mat->insertmode != addv) { 4631 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4632 } 4633 #endif 4634 { 4635 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4636 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4637 PetscTruth roworiented = aij->roworiented; 4638 4639 /* Some 
Variables required in the macro */ 4640 Mat A = aij->A; 4641 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4642 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4643 PetscScalar *aa = a->a; 4644 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4645 Mat B = aij->B; 4646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4647 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4648 PetscScalar *ba = b->a; 4649 4650 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4651 PetscInt nonew = a->nonew; 4652 PetscScalar *ap1,*ap2; 4653 4654 PetscFunctionBegin; 4655 for (i=0; i<m; i++) { 4656 if (im[i] < 0) continue; 4657 #if defined(PETSC_USE_DEBUG) 4658 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4659 #endif 4660 if (im[i] >= rstart && im[i] < rend) { 4661 row = im[i] - rstart; 4662 lastcol1 = -1; 4663 rp1 = aj + ai[row]; 4664 ap1 = aa + ai[row]; 4665 rmax1 = aimax[row]; 4666 nrow1 = ailen[row]; 4667 low1 = 0; 4668 high1 = nrow1; 4669 lastcol2 = -1; 4670 rp2 = bj + bi[row]; 4671 ap2 = ba + bi[row]; 4672 rmax2 = bimax[row]; 4673 nrow2 = bilen[row]; 4674 low2 = 0; 4675 high2 = nrow2; 4676 4677 for (j=0; j<n; j++) { 4678 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4679 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4680 if (in[j] >= cstart && in[j] < cend){ 4681 col = in[j] - cstart; 4682 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4683 } else if (in[j] < 0) continue; 4684 #if defined(PETSC_USE_DEBUG) 4685 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4686 #endif 4687 else { 4688 if (mat->was_assembled) { 4689 if (!aij->colmap) { 4690 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4691 } 4692 #if defined (PETSC_USE_CTABLE) 4693 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4694 col--; 4695 #else 4696 col = aij->colmap[in[j]] - 1; 4697 #endif 4698 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4699 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4700 col = in[j]; 4701 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4702 B = aij->B; 4703 b = (Mat_SeqAIJ*)B->data; 4704 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4705 rp2 = bj + bi[row]; 4706 ap2 = ba + bi[row]; 4707 rmax2 = bimax[row]; 4708 nrow2 = bilen[row]; 4709 low2 = 0; 4710 high2 = nrow2; 4711 bm = aij->B->rmap.n; 4712 ba = b->a; 4713 } 4714 } else col = in[j]; 4715 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4716 } 4717 } 4718 } else { 4719 if (!aij->donotstash) { 4720 if (roworiented) { 4721 if (ignorezeroentries && v[i*n] == 0.0) continue; 4722 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4723 } else { 4724 if (ignorezeroentries && v[i] == 0.0) continue; 4725 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4726 } 4727 } 4728 } 4729 }} 4730 PetscFunctionReturnVoid(); 4731 } 4732 EXTERN_C_END 4733 4734