#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each processor holds an integer array of length equal to the global number
  of columns), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = 
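/*
   Note on the lookup in progress here: aij->colmap maps a GLOBAL column number to
   (local column number in the off-diagonal block B) + 1, so a result of zero means
   the global column has no entry in B yet, and aij->garray[] is the inverse map
   (local -> global).  An isolated sketch of the same lookup, with hypothetical
   names gcol/lcol (a negative lcol afterwards means "not present"):

     PetscInt gcol = in[j],lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/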
aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
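/*
   For reference, the user-side calling pattern that this assembly path supports is
   the usual AIJ loop below; MatSetValues_MPIAIJ() above routes any value whose row
   is owned by another process into mat->stash, and the stash is communicated during
   assembly.  A minimal sketch only -- the global size m, the preallocation numbers,
   and the periodic three-entry rows are assumptions, not part of this file:

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       m = 100,rstart,rend,i,col[3];
     PetscScalar    val[3];

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,m,m,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       col[0] = (i+m-1)%m; col[1] = i;   col[2] = (i+1)%m;
       val[0] = -1.0;      val[1] = 2.0; val[2] = -1.0;
       ierr = MatSetValues(A,1,&i,3,col,val,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/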
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 324 PetscFunctionBegin; 325 if (!aij->donotstash) { 326 while (1) { 327 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 328 if (!flg) break; 329 330 for (i=0; i<n;) { 331 /* Now identify the consecutive vals belonging to the same row */ 332 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 333 if (j < n) ncols = j-i; 334 else ncols = n-i; 335 /* Now assemble all these values with a single function call */ 336 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 337 i = j; 338 } 339 } 340 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 341 } 342 a->compressedrow.use = PETSC_FALSE; 343 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 345 346 /* determine if any processor has disassembled, if so we must 347 also disassemble ourselfs, in order that we may reassemble. */ 348 /* 349 if nonzero structure of submatrix B cannot change then we know that 350 no processor disassembled thus we can skip this stuff 351 */ 352 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 353 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 354 if (mat->was_assembled && !other_disassembled) { 355 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 356 } 357 } 358 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 359 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 360 } 361 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 362 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 363 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 364 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 365 366 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 367 aij->rowvalues = 0; 368 369 /* used by MatAXPY() */ 370 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 371 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 372 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 379 { 380 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 381 PetscErrorCode ierr; 382 383 PetscFunctionBegin; 384 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 385 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatZeroRows_MPIAIJ" 391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 392 { 393 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 394 PetscErrorCode ierr; 395 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 396 PetscInt i,*owners = A->rmap.range; 397 PetscInt *nprocs,j,idx,nsends,row; 398 PetscInt 
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
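/*
   Data-flow note for the transpose product being assembled here (a restatement of
   the code, not additional behaviour): with the local splitting A_loc = [A_d  B_o]
   into diagonal and off-diagonal blocks, y = A^T x is formed as

        y_local  += A_d^T x_local       (done by a->A->ops->multtranspose)
        y_remote += B_o^T x_local       (computed above into a->lvec)

   and the SCATTER_REVERSE/ADD_VALUES pair below ships the B_o^T contributions back
   to the processes that own those entries of y and adds them in, overlapping the
   communication with the local product when the scatter is not "merged".
*/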
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = 
mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 
ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
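/*
   Typical user-level calls that reach this dispatch (an illustrative sketch; "A" is
   an assembled MATMPIAIJ matrix and the file name is an assumption):

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/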
1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRelax_MPIAIJ" 1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscErrorCode ierr; 1021 Vec bb1; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatPermute_MPIAIJ" 1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1091 { 1092 MPI_Comm comm,pcomm; 1093 PetscInt first,local_size,nrows,*rows; 1094 int ntids; 1095 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1096 PetscErrorCode 
ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1100 /* make a collective version of 'rowp' */ 1101 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1102 if (pcomm==comm) { 1103 crowp = rowp; 1104 } else { 1105 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1106 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1108 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1109 } 1110 /* collect the global row permutation and invert it */ 1111 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1112 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1113 if (pcomm!=comm) { 1114 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1115 } 1116 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1117 /* get the local target indices */ 1118 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1120 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1122 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1123 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1124 /* the column permutation is so much easier; 1125 make a local version of 'colp' and invert it */ 1126 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1127 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1128 if (ntids==1) { 1129 lcolp = colp; 1130 } else { 1131 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1132 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1133 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1134 } 1135 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1136 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1137 if (ntids>1) { 1138 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1139 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1140 } 1141 /* now we just get the submatrix */ 1142 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1143 /* clean up */ 1144 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1145 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1154 Mat A = mat->A,B = mat->B; 1155 PetscErrorCode ierr; 1156 PetscReal isend[5],irecv[5]; 1157 1158 PetscFunctionBegin; 1159 info->block_size = 1.0; 1160 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1162 isend[3] = info->memory; isend[4] = info->mallocs; 1163 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1164 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1165 isend[3] += info->memory; isend[4] += info->mallocs; 1166 if (flag == MAT_LOCAL) { 1167 info->nz_used = isend[0]; 1168 info->nz_allocated = isend[1]; 1169 info->nz_unneeded = isend[2]; 1170 info->memory = isend[3]; 1171 info->mallocs = isend[4]; 1172 } else if (flag == MAT_GLOBAL_MAX) { 1173 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1174 info->nz_used = irecv[0]; 1175 info->nz_allocated = irecv[1]; 1176 info->nz_unneeded = irecv[2]; 1177 info->memory = irecv[3]; 1178 info->mallocs = irecv[4]; 1179 } else if (flag == 
MAT_GLOBAL_SUM) { 1180 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1181 info->nz_used = irecv[0]; 1182 info->nz_allocated = irecv[1]; 1183 info->nz_unneeded = irecv[2]; 1184 info->memory = irecv[3]; 1185 info->mallocs = irecv[4]; 1186 } 1187 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1188 info->fill_ratio_needed = 0; 1189 info->factor_mallocs = 0; 1190 info->rows_global = (double)matin->rmap.N; 1191 info->columns_global = (double)matin->cmap.N; 1192 info->rows_local = (double)matin->rmap.n; 1193 info->columns_local = (double)matin->cmap.N; 1194 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSetOption_MPIAIJ" 1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 switch (op) { 1207 case MAT_NO_NEW_NONZERO_LOCATIONS: 1208 case MAT_YES_NEW_NONZERO_LOCATIONS: 1209 case MAT_COLUMNS_UNSORTED: 1210 case MAT_COLUMNS_SORTED: 1211 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1212 case MAT_KEEP_ZEROED_ROWS: 1213 case MAT_NEW_NONZERO_LOCATION_ERR: 1214 case MAT_USE_INODES: 1215 case MAT_DO_NOT_USE_INODES: 1216 case MAT_IGNORE_ZERO_ENTRIES: 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROW_ORIENTED: 1221 a->roworiented = PETSC_TRUE; 1222 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1223 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1224 break; 1225 case MAT_ROWS_SORTED: 1226 case MAT_ROWS_UNSORTED: 1227 case MAT_YES_NEW_DIAGONALS: 1228 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1229 break; 1230 case MAT_COLUMN_ORIENTED: 1231 a->roworiented = PETSC_FALSE; 1232 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1233 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1234 break; 1235 case MAT_IGNORE_OFF_PROC_ENTRIES: 1236 a->donotstash = PETSC_TRUE; 1237 break; 1238 case MAT_NO_NEW_DIAGONALS: 1239 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 break; 1248 case MAT_NOT_SYMMETRIC: 1249 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1250 case MAT_NOT_HERMITIAN: 1251 case MAT_NOT_SYMMETRY_ETERNAL: 1252 break; 1253 default: 1254 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRow_MPIAIJ" 1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1264 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1265 PetscErrorCode ierr; 1266 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1267 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1268 PetscInt *cmap,*idx_p; 1269 1270 PetscFunctionBegin; 1271 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1272 mat->getrowactive = PETSC_TRUE; 1273 1274 if (!mat->rowvalues && (idx || v)) { 1275 /* 1276 allocate enough space to hold information from the longest row. 
1277 */ 1278 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1279 PetscInt max = 1,tmp; 1280 for (i=0; i<matin->rmap.n; i++) { 1281 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1282 if (max < tmp) { max = tmp; } 1283 } 1284 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1285 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1286 } 1287 1288 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1289 lrow = row - rstart; 1290 1291 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1292 if (!v) {pvA = 0; pvB = 0;} 1293 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1294 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1295 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1296 nztot = nzA + nzB; 1297 1298 cmap = mat->garray; 1299 if (v || idx) { 1300 if (nztot) { 1301 /* Sort by increasing column numbers, assuming A and B already sorted */ 1302 PetscInt imark = -1; 1303 if (v) { 1304 *v = v_p = mat->rowvalues; 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1307 else break; 1308 } 1309 imark = i; 1310 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1311 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1312 } 1313 if (idx) { 1314 *idx = idx_p = mat->rowindices; 1315 if (imark > -1) { 1316 for (i=0; i<imark; i++) { 1317 idx_p[i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!aij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1349 } 1350 aij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatNorm_MPIAIJ" 1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,j,cstart = mat->cmap.rstart; 1362 PetscReal sum = 0.0; 1363 PetscScalar *v; 1364 1365 PetscFunctionBegin; 1366 if (aij->size == 1) { 1367 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1368 } else { 1369 if (type == NORM_FROBENIUS) { 1370 v = amat->a; 1371 for (i=0; i<amat->nz; i++) { 1372 #if defined(PETSC_USE_COMPLEX) 1373 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 v = bmat->a; 1379 for (i=0; i<bmat->nz; i++) { 1380 #if defined(PETSC_USE_COMPLEX) 1381 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1382 #else 1383 sum += (*v)*(*v); v++; 1384 #endif 1385 } 1386 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 *norm = sqrt(*norm); 
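/*
   The Frobenius branch above computes, per process, the sum of |a_ij|^2 over the
   local diagonal block (amat) and off-diagonal block (bmat) and then reduces:

        ||M||_F = sqrt( sum_p ( sum over A_d(p) |a_ij|^2 + sum over B_o(p) |b_ij|^2 ) )

   Each nonzero of the parallel matrix is stored on exactly one process, so nothing
   is counted twice.  (A worked restatement of the code above, not new behaviour.)
*/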
1388 } else if (type == NORM_1) { /* max column norm */ 1389 PetscReal *tmp,*tmp2; 1390 PetscInt *jj,*garray = aij->garray; 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1393 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1394 *norm = 0.0; 1395 v = amat->a; jj = amat->j; 1396 for (j=0; j<amat->nz; j++) { 1397 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a; jj = bmat->j; 1400 for (j=0; j<bmat->nz; j++) { 1401 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1402 } 1403 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1404 for (j=0; j<mat->cmap.N; j++) { 1405 if (tmp2[j] > *norm) *norm = tmp2[j]; 1406 } 1407 ierr = PetscFree(tmp);CHKERRQ(ierr); 1408 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1409 } else if (type == NORM_INFINITY) { /* max row norm */ 1410 PetscReal ntemp = 0.0; 1411 for (j=0; j<aij->A->rmap.n; j++) { 1412 v = amat->a + amat->i[j]; 1413 sum = 0.0; 1414 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1415 sum += PetscAbsScalar(*v); v++; 1416 } 1417 v = bmat->a + bmat->i[j]; 1418 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1419 sum += PetscAbsScalar(*v); v++; 1420 } 1421 if (sum > ntemp) ntemp = sum; 1422 } 1423 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1424 } else { 1425 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1426 } 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatTranspose_MPIAIJ" 1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1434 { 1435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1436 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1437 PetscErrorCode ierr; 1438 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1439 Mat B; 1440 PetscScalar *array; 1441 1442 PetscFunctionBegin; 1443 if (!matout && M != N) { 1444 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1445 } 1446 1447 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1448 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1449 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1450 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1451 1452 /* copy over the A part */ 1453 Aloc = (Mat_SeqAIJ*)a->A->data; 1454 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1455 row = A->rmap.rstart; 1456 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1457 for (i=0; i<m; i++) { 1458 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1459 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1460 } 1461 aj = Aloc->j; 1462 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1463 1464 /* copy over the B part */ 1465 Aloc = (Mat_SeqAIJ*)a->B->data; 1466 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1467 row = A->rmap.rstart; 1468 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1469 ct = cols; 1470 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1471 for (i=0; i<m; i++) { 1472 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1473 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1474 } 1475 ierr = PetscFree(ct);CHKERRQ(ierr); 1476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 if (matout) { 1479 *matout = B; 1480 } else { 1481 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1482 } 1483 
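/*
   Usage sketch for the routine above (illustrative only): MatTranspose(A,&At)
   builds the explicit parallel transpose in a new matrix, while passing PETSC_NULL
   for the second argument replaces a square A in place via MatHeaderCopy().

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);
     ierr = MatDestroy(At);CHKERRQ(ierr);
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);
*/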
PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1489 { 1490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1491 Mat a = aij->A,b = aij->B; 1492 PetscErrorCode ierr; 1493 PetscInt s1,s2,s3; 1494 1495 PetscFunctionBegin; 1496 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1497 if (rr) { 1498 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1499 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1500 /* Overlap communication with computation. */ 1501 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1502 } 1503 if (ll) { 1504 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1505 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1506 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1507 } 1508 /* scale the diagonal block */ 1509 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1510 1511 if (rr) { 1512 /* Do a scatter end and then right scale the off-diagonal block */ 1513 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1514 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1515 } 1516 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1529 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1535 { 1536 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatEqual_MPIAIJ" 1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1547 { 1548 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1549 Mat a,b,c,d; 1550 PetscTruth flg; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 a = matA->A; b = matA->B; 1555 c = matB->A; d = matB->B; 1556 1557 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1558 if (flg) { 1559 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1560 } 1561 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatCopy_MPIAIJ" 1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1568 { 1569 PetscErrorCode ierr; 1570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1571 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1572 1573 PetscFunctionBegin; 1574 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1575 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1576 /* because of the column compression in the off-processor part of the matrix a->B, 1577 the number of columns in a->B and b->B may be different, hence we cannot call 1578 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1579 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1580 then copying the submatrices */ 1581 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1582 } else { 1583 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1584 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1592 { 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #include "petscblaslapack.h" 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatAXPY_MPIAIJ" 1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1604 { 1605 PetscErrorCode ierr; 1606 PetscInt i; 1607 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1608 PetscBLASInt bnz,one=1; 1609 Mat_SeqAIJ *x,*y; 1610 1611 PetscFunctionBegin; 1612 if (str == SAME_NONZERO_PATTERN) { 1613 PetscScalar alpha = a; 1614 x = (Mat_SeqAIJ *)xx->A->data; 1615 y = (Mat_SeqAIJ *)yy->A->data; 1616 bnz = (PetscBLASInt)x->nz; 1617 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1618 x = (Mat_SeqAIJ *)xx->B->data; 1619 y = (Mat_SeqAIJ *)yy->B->data; 1620 bnz = (PetscBLASInt)x->nz; 1621 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1622 } else if (str == SUBSET_NONZERO_PATTERN) { 1623 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1624 1625 x = (Mat_SeqAIJ *)xx->B->data; 1626 y = (Mat_SeqAIJ *)yy->B->data; 1627 if (y->xtoy && y->XtoY != xx->B) { 1628 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1629 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1630 } 1631 if (!y->xtoy) { /* get xtoy */ 1632 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1633 y->XtoY = xx->B; 1634 } 1635 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1636 } else { 1637 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1638 } 1639 PetscFunctionReturn(0); 1640 } 1641 1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatConjugate_MPIAIJ" 1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1647 { 1648 #if defined(PETSC_USE_COMPLEX) 1649 PetscErrorCode ierr; 1650 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1651 1652 PetscFunctionBegin; 1653 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1654 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1655 #else 1656 PetscFunctionBegin; 1657 #endif 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatRealPart_MPIAIJ" 1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1670 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1677 { 1678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1679 PetscErrorCode ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1683 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #ifdef PETSC_HAVE_PBGL 1688 1689 #include <boost/parallel/mpi/bsp_process_group.hpp> 1690 #include 
<boost/graph/distributed/ilu_default_graph.hpp> 1691 #include <boost/graph/distributed/ilu_0_block.hpp> 1692 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1693 #include <boost/graph/distributed/petsc/interface.hpp> 1694 #include <boost/multi_array.hpp> 1695 #include <boost/parallel/distributed_property_map.hpp> 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1699 /* 1700 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1701 */ 1702 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1703 { 1704 namespace petsc = boost::distributed::petsc; 1705 1706 namespace graph_dist = boost::graph::distributed; 1707 using boost::graph::distributed::ilu_default::process_group_type; 1708 using boost::graph::ilu_permuted; 1709 1710 PetscTruth row_identity, col_identity; 1711 PetscContainer c; 1712 PetscInt m, n, M, N; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1717 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1718 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1719 if (!row_identity || !col_identity) { 1720 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1721 } 1722 1723 process_group_type pg; 1724 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1725 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1726 lgraph_type& level_graph = *lgraph_p; 1727 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1728 1729 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1730 ilu_permuted(level_graph); 1731 1732 /* put together the new matrix */ 1733 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1734 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1735 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1736 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1737 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1738 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1740 (*fact)->factor = FACTOR_LU; 1741 1742 ierr = PetscContainerCreate(A->comm, &c); 1743 ierr = PetscContainerSetPointer(c, lgraph_p); 1744 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1750 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1751 { 1752 PetscFunctionBegin; 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSolve_MPIAIJ" 1758 /* 1759 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1760 */ 1761 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1762 { 1763 namespace graph_dist = boost::graph::distributed; 1764 1765 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1766 lgraph_type* lgraph_p; 1767 PetscContainer c; 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1772 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1773 ierr = VecCopy(b, x); CHKERRQ(ierr); 1774 1775 PetscScalar* array_x; 1776 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1777 PetscInt sx; 1778 ierr = VecGetSize(x, 
&sx);CHKERRQ(ierr); 1779 1780 PetscScalar* array_b; 1781 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1782 PetscInt sb; 1783 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1784 1785 lgraph_type& level_graph = *lgraph_p; 1786 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1787 1788 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 1789 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 1790 ref_x(array_x, boost::extents[num_vertices(graph)]); 1791 1792 typedef boost::iterator_property_map<array_ref_type::iterator, 1793 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 1794 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 1795 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 1796 1797 ilu_set_solve(*lgraph_p, vector_b, vector_x); 1798 1799 PetscFunctionReturn(0); 1800 } 1801 #endif 1802 1803 /* -------------------------------------------------------------------*/ 1804 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1805 MatGetRow_MPIAIJ, 1806 MatRestoreRow_MPIAIJ, 1807 MatMult_MPIAIJ, 1808 /* 4*/ MatMultAdd_MPIAIJ, 1809 MatMultTranspose_MPIAIJ, 1810 MatMultTransposeAdd_MPIAIJ, 1811 #ifdef PETSC_HAVE_PBGL 1812 MatSolve_MPIAIJ, 1813 #else 1814 0, 1815 #endif 1816 0, 1817 0, 1818 /*10*/ 0, 1819 0, 1820 0, 1821 MatRelax_MPIAIJ, 1822 MatTranspose_MPIAIJ, 1823 /*15*/ MatGetInfo_MPIAIJ, 1824 MatEqual_MPIAIJ, 1825 MatGetDiagonal_MPIAIJ, 1826 MatDiagonalScale_MPIAIJ, 1827 MatNorm_MPIAIJ, 1828 /*20*/ MatAssemblyBegin_MPIAIJ, 1829 MatAssemblyEnd_MPIAIJ, 1830 0, 1831 MatSetOption_MPIAIJ, 1832 MatZeroEntries_MPIAIJ, 1833 /*25*/ MatZeroRows_MPIAIJ, 1834 0, 1835 #ifdef PETSC_HAVE_PBGL 1836 MatLUFactorNumeric_MPIAIJ, 1837 #else 1838 0, 1839 #endif 1840 0, 1841 0, 1842 /*30*/ MatSetUpPreallocation_MPIAIJ, 1843 #ifdef PETSC_HAVE_PBGL 1844 MatILUFactorSymbolic_MPIAIJ, 1845 #else 1846 0, 1847 #endif 1848 0, 1849 0, 1850 0, 1851 /*35*/ MatDuplicate_MPIAIJ, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*40*/ MatAXPY_MPIAIJ, 1857 MatGetSubMatrices_MPIAIJ, 1858 MatIncreaseOverlap_MPIAIJ, 1859 MatGetValues_MPIAIJ, 1860 MatCopy_MPIAIJ, 1861 /*45*/ 0, 1862 MatScale_MPIAIJ, 1863 0, 1864 0, 1865 0, 1866 /*50*/ MatSetBlockSize_MPIAIJ, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*55*/ MatFDColoringCreate_MPIAIJ, 1872 0, 1873 MatSetUnfactored_MPIAIJ, 1874 MatPermute_MPIAIJ, 1875 0, 1876 /*60*/ MatGetSubMatrix_MPIAIJ, 1877 MatDestroy_MPIAIJ, 1878 MatView_MPIAIJ, 1879 0, 1880 0, 1881 /*65*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /*70*/ 0, 1887 0, 1888 MatSetColoring_MPIAIJ, 1889 #if defined(PETSC_HAVE_ADIC) 1890 MatSetValuesAdic_MPIAIJ, 1891 #else 1892 0, 1893 #endif 1894 MatSetValuesAdifor_MPIAIJ, 1895 /*75*/ 0, 1896 0, 1897 0, 1898 0, 1899 0, 1900 /*80*/ 0, 1901 0, 1902 0, 1903 0, 1904 /*84*/ MatLoad_MPIAIJ, 1905 0, 1906 0, 1907 0, 1908 0, 1909 0, 1910 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1911 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1912 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1913 MatPtAP_Basic, 1914 MatPtAPSymbolic_MPIAIJ, 1915 /*95*/ MatPtAPNumeric_MPIAIJ, 1916 0, 1917 0, 1918 0, 1919 0, 1920 /*100*/0, 1921 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1922 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1923 MatConjugate_MPIAIJ, 1924 0, 1925 /*105*/MatSetValuesRow_MPIAIJ, 1926 MatRealPart_MPIAIJ, 1927 MatImaginaryPart_MPIAIJ}; 1928 1929 /* ----------------------------------------------------------------------------------------*/ 1930 1931 EXTERN_C_BEGIN 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1934 PetscErrorCode 
PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1935 { 1936 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1941 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 EXTERN_C_END 1945 1946 EXTERN_C_BEGIN 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1949 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1952 PetscErrorCode ierr; 1953 1954 PetscFunctionBegin; 1955 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1956 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 EXTERN_C_END 1960 1961 #include "petscpc.h" 1962 EXTERN_C_BEGIN 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1966 { 1967 Mat_MPIAIJ *b; 1968 PetscErrorCode ierr; 1969 PetscInt i; 1970 1971 PetscFunctionBegin; 1972 B->preallocated = PETSC_TRUE; 1973 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1974 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1975 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1976 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1977 1978 B->rmap.bs = B->cmap.bs = 1; 1979 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1980 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1981 if (d_nnz) { 1982 for (i=0; i<B->rmap.n; i++) { 1983 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1984 } 1985 } 1986 if (o_nnz) { 1987 for (i=0; i<B->rmap.n; i++) { 1988 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1989 } 1990 } 1991 b = (Mat_MPIAIJ*)B->data; 1992 1993 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1994 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1995 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1996 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1997 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1998 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1999 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2000 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2001 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2002 2003 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2004 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2005 2006 PetscFunctionReturn(0); 2007 } 2008 EXTERN_C_END 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2012 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2013 { 2014 Mat mat; 2015 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 *newmat = 0; 2020 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2021 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2022 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2023 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2024 a = (Mat_MPIAIJ*)mat->data; 2025 2026 mat->factor = matin->factor; 2027 mat->rmap.bs = matin->rmap.bs; 2028 mat->assembled = PETSC_TRUE; 2029 mat->insertmode = NOT_SET_VALUES; 2030 mat->preallocated = PETSC_TRUE; 2031 2032 a->size = oldmat->size; 2033 a->rank = oldmat->rank; 2034 a->donotstash = oldmat->donotstash; 2035 a->roworiented = oldmat->roworiented; 2036 a->rowindices = 0; 2037 a->rowvalues = 0; 2038 a->getrowactive = PETSC_FALSE; 2039 2040 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2041 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2042 2043 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2044 if (oldmat->colmap) { 2045 #if defined (PETSC_USE_CTABLE) 2046 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2047 #else 2048 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2049 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2051 #endif 2052 } else a->colmap = 0; 2053 if (oldmat->garray) { 2054 PetscInt len; 2055 len = oldmat->B->cmap.n; 2056 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2057 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2058 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2059 } else a->garray = 0; 2060 2061 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2062 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2063 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2064 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2065 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2066 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2067 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2068 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2069 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2070 *newmat = mat; 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #include "petscsys.h" 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatLoad_MPIAIJ" 2078 
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2079 { 2080 Mat A; 2081 PetscScalar *vals,*svals; 2082 MPI_Comm comm = ((PetscObject)viewer)->comm; 2083 MPI_Status status; 2084 PetscErrorCode ierr; 2085 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2086 PetscInt i,nz,j,rstart,rend,mmax; 2087 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2088 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2089 PetscInt cend,cstart,n,*rowners; 2090 int fd; 2091 2092 PetscFunctionBegin; 2093 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2094 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2095 if (!rank) { 2096 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2097 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2098 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2099 } 2100 2101 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2102 M = header[1]; N = header[2]; 2103 /* determine ownership of all rows */ 2104 m = M/size + ((M % size) > rank); 2105 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2106 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2107 2108 /* First process needs enough room for process with most rows */ 2109 if (!rank) { 2110 mmax = rowners[1]; 2111 for (i=2; i<size; i++) { 2112 mmax = PetscMax(mmax,rowners[i]); 2113 } 2114 } else mmax = m; 2115 2116 rowners[0] = 0; 2117 for (i=2; i<=size; i++) { 2118 rowners[i] += rowners[i-1]; 2119 } 2120 rstart = rowners[rank]; 2121 rend = rowners[rank+1]; 2122 2123 /* distribute row lengths to all processors */ 2124 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2125 if (!rank) { 2126 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2127 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2128 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2129 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (j=0; j<m; j++) { 2131 procsnz[0] += ourlens[j]; 2132 } 2133 for (i=1; i<size; i++) { 2134 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2135 /* calculate the number of nonzeros on each processor */ 2136 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2137 procsnz[i] += rowlengths[j]; 2138 } 2139 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2140 } 2141 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2142 } else { 2143 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2144 } 2145 2146 if (!rank) { 2147 /* determine max buffer needed and allocate it */ 2148 maxnz = 0; 2149 for (i=0; i<size; i++) { 2150 maxnz = PetscMax(maxnz,procsnz[i]); 2151 } 2152 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2153 2154 /* read in my part of the matrix column indices */ 2155 nz = procsnz[0]; 2156 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2157 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2158 2159 /* read in every one elses and ship off */ 2160 for (i=1; i<size; i++) { 2161 nz = procsnz[i]; 2162 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2163 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2164 } 2165 ierr = PetscFree(cols);CHKERRQ(ierr); 2166 } else { 2167 /* determine buffer space needed for message */ 2168 nz = 0; 2169 for (i=0; i<m; i++) { 2170 nz += ourlens[i]; 2171 
} 2172 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2173 2174 /* receive message of column indices*/ 2175 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2176 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2177 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2178 } 2179 2180 /* determine column ownership if matrix is not square */ 2181 if (N != M) { 2182 n = N/size + ((N % size) > rank); 2183 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2184 cstart = cend - n; 2185 } else { 2186 cstart = rstart; 2187 cend = rend; 2188 n = cend - cstart; 2189 } 2190 2191 /* loop over local rows, determining number of off diagonal entries */ 2192 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2193 jj = 0; 2194 for (i=0; i<m; i++) { 2195 for (j=0; j<ourlens[i]; j++) { 2196 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2197 jj++; 2198 } 2199 } 2200 2201 /* create our matrix */ 2202 for (i=0; i<m; i++) { 2203 ourlens[i] -= offlens[i]; 2204 } 2205 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2206 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2207 ierr = MatSetType(A,type);CHKERRQ(ierr); 2208 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2209 2210 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2211 for (i=0; i<m; i++) { 2212 ourlens[i] += offlens[i]; 2213 } 2214 2215 if (!rank) { 2216 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2217 2218 /* read in my part of the matrix numerical values */ 2219 nz = procsnz[0]; 2220 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2221 2222 /* insert into matrix */ 2223 jj = rstart; 2224 smycols = mycols; 2225 svals = vals; 2226 for (i=0; i<m; i++) { 2227 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2228 smycols += ourlens[i]; 2229 svals += ourlens[i]; 2230 jj++; 2231 } 2232 2233 /* read in other processors and ship out */ 2234 for (i=1; i<size; i++) { 2235 nz = procsnz[i]; 2236 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2237 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2238 } 2239 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2240 } else { 2241 /* receive numeric values */ 2242 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2243 2244 /* receive message of values*/ 2245 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2246 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2247 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2248 2249 /* insert into matrix */ 2250 jj = rstart; 2251 smycols = mycols; 2252 svals = vals; 2253 for (i=0; i<m; i++) { 2254 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2255 smycols += ourlens[i]; 2256 svals += ourlens[i]; 2257 jj++; 2258 } 2259 } 2260 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2261 ierr = PetscFree(vals);CHKERRQ(ierr); 2262 ierr = PetscFree(mycols);CHKERRQ(ierr); 2263 ierr = PetscFree(rowners);CHKERRQ(ierr); 2264 2265 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2267 *newmat = A; 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2273 /* 2274 Not great since it makes two copies of the submatrix, first an SeqAIJ 2275 in local and then by concatenating the local matrices 
the end result. 2276 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2277 */ 2278 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2279 { 2280 PetscErrorCode ierr; 2281 PetscMPIInt rank,size; 2282 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2283 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2284 Mat *local,M,Mreuse; 2285 PetscScalar *vwork,*aa; 2286 MPI_Comm comm = mat->comm; 2287 Mat_SeqAIJ *aij; 2288 2289 2290 PetscFunctionBegin; 2291 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2293 2294 if (call == MAT_REUSE_MATRIX) { 2295 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2296 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2297 local = &Mreuse; 2298 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2299 } else { 2300 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2301 Mreuse = *local; 2302 ierr = PetscFree(local);CHKERRQ(ierr); 2303 } 2304 2305 /* 2306 m - number of local rows 2307 n - number of columns (same on all processors) 2308 rstart - first row in new global matrix generated 2309 */ 2310 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2311 if (call == MAT_INITIAL_MATRIX) { 2312 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2313 ii = aij->i; 2314 jj = aij->j; 2315 2316 /* 2317 Determine the number of non-zeros in the diagonal and off-diagonal 2318 portions of the matrix in order to do correct preallocation 2319 */ 2320 2321 /* first get start and end of "diagonal" columns */ 2322 if (csize == PETSC_DECIDE) { 2323 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2324 if (mglobal == n) { /* square matrix */ 2325 nlocal = m; 2326 } else { 2327 nlocal = n/size + ((n % size) > rank); 2328 } 2329 } else { 2330 nlocal = csize; 2331 } 2332 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2333 rstart = rend - nlocal; 2334 if (rank == size - 1 && rend != n) { 2335 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2336 } 2337 2338 /* next, compute all the lengths */ 2339 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2340 olens = dlens + m; 2341 for (i=0; i<m; i++) { 2342 jend = ii[i+1] - ii[i]; 2343 olen = 0; 2344 dlen = 0; 2345 for (j=0; j<jend; j++) { 2346 if (*jj < rstart || *jj >= rend) olen++; 2347 else dlen++; 2348 jj++; 2349 } 2350 olens[i] = olen; 2351 dlens[i] = dlen; 2352 } 2353 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2354 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2355 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2356 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2357 ierr = PetscFree(dlens);CHKERRQ(ierr); 2358 } else { 2359 PetscInt ml,nl; 2360 2361 M = *newmat; 2362 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2363 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2364 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2365 /* 2366 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2367 rather than the slower MatSetValues(). 
2368 */ 2369 M->was_assembled = PETSC_TRUE; 2370 M->assembled = PETSC_FALSE; 2371 } 2372 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2373 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2374 ii = aij->i; 2375 jj = aij->j; 2376 aa = aij->a; 2377 for (i=0; i<m; i++) { 2378 row = rstart + i; 2379 nz = ii[i+1] - ii[i]; 2380 cwork = jj; jj += nz; 2381 vwork = aa; aa += nz; 2382 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2383 } 2384 2385 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 *newmat = M; 2388 2389 /* save submatrix used in processor for next request */ 2390 if (call == MAT_INITIAL_MATRIX) { 2391 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2392 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2393 } 2394 2395 PetscFunctionReturn(0); 2396 } 2397 2398 EXTERN_C_BEGIN 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2401 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2402 { 2403 PetscInt m,cstart, cend,j,nnz,i,d; 2404 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2405 const PetscInt *JJ; 2406 PetscScalar *values; 2407 PetscErrorCode ierr; 2408 2409 PetscFunctionBegin; 2410 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2411 2412 B->rmap.bs = B->cmap.bs = 1; 2413 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2414 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2415 m = B->rmap.n; 2416 cstart = B->cmap.rstart; 2417 cend = B->cmap.rend; 2418 rstart = B->rmap.rstart; 2419 2420 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2421 o_nnz = d_nnz + m; 2422 2423 for (i=0; i<m; i++) { 2424 nnz = Ii[i+1]- Ii[i]; 2425 JJ = J + Ii[i]; 2426 nnz_max = PetscMax(nnz_max,nnz); 2427 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2428 for (j=0; j<nnz; j++) { 2429 if (*JJ >= cstart) break; 2430 JJ++; 2431 } 2432 d = 0; 2433 for (; j<nnz; j++) { 2434 if (*JJ++ >= cend) break; 2435 d++; 2436 } 2437 d_nnz[i] = d; 2438 o_nnz[i] = nnz - d; 2439 } 2440 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2441 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2442 2443 if (v) values = (PetscScalar*)v; 2444 else { 2445 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2446 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2447 } 2448 2449 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2450 for (i=0; i<m; i++) { 2451 ii = i + rstart; 2452 nnz = Ii[i+1]- Ii[i]; 2453 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2454 } 2455 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2457 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2458 2459 if (!v) { 2460 ierr = PetscFree(values);CHKERRQ(ierr); 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 EXTERN_C_END 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2468 /*@ 2469 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2470 (the default parallel PETSc format). 2471 2472 Collective on MPI_Comm 2473 2474 Input Parameters: 2475 + B - the matrix 2476 . 
i - the indices into j for the start of each local row (starts with zero) 2477 . j - the column indices for each local row (starts with zero); these must be sorted within each row 2478 - v - optional values in the matrix 2479 2480 Level: developer 2481 2482 Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal 2483 storage format. Thus changing the values in v[] after this call will not affect the matrix values. 2484 2485 .keywords: matrix, aij, compressed row, sparse, parallel 2486 2487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2488 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2489 @*/ 2490 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2491 { 2492 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2493 2494 PetscFunctionBegin; 2495 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2496 if (f) { 2497 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2498 } 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2504 /*@C 2505 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2506 (the default parallel PETSc format). For good matrix assembly performance 2507 the user should preallocate the matrix storage by setting the parameters 2508 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2509 performance can be increased by more than a factor of 50. 2510 2511 Collective on MPI_Comm 2512 2513 Input Parameters: 2514 + A - the matrix 2515 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2516 (same value is used for all local rows) 2517 . d_nnz - array containing the number of nonzeros in the various rows of the 2518 DIAGONAL portion of the local submatrix (possibly different for each row) 2519 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2520 The size of this array is equal to the number of local rows, i.e. 'm'. 2521 You must leave room for the diagonal entry even if it is zero. 2522 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2523 submatrix (same value is used for all local rows). 2524 - o_nnz - array containing the number of nonzeros in the various rows of the 2525 OFF-DIAGONAL portion of the local submatrix (possibly different for 2526 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2527 structure. The size of this array is equal to the number 2528 of local rows, i.e. 'm'. 2529 2530 If the *_nnz parameter is given then the *_nz parameter is ignored. 2531 2532 The AIJ format (also called the Yale sparse matrix format or 2533 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2534 storage. The stored row and column indices begin with zero. See the users manual for details. 2535 2536 The parallel matrix is partitioned such that the first m0 rows belong to 2537 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2538 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2539 2540 The DIAGONAL portion of the local submatrix of a processor can be defined 2541 as the submatrix which is obtained by extracting the part corresponding 2542 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2543 first row that belongs to the processor, and r2 is the last row belonging 2544 to this processor. This is a square mxm matrix. The remaining portion 2545 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2546 2547 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 2548 2549 Example usage: 2550 2551 Consider the following 8x8 matrix with 34 non-zero values, that is 2552 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2553 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2554 as follows: 2555 2556 .vb 2557 1 2 0 | 0 3 0 | 0 4 2558 Proc0 0 5 6 | 7 0 0 | 8 0 2559 9 0 10 | 11 0 0 | 12 0 2560 ------------------------------------- 2561 13 0 14 | 15 16 17 | 0 0 2562 Proc1 0 18 0 | 19 20 21 | 0 0 2563 0 0 0 | 22 23 0 | 24 0 2564 ------------------------------------- 2565 Proc2 25 26 27 | 0 0 28 | 29 0 2566 30 0 0 | 31 32 33 | 0 34 2567 .ve 2568 2569 This can be represented as a collection of submatrices as: 2570 2571 .vb 2572 A B C 2573 D E F 2574 G H I 2575 .ve 2576 2577 Here the submatrices A,B,C are owned by proc0, D,E,F are 2578 owned by proc1, and G,H,I are owned by proc2. 2579 2580 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2581 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2582 The 'M','N' parameters are 8,8, and have the same values on all procs. 2583 2584 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2585 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2586 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2587 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2588 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2589 matrix, and [DF] as another SeqAIJ matrix. 2590 2591 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2592 allocated for every row of the local diagonal submatrix, and o_nz 2593 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2594 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2595 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2596 In this case, the values of d_nz,o_nz are: 2597 .vb 2598 proc0 : d_nz = 2, o_nz = 2 2599 proc1 : d_nz = 3, o_nz = 2 2600 proc2 : d_nz = 1, o_nz = 4 2601 .ve 2602 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2603 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2604 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2605 34 values. 2606 2607 When the d_nnz, o_nnz parameters are specified, the storage is specified 2608 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2609 In the above case the values for d_nnz,o_nnz are: 2610 .vb 2611 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2612 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2613 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2614 .ve 2615 Here the space allocated is the sum of all of the above values, i.e. 34, and 2616 hence pre-allocation is perfect.
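 For the layout above, a minimal preallocation sketch as it would look on proc1 (assuming the matrix has already been created with MatCreate(), MatSetSizes() and MatSetType(); the array names below are illustrative only and are not part of this routine's interface):
.vb
   PetscInt d_nnz[3] = {3,3,2};   /* nonzeros per local row in the DIAGONAL block     */
   PetscInt o_nnz[3] = {2,1,1};   /* nonzeros per local row in the OFF-DIAGONAL block */
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve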
2617 2618 Level: intermediate 2619 2620 .keywords: matrix, aij, compressed row, sparse, parallel 2621 2622 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2623 MPIAIJ 2624 @*/ 2625 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2626 { 2627 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2628 2629 PetscFunctionBegin; 2630 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2631 if (f) { 2632 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2633 } 2634 PetscFunctionReturn(0); 2635 } 2636 2637 #undef __FUNCT__ 2638 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2639 /*@C 2640 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2641 CSR format the local rows. 2642 2643 Collective on MPI_Comm 2644 2645 Input Parameters: 2646 + comm - MPI communicator 2647 . m - number of local rows (Cannot be PETSC_DECIDE) 2648 . n - This value should be the same as the local size used in creating the 2649 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2650 calculated if N is given) For square matrices n is almost always m. 2651 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2652 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2653 . i - row indices 2654 . j - column indices 2655 - a - matrix values 2656 2657 Output Parameter: 2658 . mat - the matrix 2659 2660 Level: intermediate 2661 2662 Notes: 2663 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2664 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2665 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2666 2667 The i and j indices are 0 based 2668 2669 .keywords: matrix, aij, compressed row, sparse, parallel 2670 2671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2672 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2673 @*/ 2674 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2675 { 2676 PetscErrorCode ierr; 2677 2678 PetscFunctionBegin; 2679 if (i[0]) { 2680 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2681 } 2682 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2683 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2684 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2685 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatCreateMPIAIJ" 2691 /*@C 2692 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2693 (the default parallel PETSc format). For good matrix assembly performance 2694 the user should preallocate the matrix storage by setting the parameters 2695 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2696 performance can be increased by more than a factor of 50. 2697 2698 Collective on MPI_Comm 2699 2700 Input Parameters: 2701 + comm - MPI communicator 2702 . 
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2703 This value should be the same as the local size used in creating the 2704 y vector for the matrix-vector product y = Ax. 2705 . n - This value should be the same as the local size used in creating the 2706 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 2707 it calculated if N is given). For square matrices n is almost always m. 2708 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2709 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2710 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2711 (same value is used for all local rows) 2712 . d_nnz - array containing the number of nonzeros in the various rows of the 2713 DIAGONAL portion of the local submatrix (possibly different for each row) 2714 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2715 The size of this array is equal to the number of local rows, i.e. 'm'. 2716 You must leave room for the diagonal entry even if it is zero. 2717 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2718 submatrix (same value is used for all local rows). 2719 - o_nnz - array containing the number of nonzeros in the various rows of the 2720 OFF-DIAGONAL portion of the local submatrix (possibly different for 2721 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2722 structure. The size of this array is equal to the number 2723 of local rows, i.e. 'm'. 2724 2725 Output Parameter: 2726 . A - the matrix 2727 2728 Notes: 2729 If the *_nnz parameter is given then the *_nz parameter is ignored. 2730 2731 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2732 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2733 storage requirements for this matrix. 2734 2735 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2736 processor then it must be used on all processors that share the object for 2737 that argument. 2738 2739 The user MUST specify either the local or global matrix dimensions 2740 (possibly both). 2741 2742 The parallel matrix is partitioned such that the first m0 rows belong to 2743 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2744 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2745 2746 The DIAGONAL portion of the local submatrix of a processor can be defined 2747 as the submatrix which is obtained by extracting the part corresponding 2748 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2749 first row that belongs to the processor, and r2 is the last row belonging 2750 to this processor. This is a square mxm matrix. The remaining portion 2751 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2752 2753 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 2754 2755 When calling this routine with a single process communicator, a matrix of 2756 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2757 type of communicator, use the construction mechanism: 2758 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2759 2760 By default, this format uses inodes (identical nodes) when possible. 2761 We search for consecutive rows with the same nonzero structure, thereby 2762 reusing matrix information to achieve increased efficiency.
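 Spelled out, the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() construction mechanism mentioned above is sketched below; the sizes and preallocation arguments are placeholders to be supplied by the caller:
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve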
2763 2764 Options Database Keys: 2765 + -mat_no_inode - Do not use inodes 2766 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2767 - -mat_aij_oneindex - Internally use indexing starting at 1 2768 rather than 0. Note that when calling MatSetValues(), 2769 the user still MUST index entries starting at 0! 2770 2771 2772 Example usage: 2773 2774 Consider the following 8x8 matrix with 34 non-zero values, that is 2775 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2776 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2777 as follows: 2778 2779 .vb 2780 1 2 0 | 0 3 0 | 0 4 2781 Proc0 0 5 6 | 7 0 0 | 8 0 2782 9 0 10 | 11 0 0 | 12 0 2783 ------------------------------------- 2784 13 0 14 | 15 16 17 | 0 0 2785 Proc1 0 18 0 | 19 20 21 | 0 0 2786 0 0 0 | 22 23 0 | 24 0 2787 ------------------------------------- 2788 Proc2 25 26 27 | 0 0 28 | 29 0 2789 30 0 0 | 31 32 33 | 0 34 2790 .ve 2791 2792 This can be represented as a collection of submatrices as: 2793 2794 .vb 2795 A B C 2796 D E F 2797 G H I 2798 .ve 2799 2800 Here the submatrices A,B,C are owned by proc0, D,E,F are 2801 owned by proc1, and G,H,I are owned by proc2. 2802 2803 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2804 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2805 The 'M','N' parameters are 8,8, and have the same values on all procs. 2806 2807 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2808 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2809 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2810 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2811 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2812 matrix, and [DF] as another SeqAIJ matrix. 2813 2814 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2815 allocated for every row of the local diagonal submatrix, and o_nz 2816 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2817 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2818 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2819 In this case, the values of d_nz,o_nz are: 2820 .vb 2821 proc0 : d_nz = 2, o_nz = 2 2822 proc1 : d_nz = 3, o_nz = 2 2823 proc2 : d_nz = 1, o_nz = 4 2824 .ve 2825 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2826 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2827 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2828 34 values. 2829 2830 When the d_nnz, o_nnz parameters are specified, the storage is specified 2831 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2832 In the above case the values for d_nnz,o_nnz are: 2833 .vb 2834 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2835 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2836 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2837 .ve 2838 Here the space allocated is the sum of all of the above values, i.e. 34, and 2839 hence pre-allocation is perfect.
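 A minimal creation sketch for the layout above, as it would look on proc1 (each process passes its own local sizes and nnz arrays; the variable names are illustrative only):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
   /* followed by the usual MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() calls */
.ve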
2840 2841 Level: intermediate 2842 2843 .keywords: matrix, aij, compressed row, sparse, parallel 2844 2845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2846 MPIAIJ, MatCreateMPIAIJWithArrays() 2847 @*/ 2848 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2849 { 2850 PetscErrorCode ierr; 2851 PetscMPIInt size; 2852 2853 PetscFunctionBegin; 2854 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2855 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2856 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2857 if (size > 1) { 2858 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2859 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2860 } else { 2861 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2862 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2863 } 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2869 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2870 { 2871 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2872 2873 PetscFunctionBegin; 2874 *Ad = a->A; 2875 *Ao = a->B; 2876 *colmap = a->garray; 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2882 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2883 { 2884 PetscErrorCode ierr; 2885 PetscInt i; 2886 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2887 2888 PetscFunctionBegin; 2889 if (coloring->ctype == IS_COLORING_GLOBAL) { 2890 ISColoringValue *allcolors,*colors; 2891 ISColoring ocoloring; 2892 2893 /* set coloring for diagonal portion */ 2894 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2895 2896 /* set coloring for off-diagonal portion */ 2897 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2898 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2899 for (i=0; i<a->B->cmap.n; i++) { 2900 colors[i] = allcolors[a->garray[i]]; 2901 } 2902 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2903 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2904 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2905 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2906 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2907 ISColoringValue *colors; 2908 PetscInt *larray; 2909 ISColoring ocoloring; 2910 2911 /* set coloring for diagonal portion */ 2912 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2913 for (i=0; i<a->A->cmap.n; i++) { 2914 larray[i] = i + A->cmap.rstart; 2915 } 2916 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2917 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2918 for (i=0; i<a->A->cmap.n; i++) { 2919 colors[i] = coloring->colors[larray[i]]; 2920 } 2921 ierr = PetscFree(larray);CHKERRQ(ierr); 2922 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2923 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2924 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2925 2926 /* set coloring for off-diagonal portion */ 2927 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2928 ierr 
= ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2929 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2930 for (i=0; i<a->B->cmap.n; i++) { 2931 colors[i] = coloring->colors[larray[i]]; 2932 } 2933 ierr = PetscFree(larray);CHKERRQ(ierr); 2934 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2935 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2936 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2937 } else { 2938 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2939 } 2940 2941 PetscFunctionReturn(0); 2942 } 2943 2944 #if defined(PETSC_HAVE_ADIC) 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2947 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2948 { 2949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2954 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 #endif 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2961 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2962 { 2963 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2964 PetscErrorCode ierr; 2965 2966 PetscFunctionBegin; 2967 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2968 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatMerge" 2974 /*@C 2975 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2976 matrices from each processor 2977 2978 Collective on MPI_Comm 2979 2980 Input Parameters: 2981 + comm - the communicator the parallel matrix will live on 2982 . inmat - the input sequential matrix (one per process) 2983 . n - number of local columns (or PETSC_DECIDE) 2984 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2985 2986 Output Parameter: 2987 . outmat - the parallel matrix generated 2988 2989 Level: advanced 2990 2991 Notes: The number of columns of the matrix in EACH processor MUST be the same.
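 Note that this routine calls MatDestroy() on inmat before returning. A usage sketch (the names Aloc and Aglobal are illustrative only):
.vb
   Mat Aloc,Aglobal;
   /* each process assembles its own sequential piece Aloc, e.g. with MatCreateSeqAIJ() */
   ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
.ve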
2992 2993 @*/ 2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2995 { 2996 PetscErrorCode ierr; 2997 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2998 PetscInt *indx; 2999 PetscScalar *values; 3000 3001 PetscFunctionBegin; 3002 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3003 if (scall == MAT_INITIAL_MATRIX){ 3004 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3005 if (n == PETSC_DECIDE){ 3006 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3007 } 3008 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3009 rstart -= m; 3010 3011 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3012 for (i=0;i<m;i++) { 3013 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3014 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3015 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3016 } 3017 /* This routine will ONLY return MPIAIJ type matrix */ 3018 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3019 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3020 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3021 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3022 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3023 3024 } else if (scall == MAT_REUSE_MATRIX){ 3025 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3026 } else { 3027 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3028 } 3029 3030 for (i=0;i<m;i++) { 3031 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3032 Ii = i + rstart; 3033 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3034 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3035 } 3036 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3037 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 3040 PetscFunctionReturn(0); 3041 } 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatFileSplit" 3045 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3046 { 3047 PetscErrorCode ierr; 3048 PetscMPIInt rank; 3049 PetscInt m,N,i,rstart,nnz; 3050 size_t len; 3051 const PetscInt *indx; 3052 PetscViewer out; 3053 char *name; 3054 Mat B; 3055 const PetscScalar *values; 3056 3057 PetscFunctionBegin; 3058 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3059 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3060 /* Should this be the type of the diagonal block of A? 
*/ 3061 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3062 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3063 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3064 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3065 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3066 for (i=0;i<m;i++) { 3067 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3068 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3069 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3070 } 3071 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3072 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 3074 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3075 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3076 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3077 sprintf(name,"%s.%d",outfile,rank); 3078 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3079 ierr = PetscFree(name); 3080 ierr = MatView(B,out);CHKERRQ(ierr); 3081 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3082 ierr = MatDestroy(B);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3089 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3090 { 3091 PetscErrorCode ierr; 3092 Mat_Merge_SeqsToMPI *merge; 3093 PetscContainer container; 3094 3095 PetscFunctionBegin; 3096 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3097 if (container) { 3098 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3099 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3100 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3101 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3102 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3103 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3104 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3105 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3106 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3107 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3108 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3109 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3110 3111 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3112 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3113 } 3114 ierr = PetscFree(merge);CHKERRQ(ierr); 3115 3116 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 3120 #include "src/mat/utils/freespace.h" 3121 #include "petscbt.h" 3122 static PetscEvent logkey_seqstompinum = 0; 3123 #undef __FUNCT__ 3124 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3125 /*@C 3126 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3127 matrices from each processor 3128 3129 Collective on MPI_Comm 3130 3131 Input Parameters: 3132 + comm - the communicators the parallel matrix will live on 3133 . seqmat - the input sequential matrices 3134 . m - number of local rows (or PETSC_DECIDE) 3135 . n - number of local columns (or PETSC_DECIDE) 3136 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3137 3138 Output Parameter: 3139 . mpimat - the parallel matrix generated 3140 3141 Level: advanced 3142 3143 Notes: 3144 The dimensions of the sequential matrix in each processor MUST be the same. 3145 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3146 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 3147 @*/ 3148 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3149 { 3150 PetscErrorCode ierr; 3151 MPI_Comm comm=mpimat->comm; 3152 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3153 PetscMPIInt size,rank,taga,*len_s; 3154 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3155 PetscInt proc,m; 3156 PetscInt **buf_ri,**buf_rj; 3157 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3158 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3159 MPI_Request *s_waits,*r_waits; 3160 MPI_Status *status; 3161 MatScalar *aa=a->a,**abuf_r,*ba_i; 3162 Mat_Merge_SeqsToMPI *merge; 3163 PetscContainer container; 3164 3165 PetscFunctionBegin; 3166 if (!logkey_seqstompinum) { 3167 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr); 3168 } 3169 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3170 3171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3172 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3173 3174 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3175 if (container) { 3176 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3177 } 3178 bi = merge->bi; 3179 bj = merge->bj; 3180 buf_ri = merge->buf_ri; 3181 buf_rj = merge->buf_rj; 3182 3183 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3184 owners = merge->rowmap.range; 3185 len_s = merge->len_s; 3186 3187 /* send and recv matrix values */ 3188 /*-----------------------------*/ 3189 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3190 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3191 3192 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3193 for (proc=0,k=0; proc<size; proc++){ 3194 if (!len_s[proc]) continue; 3195 i = owners[proc]; 3196 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3197 k++; 3198 } 3199 3200 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3201 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3202 ierr = PetscFree(status);CHKERRQ(ierr); 3203 3204 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3205 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3206 3207 /* insert mat values of mpimat */ 3208 /*----------------------------*/ 3209 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3210 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3211 nextrow = buf_ri_k + merge->nrecv; 3212 nextai = nextrow + merge->nrecv; 3213 3214 for (k=0; k<merge->nrecv; k++){ 3215 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3216 nrows = *(buf_ri_k[k]); 3217 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 3218 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3219 } 3220 3221 /* set values of ba */ 3222 m = merge->rowmap.n; 3223 for (i=0; i<m; i++) { 3224 arow = owners[rank] + i; 3225 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3226 bnzi = bi[i+1] - bi[i]; 3227 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3228 3229 /* add local non-zero vals of this proc's seqmat into ba */ 3230 anzi = ai[arow+1] - ai[arow]; 3231 aj = a->j + ai[arow]; 3232 aa = a->a + ai[arow]; 3233 nextaj = 0; 3234 for (j=0; nextaj<anzi; j++){ 3235 if
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3236 ba_i[j] += aa[nextaj++]; 3237 } 3238 } 3239 3240 /* add received vals into ba */ 3241 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3242 /* i-th row */ 3243 if (i == *nextrow[k]) { 3244 anzi = *(nextai[k]+1) - *nextai[k]; 3245 aj = buf_rj[k] + *(nextai[k]); 3246 aa = abuf_r[k] + *(nextai[k]); 3247 nextaj = 0; 3248 for (j=0; nextaj<anzi; j++){ 3249 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3250 ba_i[j] += aa[nextaj++]; 3251 } 3252 } 3253 nextrow[k]++; nextai[k]++; 3254 } 3255 } 3256 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3257 } 3258 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3259 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3260 3261 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3262 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3263 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3264 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 static PetscEvent logkey_seqstompisym = 0; 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3271 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3272 { 3273 PetscErrorCode ierr; 3274 Mat B_mpi; 3275 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3276 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3277 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3278 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3279 PetscInt len,proc,*dnz,*onz; 3280 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3281 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3282 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3283 MPI_Status *status; 3284 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3285 PetscBT lnkbt; 3286 Mat_Merge_SeqsToMPI *merge; 3287 PetscContainer container; 3288 3289 PetscFunctionBegin; 3290 if (!logkey_seqstompisym) { 3291 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3292 } 3293 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3294 3295 /* make sure it is a PETSc comm */ 3296 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3297 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3298 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3299 3300 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3301 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3302 3303 /* determine row ownership */ 3304 /*---------------------------------------------------------*/ 3305 merge->rowmap.n = m; 3306 merge->rowmap.N = M; 3307 merge->rowmap.bs = 1; 3308 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3309 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3310 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3311 3312 m = merge->rowmap.n; 3313 M = merge->rowmap.N; 3314 owners = merge->rowmap.range; 3315 3316 /* determine the number of messages to send, their lengths */ 3317 /*---------------------------------------------------------*/ 3318 len_s = merge->len_s; 3319 3320 len = 0; /* length of buf_si[] */ 3321 merge->nsend = 0; 3322 for (proc=0; proc<size; proc++){ 3323 len_si[proc] = 0; 3324 if (proc == rank){ 3325 len_s[proc] = 0; 3326 } else { 3327 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3328 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* 
num of rows to be sent to [proc] */ 3329 } 3330 if (len_s[proc]) { 3331 merge->nsend++; 3332 nrows = 0; 3333 for (i=owners[proc]; i<owners[proc+1]; i++){ 3334 if (ai[i+1] > ai[i]) nrows++; 3335 } 3336 len_si[proc] = 2*(nrows+1); 3337 len += len_si[proc]; 3338 } 3339 } 3340 3341 /* determine the number and length of messages to receive for ij-structure */ 3342 /*-------------------------------------------------------------------------*/ 3343 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3344 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3345 3346 /* post the Irecv of j-structure */ 3347 /*-------------------------------*/ 3348 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3349 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3350 3351 /* post the Isend of j-structure */ 3352 /*--------------------------------*/ 3353 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3354 sj_waits = si_waits + merge->nsend; 3355 3356 for (proc=0, k=0; proc<size; proc++){ 3357 if (!len_s[proc]) continue; 3358 i = owners[proc]; 3359 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3360 k++; 3361 } 3362 3363 /* receives and sends of j-structure are complete */ 3364 /*------------------------------------------------*/ 3365 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3366 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3367 3368 /* send and recv i-structure */ 3369 /*---------------------------*/ 3370 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3371 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3372 3373 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3374 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3375 for (proc=0,k=0; proc<size; proc++){ 3376 if (!len_s[proc]) continue; 3377 /* form outgoing message for i-structure: 3378 buf_si[0]: nrows to be sent 3379 [1:nrows]: row index (global) 3380 [nrows+1:2*nrows+1]: i-structure index 3381 */ 3382 /*-------------------------------------------*/ 3383 nrows = len_si[proc]/2 - 1; 3384 buf_si_i = buf_si + nrows+1; 3385 buf_si[0] = nrows; 3386 buf_si_i[0] = 0; 3387 nrows = 0; 3388 for (i=owners[proc]; i<owners[proc+1]; i++){ 3389 anzi = ai[i+1] - ai[i]; 3390 if (anzi) { 3391 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3392 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3393 nrows++; 3394 } 3395 } 3396 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3397 k++; 3398 buf_si += len_si[proc]; 3399 } 3400 3401 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3402 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3403 3404 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3405 for (i=0; i<merge->nrecv; i++){ 3406 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3407 } 3408 3409 ierr = PetscFree(len_si);CHKERRQ(ierr); 3410 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3411 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3412 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3413 ierr = 
PetscFree(ri_waits);CHKERRQ(ierr); 3414 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3415 ierr = PetscFree(status);CHKERRQ(ierr); 3416 3417 /* compute a local seq matrix in each processor */ 3418 /*----------------------------------------------*/ 3419 /* allocate bi array and free space for accumulating nonzero column info */ 3420 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3421 bi[0] = 0; 3422 3423 /* create and initialize a linked list */ 3424 nlnk = N+1; 3425 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3426 3427 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3428 len = 0; 3429 len = ai[owners[rank+1]] - ai[owners[rank]]; 3430 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3431 current_space = free_space; 3432 3433 /* determine symbolic info for each local row */ 3434 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3435 nextrow = buf_ri_k + merge->nrecv; 3436 nextai = nextrow + merge->nrecv; 3437 for (k=0; k<merge->nrecv; k++){ 3438 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3439 nrows = *buf_ri_k[k]; 3440 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3441 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3442 } 3443 3444 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3445 len = 0; 3446 for (i=0;i<m;i++) { 3447 bnzi = 0; 3448 /* add local non-zero cols of this proc's seqmat into lnk */ 3449 arow = owners[rank] + i; 3450 anzi = ai[arow+1] - ai[arow]; 3451 aj = a->j + ai[arow]; 3452 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3453 bnzi += nlnk; 3454 /* add received col data into lnk */ 3455 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3456 if (i == *nextrow[k]) { /* i-th row */ 3457 anzi = *(nextai[k]+1) - *nextai[k]; 3458 aj = buf_rj[k] + *nextai[k]; 3459 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3460 bnzi += nlnk; 3461 nextrow[k]++; nextai[k]++; 3462 } 3463 } 3464 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3465 3466 /* if free space is not available, make more free space */ 3467 if (current_space->local_remaining<bnzi) { 3468 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3469 nspacedouble++; 3470 } 3471 /* copy data into free space, then initialize lnk */ 3472 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3473 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3474 3475 current_space->array += bnzi; 3476 current_space->local_used += bnzi; 3477 current_space->local_remaining -= bnzi; 3478 3479 bi[i+1] = bi[i] + bnzi; 3480 } 3481 3482 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3483 3484 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3485 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3486 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3487 3488 /* create symbolic parallel matrix B_mpi */ 3489 /*---------------------------------------*/ 3490 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3491 if (n==PETSC_DECIDE) { 3492 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3493 } else { 3494 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3495 } 3496 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3497 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3498 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3499 3500 /* B_mpi 
is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3501 B_mpi->assembled = PETSC_FALSE; 3502 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3503 merge->bi = bi; 3504 merge->bj = bj; 3505 merge->buf_ri = buf_ri; 3506 merge->buf_rj = buf_rj; 3507 merge->coi = PETSC_NULL; 3508 merge->coj = PETSC_NULL; 3509 merge->owners_co = PETSC_NULL; 3510 3511 /* attach the supporting struct to B_mpi for reuse */ 3512 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3513 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 3514 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3515 *mpimat = B_mpi; 3516 3517 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3518 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 3520 } 3521 3522 static PetscEvent logkey_seqstompi = 0; 3523 #undef __FUNCT__ 3524 #define __FUNCT__ "MatMerge_SeqsToMPI" 3525 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3526 { 3527 PetscErrorCode ierr; 3528 3529 PetscFunctionBegin; 3530 if (!logkey_seqstompi) { 3531 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3532 } 3533 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3534 if (scall == MAT_INITIAL_MATRIX){ 3535 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3536 } 3537 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3538 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3539 PetscFunctionReturn(0); 3540 } 3541 static PetscEvent logkey_getlocalmat = 0; 3542 #undef __FUNCT__ 3543 #define __FUNCT__ "MatGetLocalMat" 3544 /*@C 3545 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3546 3547 Not Collective 3548 3549 Input Parameters: 3550 + A - the matrix 3551 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3552 3553 Output Parameter: 3554 . 
A_loc - the local sequential matrix generated 3555 3556 Level: developer 3557 3558 @*/ 3559 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3560 { 3561 PetscErrorCode ierr; 3562 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3563 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3564 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3565 PetscScalar *aa=a->a,*ba=b->a,*ca; 3566 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3567 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3568 3569 PetscFunctionBegin; 3570 if (!logkey_getlocalmat) { 3571 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3572 } 3573 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3574 if (scall == MAT_INITIAL_MATRIX){ 3575 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3576 ci[0] = 0; 3577 for (i=0; i<am; i++){ 3578 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3579 } 3580 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3581 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3582 k = 0; 3583 for (i=0; i<am; i++) { 3584 ncols_o = bi[i+1] - bi[i]; 3585 ncols_d = ai[i+1] - ai[i]; 3586 /* off-diagonal portion of A */ 3587 for (jo=0; jo<ncols_o; jo++) { 3588 col = cmap[*bj]; 3589 if (col >= cstart) break; 3590 cj[k] = col; bj++; 3591 ca[k++] = *ba++; 3592 } 3593 /* diagonal portion of A */ 3594 for (j=0; j<ncols_d; j++) { 3595 cj[k] = cstart + *aj++; 3596 ca[k++] = *aa++; 3597 } 3598 /* off-diagonal portion of A */ 3599 for (j=jo; j<ncols_o; j++) { 3600 cj[k] = cmap[*bj++]; 3601 ca[k++] = *ba++; 3602 } 3603 } 3604 /* put together the new matrix */ 3605 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3606 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3607 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3608 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3609 mat->free_a = PETSC_TRUE; 3610 mat->free_ij = PETSC_TRUE; 3611 mat->nonew = 0; 3612 } else if (scall == MAT_REUSE_MATRIX){ 3613 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3614 ci = mat->i; cj = mat->j; ca = mat->a; 3615 for (i=0; i<am; i++) { 3616 /* off-diagonal portion of A */ 3617 ncols_o = bi[i+1] - bi[i]; 3618 for (jo=0; jo<ncols_o; jo++) { 3619 col = cmap[*bj]; 3620 if (col >= cstart) break; 3621 *ca++ = *ba++; bj++; 3622 } 3623 /* diagonal portion of A */ 3624 ncols_d = ai[i+1] - ai[i]; 3625 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3626 /* off-diagonal portion of A */ 3627 for (j=jo; j<ncols_o; j++) { 3628 *ca++ = *ba++; bj++; 3629 } 3630 } 3631 } else { 3632 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3633 } 3634 3635 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3636 PetscFunctionReturn(0); 3637 } 3638 3639 static PetscEvent logkey_getlocalmatcondensed = 0; 3640 #undef __FUNCT__ 3641 #define __FUNCT__ "MatGetLocalMatCondensed" 3642 /*@C 3643 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3644 3645 Not Collective 3646 3647 Input Parameters: 3648 + A - the matrix 3649 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3650 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3651 3652 Output Parameter: 3653 . 
A_loc - the local sequential matrix generated 3654 3655 Level: developer 3656 3657 @*/ 3658 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3659 { 3660 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3661 PetscErrorCode ierr; 3662 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3663 IS isrowa,iscola; 3664 Mat *aloc; 3665 3666 PetscFunctionBegin; 3667 if (!logkey_getlocalmatcondensed) { 3668 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr); 3669 } 3670 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3671 if (!row){ 3672 start = A->rmap.rstart; end = A->rmap.rend; 3673 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3674 } else { 3675 isrowa = *row; 3676 } 3677 if (!col){ 3678 start = A->cmap.rstart; 3679 cmap = a->garray; 3680 nzA = a->A->cmap.n; 3681 nzB = a->B->cmap.n; 3682 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3683 ncols = 0; 3684 for (i=0; i<nzB; i++) { 3685 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3686 else break; 3687 } 3688 imark = i; 3689 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3690 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3691 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3692 ierr = PetscFree(idx);CHKERRQ(ierr); 3693 } else { 3694 iscola = *col; 3695 } 3696 if (scall != MAT_INITIAL_MATRIX){ 3697 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3698 aloc[0] = *A_loc; 3699 } 3700 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3701 *A_loc = aloc[0]; 3702 ierr = PetscFree(aloc);CHKERRQ(ierr); 3703 if (!row){ 3704 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3705 } 3706 if (!col){ 3707 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3708 } 3709 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3710 PetscFunctionReturn(0); 3711 } 3712 3713 static PetscEvent logkey_GetBrowsOfAcols = 0; 3714 #undef __FUNCT__ 3715 #define __FUNCT__ "MatGetBrowsOfAcols" 3716 /*@C 3717 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 3718 3719 Collective on Mat 3720 3721 Input Parameters: 3722 + A,B - the matrices in mpiaij format 3723 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3724 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3725 3726 Output Parameters: 3727 + rowb, colb - index sets of rows and columns of B to extract 3728 . brstart - row index of B_seq at which the B->rmap.n rows taken from B's local rows begin 3729 - B_seq - the sequential matrix generated 3730 3731 Level: developer 3732 3733 @*/ 3734 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3735 { 3736 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3737 PetscErrorCode ierr; 3738 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3739 IS isrowb,iscolb; 3740 Mat *bseq; 3741 3742 PetscFunctionBegin; 3743 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3744 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3745 } 3746 if (!logkey_GetBrowsOfAcols) { 3747 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr); 3748 } 3749 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3750 3751 if (scall == MAT_INITIAL_MATRIX){ 3752 start = A->cmap.rstart; 3753 cmap = a->garray; 3754 nzA = a->A->cmap.n; 3755 nzB = a->B->cmap.n; 3756 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3757 ncols = 0; 3758 for (i=0; i<nzB; i++) { /* row < local row index */ 3759 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3760 else break; 3761 } 3762 imark = i; 3763 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3764 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3765 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3766 ierr = PetscFree(idx);CHKERRQ(ierr); 3767 *brstart = imark; 3768 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3769 } else { 3770 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3771 isrowb = *rowb; iscolb = *colb; 3772 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3773 bseq[0] = *B_seq; 3774 } 3775 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3776 *B_seq = bseq[0]; 3777 ierr = PetscFree(bseq);CHKERRQ(ierr); 3778 if (!rowb){ 3779 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3780 } else { 3781 *rowb = isrowb; 3782 } 3783 if (!colb){ 3784 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3785 } else { 3786 *colb = iscolb; 3787 } 3788 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3789 PetscFunctionReturn(0); 3790 } 3791 3792 static PetscEvent logkey_GetBrowsOfAocols = 0; 3793 #undef __FUNCT__ 3794 #define __FUNCT__ "MatGetBrowsOfAoCols" 3795 /*@C 3796 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3797 of the OFF-DIAGONAL portion of the local part of A 3798 3799 Collective on Mat 3800 3801 Input Parameters: 3802 + A,B - the matrices in mpiaij format 3803 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3804 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3805 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3806 3807 Output Parameter: 3808 + B_oth - the sequential matrix generated 3809 3810 Level: developer 3811 3812 @*/ 3813 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3814 { 3815 VecScatter_MPI_General *gen_to,*gen_from; 3816 PetscErrorCode ierr; 3817 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3818 Mat_SeqAIJ *b_oth; 3819 VecScatter ctx=a->Mvctx; 3820 MPI_Comm comm=ctx->comm; 3821 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3822 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3823 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3824 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3825 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 3826 MPI_Status *sstatus,rstatus; 3827 PetscInt *cols,sbs,rbs; 3828 PetscScalar *vals; 3829 3830 PetscFunctionBegin; 3831 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3832 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3833 } 3834 if (!logkey_GetBrowsOfAocols) { 3835 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3836 } 3837 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3838 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3839 3840 gen_to = (VecScatter_MPI_General*)ctx->todata; 3841 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3842 rvalues = gen_from->values; /* holds the length of receiving row */ 3843 svalues = gen_to->values; /* holds the length of sending row */ 3844 nrecvs = gen_from->n; 3845 nsends = gen_to->n; 3846 3847 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3848 srow = gen_to->indices; /* local row index to be sent */ 3849 sstarts = gen_to->starts; 3850 sprocs = gen_to->procs; 3851 sstatus = gen_to->sstatus; 3852 sbs = gen_to->bs; 3853 rstarts = gen_from->starts; 3854 rprocs = gen_from->procs; 3855 rbs = gen_from->bs; 3856 3857 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3858 if (scall == MAT_INITIAL_MATRIX){ 3859 /* i-array */ 3860 /*---------*/ 3861 /* post receives */ 3862 for (i=0; i<nrecvs; i++){ 3863 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 3864 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 3865 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3866 } 3867 3868 /* pack the outgoing message */ 3869 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3870 rstartsj = sstartsj + nsends +1; 3871 sstartsj[0] = 0; rstartsj[0] = 0; 3872 len = 0; /* total length of j or a array to be sent */ 3873 k = 0; 3874 for (i=0; i<nsends; i++){ 3875 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 3876 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3877 for (j=0; j<nrows; j++) { 3878 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3879 for (l=0; l<sbs; l++){ 3880 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3881 rowlen[j*sbs+l] = ncols; 3882 len += ncols; 3883 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3884 } 3885 k++; 3886 } 3887 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3888 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3889 } 3890 /* recvs and sends of i-array are completed */ 3891 i = nrecvs; 3892 while (i--) { 3893 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3894 } 3895 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3896 3897 /* allocate buffers for sending j and a arrays */ 3898 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3899 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3900 3901 /* create i-array of B_oth */ 3902 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3903 b_othi[0] = 0; 3904 len = 0; /* total length of j or a array to be received */ 3905 k = 0; 3906 for (i=0; i<nrecvs; i++){ 3907 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 3908 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 3909 for (j=0; j<nrows; j++) { 3910 b_othi[k+1] = b_othi[k] + rowlen[j]; 3911 len += rowlen[j]; k++; 3912 } 3913 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3914 } 3915 3916 /* allocate space for j and a arrays of B_oth */ 3917 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3918 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3919 3920 /* j-array */ 3921 /*---------*/ 3922 /* post receives of j-array */ 3923 for (i=0; i<nrecvs; i++){ 3924 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3925 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3926 } 3927 3928 /* pack the outgoing message j-array */ 3929 k = 0; 3930 for (i=0; i<nsends; i++){ 3931 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3932 bufJ = bufj+sstartsj[i]; 3933 for (j=0; j<nrows; j++) { 3934 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3935 for (ll=0; ll<sbs; ll++){ 3936 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3937 for (l=0; l<ncols; l++){ 3938 *bufJ++ = cols[l]; 3939 } 3940 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3941 } 3942 } 3943 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3944 } 3945 3946 /* recvs and sends of j-array are completed */ 3947 i = nrecvs; 3948 while (i--) { 3949 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3950 } 3951 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3952 } else if (scall == MAT_REUSE_MATRIX){ 3953 sstartsj = *startsj; 3954 rstartsj = sstartsj + nsends +1; 3955 bufa = *bufa_ptr; 3956 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3957 b_otha = b_oth->a; 3958 } else { 3959 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 3960 } 3961 3962 /* a-array */ 3963 /*---------*/ 3964 /* post receives of a-array */ 3965 for (i=0; i<nrecvs; i++){ 3966 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3967 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3968 } 3969 3970 /* pack the outgoing message a-array */ 3971 k = 0; 3972 for (i=0; i<nsends; i++){ 3973 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 3974 bufA = bufa+sstartsj[i]; 3975 for (j=0; j<nrows; j++) { 3976 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3977 for (ll=0; ll<sbs; ll++){ 3978 ierr =
MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3979 for (l=0; l<ncols; l++){ 3980 *bufA++ = vals[l]; 3981 } 3982 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3983 } 3984 } 3985 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3986 } 3987 /* recvs and sends of a-array are completed */ 3988 i = nrecvs; 3989 while (i--) { 3990 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3991 } 3992 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3993 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3994 3995 if (scall == MAT_INITIAL_MATRIX){ 3996 /* put together the new matrix */ 3997 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3998 3999 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4000 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4001 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4002 b_oth->free_a = PETSC_TRUE; 4003 b_oth->free_ij = PETSC_TRUE; 4004 b_oth->nonew = 0; 4005 4006 ierr = PetscFree(bufj);CHKERRQ(ierr); 4007 if (!startsj || !bufa_ptr){ 4008 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4009 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4010 } else { 4011 *startsj = sstartsj; 4012 *bufa_ptr = bufa; 4013 } 4014 } 4015 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatGetCommunicationStructs" 4021 /*@C 4022 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4023 4024 Not Collective 4025 4026 Input Parameters: 4027 . A - The matrix in mpiaij format 4028 4029 Output Parameter: 4030 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4031 . colmap - A map from global column index to local index into lvec 4032 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4033 4034 Level: developer 4035 4036 @*/ 4037 #if defined (PETSC_USE_CTABLE) 4038 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4039 #else 4040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4041 #endif 4042 { 4043 Mat_MPIAIJ *a; 4044 4045 PetscFunctionBegin; 4046 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4047 PetscValidPointer(lvec, 2) 4048 PetscValidPointer(colmap, 3) 4049 PetscValidPointer(multScatter, 4) 4050 a = (Mat_MPIAIJ *) A->data; 4051 if (lvec) *lvec = a->lvec; 4052 if (colmap) *colmap = a->colmap; 4053 if (multScatter) *multScatter = a->Mvctx; 4054 PetscFunctionReturn(0); 4055 } 4056 4057 EXTERN_C_BEGIN 4058 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4059 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4060 EXTERN_C_END 4061 4062 /*MC 4063 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4064 4065 Options Database Keys: 4066 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4067 4068 Level: beginner 4069 4070 .seealso: MatCreateMPIAIJ 4071 M*/ 4072 4073 EXTERN_C_BEGIN 4074 #undef __FUNCT__ 4075 #define __FUNCT__ "MatCreate_MPIAIJ" 4076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4077 { 4078 Mat_MPIAIJ *b; 4079 PetscErrorCode ierr; 4080 PetscMPIInt size; 4081 4082 PetscFunctionBegin; 4083 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4084 4085 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4086 B->data = (void*)b; 4087 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4088 B->factor = 0; 4089 B->rmap.bs = 1; 4090 B->assembled = PETSC_FALSE; 4091 B->mapping = 0; 4092 4093 B->insertmode = NOT_SET_VALUES; 4094 b->size = size; 4095 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4096 4097 /* build cache for off array entries formed */ 4098 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4099 b->donotstash = PETSC_FALSE; 4100 b->colmap = 0; 4101 b->garray = 0; 4102 b->roworiented = PETSC_TRUE; 4103 4104 /* stuff used for matrix vector multiply */ 4105 b->lvec = PETSC_NULL; 4106 b->Mvctx = PETSC_NULL; 4107 4108 /* stuff for MatGetRow() */ 4109 b->rowindices = 0; 4110 b->rowvalues = 0; 4111 b->getrowactive = PETSC_FALSE; 4112 4113 4114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4115 "MatStoreValues_MPIAIJ", 4116 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4118 "MatRetrieveValues_MPIAIJ", 4119 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4120 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4121 "MatGetDiagonalBlock_MPIAIJ", 4122 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4123 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4124 "MatIsTranspose_MPIAIJ", 4125 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4126 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4127 "MatMPIAIJSetPreallocation_MPIAIJ", 4128 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4129 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4130 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4131 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4132 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4133 "MatDiagonalScaleLocal_MPIAIJ", 4134 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4135 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4136 "MatConvert_MPIAIJ_MPICSRPERM", 4137 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4138 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4139 "MatConvert_MPIAIJ_MPICRL", 4140 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4141 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4142 PetscFunctionReturn(0); 4143 } 4144 EXTERN_C_END 4145 4146 #undef __FUNCT__ 4147 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4148 /*@C 4149 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4150 and "off-diagonal" part of the matrix in CSR format. 4151 4152 Collective on MPI_Comm 4153 4154 Input Parameters: 4155 + comm - MPI communicator 4156 . m - number of local rows (Cannot be PETSC_DECIDE) 4157 . n - This value should be the same as the local size used in creating the 4158 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4159 calculated if N is given) For square matrices n is almost always m. 4160 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4161 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4162 . i - row indices for "diagonal" portion of matrix 4163 . j - column indices 4164 . a - matrix values 4165 . oi - row indices for "off-diagonal" portion of matrix 4166 . oj - column indices 4167 - oa - matrix values 4168 4169 Output Parameter: 4170 . mat - the matrix 4171 4172 Level: advanced 4173 4174 Notes: 4175 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4176 4177 The i and j indices are 0 based 4178 4179 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4180 4181 4182 .keywords: matrix, aij, compressed row, sparse, parallel 4183 4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4185 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4186 @*/ 4187 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4188 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4189 { 4190 PetscErrorCode ierr; 4191 Mat_MPIAIJ *maij; 4192 4193 PetscFunctionBegin; 4194 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4195 if (i[0]) { 4196 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4197 } 4198 if (oi[0]) { 4199 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4200 } 4201 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4202 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4203 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4204 maij = (Mat_MPIAIJ*) (*mat)->data; 4205 maij->donotstash = PETSC_TRUE; 4206 (*mat)->preallocated = PETSC_TRUE; 4207 4208 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4209 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4210 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4211 4212 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4213 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4214 4215 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4216 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4217 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4219 4220 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4221 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4222 PetscFunctionReturn(0); 4223 } 4224 4225 /* 4226 Special version for direct calls from Fortran 4227 */ 4228 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4229 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4230 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4231 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4232 #endif 4233 4234 /* Change these macros so can be used in void function */ 4235 #undef CHKERRQ 4236 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4237 #undef SETERRQ2 4238 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4239 #undef SETERRQ 4240 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4241 4242 EXTERN_C_BEGIN 4243 #undef __FUNCT__ 4244 #define __FUNCT__ "matsetvaluesmpiaij_" 4245 
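/*
   The stub below is the Fortran-callable entry point for MatSetValues() on MPIAIJ matrices.
   It inlines the insertion logic of MatSetValues_MPIAIJ() rather than calling it, returns void,
   and aborts through the CHKERRABORT-based macros redefined above on any error.  As a rough,
   purely illustrative sketch (not part of the original source), a call arriving from Fortran
   behaves like

       ierr = MatSetValues(*mmat,*mm,im,*mn,in,v,*maddv);

   with the scalar arguments dereferenced, minus the error-code return.
*/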
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4246 { 4247 Mat mat = *mmat; 4248 PetscInt m = *mm, n = *mn; 4249 InsertMode addv = *maddv; 4250 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4251 PetscScalar value; 4252 PetscErrorCode ierr; 4253 4254 MatPreallocated(mat); 4255 if (mat->insertmode == NOT_SET_VALUES) { 4256 mat->insertmode = addv; 4257 } 4258 #if defined(PETSC_USE_DEBUG) 4259 else if (mat->insertmode != addv) { 4260 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4261 } 4262 #endif 4263 { 4264 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4265 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4266 PetscTruth roworiented = aij->roworiented; 4267 4268 /* Some Variables required in the macro */ 4269 Mat A = aij->A; 4270 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4271 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4272 PetscScalar *aa = a->a; 4273 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4274 Mat B = aij->B; 4275 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4276 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4277 PetscScalar *ba = b->a; 4278 4279 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4280 PetscInt nonew = a->nonew; 4281 PetscScalar *ap1,*ap2; 4282 4283 PetscFunctionBegin; 4284 for (i=0; i<m; i++) { 4285 if (im[i] < 0) continue; 4286 #if defined(PETSC_USE_DEBUG) 4287 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4288 #endif 4289 if (im[i] >= rstart && im[i] < rend) { 4290 row = im[i] - rstart; 4291 lastcol1 = -1; 4292 rp1 = aj + ai[row]; 4293 ap1 = aa + ai[row]; 4294 rmax1 = aimax[row]; 4295 nrow1 = ailen[row]; 4296 low1 = 0; 4297 high1 = nrow1; 4298 lastcol2 = -1; 4299 rp2 = bj + bi[row]; 4300 ap2 = ba + bi[row]; 4301 rmax2 = bimax[row]; 4302 nrow2 = bilen[row]; 4303 low2 = 0; 4304 high2 = nrow2; 4305 4306 for (j=0; j<n; j++) { 4307 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4308 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4309 if (in[j] >= cstart && in[j] < cend){ 4310 col = in[j] - cstart; 4311 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4312 } else if (in[j] < 0) continue; 4313 #if defined(PETSC_USE_DEBUG) 4314 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4315 #endif 4316 else { 4317 if (mat->was_assembled) { 4318 if (!aij->colmap) { 4319 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4320 } 4321 #if defined (PETSC_USE_CTABLE) 4322 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4323 col--; 4324 #else 4325 col = aij->colmap[in[j]] - 1; 4326 #endif 4327 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4328 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4329 col = in[j]; 4330 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4331 B = aij->B; 4332 b = (Mat_SeqAIJ*)B->data; 4333 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4334 rp2 = bj + bi[row]; 4335 ap2 = ba + bi[row]; 4336 rmax2 = bimax[row]; 4337 nrow2 = bilen[row]; 4338 low2 = 0; 4339 high2 = nrow2; 4340 bm = aij->B->rmap.n; 4341 ba = b->a; 4342 } 4343 } else col = in[j]; 4344 
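/* col now indexes the off-diagonal block B: a compacted local column index when the colmap lookup on a previously assembled matrix succeeds, otherwise the global column index in[j] (before first assembly, or after DisAssemble_MPIAIJ()) */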
MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4345 } 4346 } 4347 } else { 4348 if (!aij->donotstash) { 4349 if (roworiented) { 4350 if (ignorezeroentries && v[i*n] == 0.0) continue; 4351 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4352 } else { 4353 if (ignorezeroentries && v[i] == 0.0) continue; 4354 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4355 } 4356 } 4357 } 4358 }} 4359 PetscFunctionReturnVoid(); 4360 } 4361 EXTERN_C_END 4362 4363
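/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays().  The sizes, layout and values below are
   purely illustrative (a hypothetical 4x4 matrix on two processes, each owning 2 rows and
   2 columns); they are not taken from any PETSc example.

   On rank 0 the "diagonal" block covers global columns 0-1 (j[] holds 0-based indices local
   to that block) while the "off-diagonal" block stores its entries with global column indices:

       PetscInt       i[]  = {0,2,3},  j[]  = {0,1,1};
       PetscScalar    a[]  = {1.0,2.0,3.0};
       PetscInt       oi[] = {0,1,2},  oj[] = {2,3};
       PetscScalar    oa[] = {4.0,5.0};
       Mat            C;
       PetscErrorCode ierr;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);

   Because the arrays are not copied, they must remain valid for as long as C exists.
*/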