#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
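/*
   The routines below are reached through the generic MatSetValues()/MatAssemblyBegin()/
   MatAssemblyEnd() interface. A minimal usage sketch (not part of this file; the sizes and
   preallocation counts are illustrative assumptions only):

      Mat         A;
      PetscInt    row,cols[3];
      PetscScalar vals[3];

      MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,2,PETSC_NULL,&A);
      for (row=rstart; row<rend; row++) {
        ... fill cols[] and vals[] for this local row ...
        MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   Values destined for rows owned by other processes are stashed by MatSetValues_MPIAIJ()
   and communicated during assembly.
*/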
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
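            /* col < 0 means the global column in[j] is not yet in the off-diagonal
               map garray[]; unless new nonzeros are forbidden, the matrix is
               disassembled below so the new column can be accommodated */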
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
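  /* the stash counts retrieved below are used only for informational logging */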
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = 
mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
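    /* rank 0 writes its own values first, then receives and writes each remaining process's values in turn */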
ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
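  /* ASCII, draw, socket, and binary viewers are all handled by MatView_MPIAIJ_ASCIIorDraworSocket() above */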
1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRelax_MPIAIJ" 1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscErrorCode ierr; 1021 Vec bb1; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatPermute_MPIAIJ" 1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1091 { 1092 MPI_Comm comm,pcomm; 1093 PetscInt first,local_size,nrows,*rows; 1094 int ntids; 1095 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1096 PetscErrorCode 
ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1100 /* make a collective version of 'rowp' */ 1101 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1102 if (pcomm==comm) { 1103 crowp = rowp; 1104 } else { 1105 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1106 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1108 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1109 } 1110 /* collect the global row permutation and invert it */ 1111 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1112 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1113 if (pcomm!=comm) { 1114 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1115 } 1116 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1117 /* get the local target indices */ 1118 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1120 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1122 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1123 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1124 /* the column permutation is so much easier; 1125 make a local version of 'colp' and invert it */ 1126 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1127 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1128 if (ntids==1) { 1129 lcolp = colp; 1130 } else { 1131 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1132 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1133 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1134 } 1135 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1136 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1137 if (ntids>1) { 1138 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1139 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1140 } 1141 /* now we just get the submatrix */ 1142 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1143 /* clean up */ 1144 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1145 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1154 Mat A = mat->A,B = mat->B; 1155 PetscErrorCode ierr; 1156 PetscReal isend[5],irecv[5]; 1157 1158 PetscFunctionBegin; 1159 info->block_size = 1.0; 1160 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1162 isend[3] = info->memory; isend[4] = info->mallocs; 1163 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1164 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1165 isend[3] += info->memory; isend[4] += info->mallocs; 1166 if (flag == MAT_LOCAL) { 1167 info->nz_used = isend[0]; 1168 info->nz_allocated = isend[1]; 1169 info->nz_unneeded = isend[2]; 1170 info->memory = isend[3]; 1171 info->mallocs = isend[4]; 1172 } else if (flag == MAT_GLOBAL_MAX) { 1173 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1174 info->nz_used = irecv[0]; 1175 info->nz_allocated = irecv[1]; 1176 info->nz_unneeded = irecv[2]; 1177 info->memory = irecv[3]; 1178 info->mallocs = irecv[4]; 1179 } else if (flag == 
MAT_GLOBAL_SUM) { 1180 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1181 info->nz_used = irecv[0]; 1182 info->nz_allocated = irecv[1]; 1183 info->nz_unneeded = irecv[2]; 1184 info->memory = irecv[3]; 1185 info->mallocs = irecv[4]; 1186 } 1187 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1188 info->fill_ratio_needed = 0; 1189 info->factor_mallocs = 0; 1190 info->rows_global = (double)matin->rmap.N; 1191 info->columns_global = (double)matin->cmap.N; 1192 info->rows_local = (double)matin->rmap.n; 1193 info->columns_local = (double)matin->cmap.N; 1194 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSetOption_MPIAIJ" 1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 switch (op) { 1207 case MAT_NO_NEW_NONZERO_LOCATIONS: 1208 case MAT_YES_NEW_NONZERO_LOCATIONS: 1209 case MAT_COLUMNS_UNSORTED: 1210 case MAT_COLUMNS_SORTED: 1211 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1212 case MAT_KEEP_ZEROED_ROWS: 1213 case MAT_NEW_NONZERO_LOCATION_ERR: 1214 case MAT_USE_INODES: 1215 case MAT_DO_NOT_USE_INODES: 1216 case MAT_IGNORE_ZERO_ENTRIES: 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROW_ORIENTED: 1221 a->roworiented = PETSC_TRUE; 1222 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1223 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1224 break; 1225 case MAT_ROWS_SORTED: 1226 case MAT_ROWS_UNSORTED: 1227 case MAT_YES_NEW_DIAGONALS: 1228 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1229 break; 1230 case MAT_COLUMN_ORIENTED: 1231 a->roworiented = PETSC_FALSE; 1232 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1233 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1234 break; 1235 case MAT_IGNORE_OFF_PROC_ENTRIES: 1236 a->donotstash = PETSC_TRUE; 1237 break; 1238 case MAT_NO_NEW_DIAGONALS: 1239 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 break; 1248 case MAT_NOT_SYMMETRIC: 1249 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1250 case MAT_NOT_HERMITIAN: 1251 case MAT_NOT_SYMMETRY_ETERNAL: 1252 break; 1253 default: 1254 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRow_MPIAIJ" 1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1264 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1265 PetscErrorCode ierr; 1266 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1267 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1268 PetscInt *cmap,*idx_p; 1269 1270 PetscFunctionBegin; 1271 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1272 mat->getrowactive = PETSC_TRUE; 1273 1274 if (!mat->rowvalues && (idx || v)) { 1275 /* 1276 allocate enough space to hold information from the longest row. 
1277 */ 1278 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1279 PetscInt max = 1,tmp; 1280 for (i=0; i<matin->rmap.n; i++) { 1281 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1282 if (max < tmp) { max = tmp; } 1283 } 1284 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1285 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1286 } 1287 1288 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1289 lrow = row - rstart; 1290 1291 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1292 if (!v) {pvA = 0; pvB = 0;} 1293 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1294 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1295 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1296 nztot = nzA + nzB; 1297 1298 cmap = mat->garray; 1299 if (v || idx) { 1300 if (nztot) { 1301 /* Sort by increasing column numbers, assuming A and B already sorted */ 1302 PetscInt imark = -1; 1303 if (v) { 1304 *v = v_p = mat->rowvalues; 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1307 else break; 1308 } 1309 imark = i; 1310 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1311 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1312 } 1313 if (idx) { 1314 *idx = idx_p = mat->rowindices; 1315 if (imark > -1) { 1316 for (i=0; i<imark; i++) { 1317 idx_p[i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!aij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1349 } 1350 aij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatNorm_MPIAIJ" 1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,j,cstart = mat->cmap.rstart; 1362 PetscReal sum = 0.0; 1363 PetscScalar *v; 1364 1365 PetscFunctionBegin; 1366 if (aij->size == 1) { 1367 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1368 } else { 1369 if (type == NORM_FROBENIUS) { 1370 v = amat->a; 1371 for (i=0; i<amat->nz; i++) { 1372 #if defined(PETSC_USE_COMPLEX) 1373 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 v = bmat->a; 1379 for (i=0; i<bmat->nz; i++) { 1380 #if defined(PETSC_USE_COMPLEX) 1381 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1382 #else 1383 sum += (*v)*(*v); v++; 1384 #endif 1385 } 1386 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 *norm = sqrt(*norm); 
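      /* Frobenius norm: the square of every stored entry in both the diagonal (A) and
         off-diagonal (B) blocks is summed locally, reduced with MPI_SUM, and the square
         root taken, i.e. ||M||_F = sqrt(sum_ij |m_ij|^2) */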
1388 } else if (type == NORM_1) { /* max column norm */ 1389 PetscReal *tmp,*tmp2; 1390 PetscInt *jj,*garray = aij->garray; 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1393 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1394 *norm = 0.0; 1395 v = amat->a; jj = amat->j; 1396 for (j=0; j<amat->nz; j++) { 1397 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a; jj = bmat->j; 1400 for (j=0; j<bmat->nz; j++) { 1401 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1402 } 1403 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1404 for (j=0; j<mat->cmap.N; j++) { 1405 if (tmp2[j] > *norm) *norm = tmp2[j]; 1406 } 1407 ierr = PetscFree(tmp);CHKERRQ(ierr); 1408 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1409 } else if (type == NORM_INFINITY) { /* max row norm */ 1410 PetscReal ntemp = 0.0; 1411 for (j=0; j<aij->A->rmap.n; j++) { 1412 v = amat->a + amat->i[j]; 1413 sum = 0.0; 1414 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1415 sum += PetscAbsScalar(*v); v++; 1416 } 1417 v = bmat->a + bmat->i[j]; 1418 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1419 sum += PetscAbsScalar(*v); v++; 1420 } 1421 if (sum > ntemp) ntemp = sum; 1422 } 1423 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1424 } else { 1425 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1426 } 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatTranspose_MPIAIJ" 1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1434 { 1435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1436 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1437 PetscErrorCode ierr; 1438 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1439 Mat B; 1440 PetscScalar *array; 1441 1442 PetscFunctionBegin; 1443 if (!matout && M != N) { 1444 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1445 } 1446 1447 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1448 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1449 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1450 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1451 1452 /* copy over the A part */ 1453 Aloc = (Mat_SeqAIJ*)a->A->data; 1454 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1455 row = A->rmap.rstart; 1456 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1457 for (i=0; i<m; i++) { 1458 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1459 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1460 } 1461 aj = Aloc->j; 1462 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1463 1464 /* copy over the B part */ 1465 Aloc = (Mat_SeqAIJ*)a->B->data; 1466 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1467 row = A->rmap.rstart; 1468 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1469 ct = cols; 1470 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1471 for (i=0; i<m; i++) { 1472 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1473 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1474 } 1475 ierr = PetscFree(ct);CHKERRQ(ierr); 1476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 if (matout) { 1479 *matout = B; 1480 } else { 1481 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1482 } 1483 
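  /* in the in-place case MatHeaderCopy() has replaced A's contents by the assembled transpose B */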
PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1489 { 1490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1491 Mat a = aij->A,b = aij->B; 1492 PetscErrorCode ierr; 1493 PetscInt s1,s2,s3; 1494 1495 PetscFunctionBegin; 1496 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1497 if (rr) { 1498 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1499 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1500 /* Overlap communication with computation. */ 1501 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1502 } 1503 if (ll) { 1504 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1505 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1506 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1507 } 1508 /* scale the diagonal block */ 1509 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1510 1511 if (rr) { 1512 /* Do a scatter end and then right scale the off-diagonal block */ 1513 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1514 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1515 } 1516 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1529 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1535 { 1536 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatEqual_MPIAIJ" 1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1547 { 1548 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1549 Mat a,b,c,d; 1550 PetscTruth flg; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 a = matA->A; b = matA->B; 1555 c = matB->A; d = matB->B; 1556 1557 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1558 if (flg) { 1559 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1560 } 1561 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatCopy_MPIAIJ" 1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1568 { 1569 PetscErrorCode ierr; 1570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1571 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1572 1573 PetscFunctionBegin; 1574 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1575 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1576 /* because of the column compression in the off-processor part of the matrix a->B, 1577 the number of columns in a->B and b->B may be different, hence we cannot call 1578 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1579 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1580 then copying the submatrices */ 1581 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1582 } else { 1583 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1584 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1592 { 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #include "petscblaslapack.h" 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatAXPY_MPIAIJ" 1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1604 { 1605 PetscErrorCode ierr; 1606 PetscInt i; 1607 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1608 PetscBLASInt bnz,one=1; 1609 Mat_SeqAIJ *x,*y; 1610 1611 PetscFunctionBegin; 1612 if (str == SAME_NONZERO_PATTERN) { 1613 PetscScalar alpha = a; 1614 x = (Mat_SeqAIJ *)xx->A->data; 1615 y = (Mat_SeqAIJ *)yy->A->data; 1616 bnz = (PetscBLASInt)x->nz; 1617 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1618 x = (Mat_SeqAIJ *)xx->B->data; 1619 y = (Mat_SeqAIJ *)yy->B->data; 1620 bnz = (PetscBLASInt)x->nz; 1621 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1622 } else if (str == SUBSET_NONZERO_PATTERN) { 1623 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1624 1625 x = (Mat_SeqAIJ *)xx->B->data; 1626 y = (Mat_SeqAIJ *)yy->B->data; 1627 if (y->xtoy && y->XtoY != xx->B) { 1628 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1629 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1630 } 1631 if (!y->xtoy) { /* get xtoy */ 1632 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1633 y->XtoY = xx->B; 1634 } 1635 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1636 } else { 1637 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1638 } 1639 PetscFunctionReturn(0); 1640 } 1641 1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatConjugate_MPIAIJ" 1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1647 { 1648 #if defined(PETSC_USE_COMPLEX) 1649 PetscErrorCode ierr; 1650 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1651 1652 PetscFunctionBegin; 1653 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1654 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1655 #else 1656 PetscFunctionBegin; 1657 #endif 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatRealPart_MPIAIJ" 1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1670 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1677 { 1678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1679 PetscErrorCode ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1683 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #ifdef PETSC_HAVE_PBGL 1688 #include <boost/parallel/mpi/bsp_process_group.hpp> 1689 typedef boost::parallel::mpi::bsp_process_group 
process_group_type; 1690 1691 #include <boost/graph/distributed/adjacency_list.hpp> 1692 #include <boost/parallel/mpi/bsp_process_group.hpp> 1693 1694 #include <boost/graph/distributed/petsc/interface.hpp> 1695 #include <boost/graph/distributed/ilu_0.hpp> 1696 1697 namespace petsc = boost::distributed::petsc; 1698 using namespace std; 1699 typedef double value_type; 1700 typedef boost::graph::distributed::ilu_elimination_state elimination_state; 1701 typedef boost::adjacency_list<boost::listS, 1702 boost::distributedS<process_group_type, boost::vecS>, 1703 boost::bidirectionalS, 1704 // Vertex properties 1705 boost::no_property, 1706 // Edge properties 1707 boost::property<boost::edge_weight_t, value_type, 1708 boost::property<boost::edge_finished_t, elimination_state> > > graph_type; 1709 1710 typedef boost::graph_traits<graph_type>::vertex_descriptor vertex_type; 1711 typedef boost::graph_traits<graph_type>::edge_descriptor edge_type; 1712 typedef boost::property_map<graph_type, boost::edge_weight_t>::type weight_map_type; 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1716 /* 1717 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1718 */ 1719 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1720 { 1721 PetscTruth row_identity, col_identity; 1722 PetscObjectContainer c; 1723 PetscInt m, n, M, N; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1728 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1729 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1730 if (!row_identity || !col_identity) { 1731 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1732 } 1733 1734 process_group_type pg; 1735 graph_type* graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1736 graph_type& graph = *graph_p; 1737 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1738 1739 //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph)); 1740 boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph); 1741 BGL_FORALL_EDGES(e, graph, graph_type) 1742 put(finished, e, boost::graph::distributed::unseen); 1743 1744 ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph)); 1745 1746 /* put together the new matrix */ 1747 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1748 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1749 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1750 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1751 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1752 ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1754 (*fact)->factor = FACTOR_LU; 1755 1756 ierr = PetscObjectContainerCreate(A->comm, &c); 1757 ierr = PetscObjectContainerSetPointer(c, graph_p); 1758 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1764 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1765 { 1766 PetscFunctionBegin; 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatSolve_MPIAIJ" 
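/*
   Note: MatLUFactorNumeric_MPIAIJ() above is an empty stub on this PBGL path;
   the ilu_0() call inside MatILUFactorSymbolic_MPIAIJ() already computes the
   factor values on the distributed graph, so no separate numeric phase appears
   to be needed here.
*/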
1772 /* 1773 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1774 */ 1775 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1776 { 1777 graph_type* graph_p; 1778 PetscObjectContainer c; 1779 PetscErrorCode ierr; 1780 1781 PetscFunctionBegin; 1782 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1783 ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr); 1784 ierr = VecCopy(b, x); CHKERRQ(ierr); 1785 PetscFunctionReturn(0); 1786 } 1787 #endif 1788 1789 /* -------------------------------------------------------------------*/ 1790 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1791 MatGetRow_MPIAIJ, 1792 MatRestoreRow_MPIAIJ, 1793 MatMult_MPIAIJ, 1794 /* 4*/ MatMultAdd_MPIAIJ, 1795 MatMultTranspose_MPIAIJ, 1796 MatMultTransposeAdd_MPIAIJ, 1797 #ifdef PETSC_HAVE_PBGL 1798 MatSolve_MPIAIJ, 1799 #else 1800 0, 1801 #endif 1802 0, 1803 0, 1804 /*10*/ 0, 1805 0, 1806 0, 1807 MatRelax_MPIAIJ, 1808 MatTranspose_MPIAIJ, 1809 /*15*/ MatGetInfo_MPIAIJ, 1810 MatEqual_MPIAIJ, 1811 MatGetDiagonal_MPIAIJ, 1812 MatDiagonalScale_MPIAIJ, 1813 MatNorm_MPIAIJ, 1814 /*20*/ MatAssemblyBegin_MPIAIJ, 1815 MatAssemblyEnd_MPIAIJ, 1816 0, 1817 MatSetOption_MPIAIJ, 1818 MatZeroEntries_MPIAIJ, 1819 /*25*/ MatZeroRows_MPIAIJ, 1820 0, 1821 #ifdef PETSC_HAVE_PBGL 1822 MatLUFactorNumeric_MPIAIJ, 1823 #else 1824 0, 1825 #endif 1826 0, 1827 0, 1828 /*30*/ MatSetUpPreallocation_MPIAIJ, 1829 #ifdef PETSC_HAVE_PBGL 1830 MatILUFactorSymbolic_MPIAIJ, 1831 #else 1832 0, 1833 #endif 1834 0, 1835 0, 1836 0, 1837 /*35*/ MatDuplicate_MPIAIJ, 1838 0, 1839 0, 1840 0, 1841 0, 1842 /*40*/ MatAXPY_MPIAIJ, 1843 MatGetSubMatrices_MPIAIJ, 1844 MatIncreaseOverlap_MPIAIJ, 1845 MatGetValues_MPIAIJ, 1846 MatCopy_MPIAIJ, 1847 /*45*/ 0, 1848 MatScale_MPIAIJ, 1849 0, 1850 0, 1851 0, 1852 /*50*/ MatSetBlockSize_MPIAIJ, 1853 0, 1854 0, 1855 0, 1856 0, 1857 /*55*/ MatFDColoringCreate_MPIAIJ, 1858 0, 1859 MatSetUnfactored_MPIAIJ, 1860 MatPermute_MPIAIJ, 1861 0, 1862 /*60*/ MatGetSubMatrix_MPIAIJ, 1863 MatDestroy_MPIAIJ, 1864 MatView_MPIAIJ, 1865 0, 1866 0, 1867 /*65*/ 0, 1868 0, 1869 0, 1870 0, 1871 0, 1872 /*70*/ 0, 1873 0, 1874 MatSetColoring_MPIAIJ, 1875 #if defined(PETSC_HAVE_ADIC) 1876 MatSetValuesAdic_MPIAIJ, 1877 #else 1878 0, 1879 #endif 1880 MatSetValuesAdifor_MPIAIJ, 1881 /*75*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /*80*/ 0, 1887 0, 1888 0, 1889 0, 1890 /*84*/ MatLoad_MPIAIJ, 1891 0, 1892 0, 1893 0, 1894 0, 1895 0, 1896 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1897 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1898 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1899 MatPtAP_Basic, 1900 MatPtAPSymbolic_MPIAIJ, 1901 /*95*/ MatPtAPNumeric_MPIAIJ, 1902 0, 1903 0, 1904 0, 1905 0, 1906 /*100*/0, 1907 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1908 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1909 MatConjugate_MPIAIJ, 1910 0, 1911 /*105*/MatSetValuesRow_MPIAIJ, 1912 MatRealPart_MPIAIJ, 1913 MatImaginaryPart_MPIAIJ}; 1914 1915 /* ----------------------------------------------------------------------------------------*/ 1916 1917 EXTERN_C_BEGIN 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1920 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1921 { 1922 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1927 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 EXTERN_C_END 1931 1932 EXTERN_C_BEGIN 1933 #undef __FUNCT__ 1934 #define 
__FUNCT__ "MatRetrieveValues_MPIAIJ" 1935 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1936 { 1937 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1942 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 EXTERN_C_END 1946 1947 #include "petscpc.h" 1948 EXTERN_C_BEGIN 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1951 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1952 { 1953 Mat_MPIAIJ *b; 1954 PetscErrorCode ierr; 1955 PetscInt i; 1956 1957 PetscFunctionBegin; 1958 B->preallocated = PETSC_TRUE; 1959 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1960 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1961 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1962 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1963 1964 B->rmap.bs = B->cmap.bs = 1; 1965 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1966 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1967 if (d_nnz) { 1968 for (i=0; i<B->rmap.n; i++) { 1969 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1970 } 1971 } 1972 if (o_nnz) { 1973 for (i=0; i<B->rmap.n; i++) { 1974 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1975 } 1976 } 1977 b = (Mat_MPIAIJ*)B->data; 1978 1979 /* Explicitly create 2 MATSEQAIJ matrices. */ 1980 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1981 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1982 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1983 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1984 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1985 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1986 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1987 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1988 1989 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1990 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1991 1992 PetscFunctionReturn(0); 1993 } 1994 EXTERN_C_END 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1998 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1999 { 2000 Mat mat; 2001 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 *newmat = 0; 2006 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2007 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2008 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2009 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2010 a = (Mat_MPIAIJ*)mat->data; 2011 2012 mat->factor = matin->factor; 2013 mat->rmap.bs = matin->rmap.bs; 2014 mat->assembled = PETSC_TRUE; 2015 mat->insertmode = NOT_SET_VALUES; 2016 mat->preallocated = PETSC_TRUE; 2017 2018 a->size = oldmat->size; 2019 a->rank = oldmat->rank; 2020 a->donotstash = oldmat->donotstash; 2021 a->roworiented = oldmat->roworiented; 2022 a->rowindices = 0; 2023 a->rowvalues = 0; 2024 a->getrowactive = PETSC_FALSE; 2025 2026 ierr = 
PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2027 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2028 2029 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2030 if (oldmat->colmap) { 2031 #if defined (PETSC_USE_CTABLE) 2032 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2033 #else 2034 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2035 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2036 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2037 #endif 2038 } else a->colmap = 0; 2039 if (oldmat->garray) { 2040 PetscInt len; 2041 len = oldmat->B->cmap.n; 2042 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2043 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2044 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2045 } else a->garray = 0; 2046 2047 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2048 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2049 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2050 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2051 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2052 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2053 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2054 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2055 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2056 *newmat = mat; 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #include "petscsys.h" 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatLoad_MPIAIJ" 2064 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2065 { 2066 Mat A; 2067 PetscScalar *vals,*svals; 2068 MPI_Comm comm = ((PetscObject)viewer)->comm; 2069 MPI_Status status; 2070 PetscErrorCode ierr; 2071 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2072 PetscInt i,nz,j,rstart,rend,mmax; 2073 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2074 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2075 PetscInt cend,cstart,n,*rowners; 2076 int fd; 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2080 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2081 if (!rank) { 2082 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2083 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2084 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2085 } 2086 2087 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2088 M = header[1]; N = header[2]; 2089 /* determine ownership of all rows */ 2090 m = M/size + ((M % size) > rank); 2091 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2092 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2093 2094 /* First process needs enough room for process with most rows */ 2095 if (!rank) { 2096 mmax = rowners[1]; 2097 for (i=2; i<size; i++) { 2098 mmax = PetscMax(mmax,rowners[i]); 2099 } 2100 } else mmax = m; 2101 2102 rowners[0] = 0; 2103 for (i=2; i<=size; i++) { 2104 mmax = PetscMax(mmax,rowners[i]); 2105 rowners[i] += rowners[i-1]; 2106 } 2107 rstart = rowners[rank]; 2108 rend = rowners[rank+1]; 2109 2110 /* distribute row lengths to all processors */ 2111 ierr = 
PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2112 if (!rank) { 2113 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2114 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2115 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2116 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2117 for (j=0; j<m; j++) { 2118 procsnz[0] += ourlens[j]; 2119 } 2120 for (i=1; i<size; i++) { 2121 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2122 /* calculate the number of nonzeros on each processor */ 2123 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2124 procsnz[i] += rowlengths[j]; 2125 } 2126 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2127 } 2128 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2129 } else { 2130 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2131 } 2132 2133 if (!rank) { 2134 /* determine max buffer needed and allocate it */ 2135 maxnz = 0; 2136 for (i=0; i<size; i++) { 2137 maxnz = PetscMax(maxnz,procsnz[i]); 2138 } 2139 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2140 2141 /* read in my part of the matrix column indices */ 2142 nz = procsnz[0]; 2143 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2144 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2145 2146 /* read in every one elses and ship off */ 2147 for (i=1; i<size; i++) { 2148 nz = procsnz[i]; 2149 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2150 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2151 } 2152 ierr = PetscFree(cols);CHKERRQ(ierr); 2153 } else { 2154 /* determine buffer space needed for message */ 2155 nz = 0; 2156 for (i=0; i<m; i++) { 2157 nz += ourlens[i]; 2158 } 2159 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2160 2161 /* receive message of column indices*/ 2162 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2163 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2164 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2165 } 2166 2167 /* determine column ownership if matrix is not square */ 2168 if (N != M) { 2169 n = N/size + ((N % size) > rank); 2170 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2171 cstart = cend - n; 2172 } else { 2173 cstart = rstart; 2174 cend = rend; 2175 n = cend - cstart; 2176 } 2177 2178 /* loop over local rows, determining number of off diagonal entries */ 2179 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2180 jj = 0; 2181 for (i=0; i<m; i++) { 2182 for (j=0; j<ourlens[i]; j++) { 2183 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2184 jj++; 2185 } 2186 } 2187 2188 /* create our matrix */ 2189 for (i=0; i<m; i++) { 2190 ourlens[i] -= offlens[i]; 2191 } 2192 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2193 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2194 ierr = MatSetType(A,type);CHKERRQ(ierr); 2195 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2196 2197 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2198 for (i=0; i<m; i++) { 2199 ourlens[i] += offlens[i]; 2200 } 2201 2202 if (!rank) { 2203 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2204 2205 /* read in my part of the matrix numerical values */ 2206 nz = procsnz[0]; 2207 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2208 2209 /* insert into matrix */ 2210 jj 
= rstart; 2211 smycols = mycols; 2212 svals = vals; 2213 for (i=0; i<m; i++) { 2214 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2215 smycols += ourlens[i]; 2216 svals += ourlens[i]; 2217 jj++; 2218 } 2219 2220 /* read in other processors and ship out */ 2221 for (i=1; i<size; i++) { 2222 nz = procsnz[i]; 2223 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2224 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2225 } 2226 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2227 } else { 2228 /* receive numeric values */ 2229 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2230 2231 /* receive message of values*/ 2232 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2233 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2234 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2235 2236 /* insert into matrix */ 2237 jj = rstart; 2238 smycols = mycols; 2239 svals = vals; 2240 for (i=0; i<m; i++) { 2241 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2242 smycols += ourlens[i]; 2243 svals += ourlens[i]; 2244 jj++; 2245 } 2246 } 2247 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2248 ierr = PetscFree(vals);CHKERRQ(ierr); 2249 ierr = PetscFree(mycols);CHKERRQ(ierr); 2250 ierr = PetscFree(rowners);CHKERRQ(ierr); 2251 2252 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2254 *newmat = A; 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2260 /* 2261 Not great since it makes two copies of the submatrix, first an SeqAIJ 2262 in local and then by concatenating the local matrices the end result. 
2263 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2264 */ 2265 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2266 { 2267 PetscErrorCode ierr; 2268 PetscMPIInt rank,size; 2269 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2270 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2271 Mat *local,M,Mreuse; 2272 PetscScalar *vwork,*aa; 2273 MPI_Comm comm = mat->comm; 2274 Mat_SeqAIJ *aij; 2275 2276 2277 PetscFunctionBegin; 2278 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2279 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2280 2281 if (call == MAT_REUSE_MATRIX) { 2282 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2283 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2284 local = &Mreuse; 2285 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2286 } else { 2287 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2288 Mreuse = *local; 2289 ierr = PetscFree(local);CHKERRQ(ierr); 2290 } 2291 2292 /* 2293 m - number of local rows 2294 n - number of columns (same on all processors) 2295 rstart - first row in new global matrix generated 2296 */ 2297 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2298 if (call == MAT_INITIAL_MATRIX) { 2299 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2300 ii = aij->i; 2301 jj = aij->j; 2302 2303 /* 2304 Determine the number of non-zeros in the diagonal and off-diagonal 2305 portions of the matrix in order to do correct preallocation 2306 */ 2307 2308 /* first get start and end of "diagonal" columns */ 2309 if (csize == PETSC_DECIDE) { 2310 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2311 if (mglobal == n) { /* square matrix */ 2312 nlocal = m; 2313 } else { 2314 nlocal = n/size + ((n % size) > rank); 2315 } 2316 } else { 2317 nlocal = csize; 2318 } 2319 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2320 rstart = rend - nlocal; 2321 if (rank == size - 1 && rend != n) { 2322 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2323 } 2324 2325 /* next, compute all the lengths */ 2326 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2327 olens = dlens + m; 2328 for (i=0; i<m; i++) { 2329 jend = ii[i+1] - ii[i]; 2330 olen = 0; 2331 dlen = 0; 2332 for (j=0; j<jend; j++) { 2333 if (*jj < rstart || *jj >= rend) olen++; 2334 else dlen++; 2335 jj++; 2336 } 2337 olens[i] = olen; 2338 dlens[i] = dlen; 2339 } 2340 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2341 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2342 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2343 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2344 ierr = PetscFree(dlens);CHKERRQ(ierr); 2345 } else { 2346 PetscInt ml,nl; 2347 2348 M = *newmat; 2349 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2350 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2351 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2352 /* 2353 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2354 rather than the slower MatSetValues(). 
2355 */ 2356 M->was_assembled = PETSC_TRUE; 2357 M->assembled = PETSC_FALSE; 2358 } 2359 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2360 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2361 ii = aij->i; 2362 jj = aij->j; 2363 aa = aij->a; 2364 for (i=0; i<m; i++) { 2365 row = rstart + i; 2366 nz = ii[i+1] - ii[i]; 2367 cwork = jj; jj += nz; 2368 vwork = aa; aa += nz; 2369 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2370 } 2371 2372 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2373 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2374 *newmat = M; 2375 2376 /* save submatrix used in processor for next request */ 2377 if (call == MAT_INITIAL_MATRIX) { 2378 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2379 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2380 } 2381 2382 PetscFunctionReturn(0); 2383 } 2384 2385 EXTERN_C_BEGIN 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2388 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2389 { 2390 PetscInt m,cstart, cend,j,nnz,i,d; 2391 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2392 const PetscInt *JJ; 2393 PetscScalar *values; 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2398 2399 B->rmap.bs = B->cmap.bs = 1; 2400 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2401 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2402 m = B->rmap.n; 2403 cstart = B->cmap.rstart; 2404 cend = B->cmap.rend; 2405 rstart = B->rmap.rstart; 2406 2407 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2408 o_nnz = d_nnz + m; 2409 2410 for (i=0; i<m; i++) { 2411 nnz = Ii[i+1]- Ii[i]; 2412 JJ = J + Ii[i]; 2413 nnz_max = PetscMax(nnz_max,nnz); 2414 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2415 for (j=0; j<nnz; j++) { 2416 if (*JJ >= cstart) break; 2417 JJ++; 2418 } 2419 d = 0; 2420 for (; j<nnz; j++) { 2421 if (*JJ++ >= cend) break; 2422 d++; 2423 } 2424 d_nnz[i] = d; 2425 o_nnz[i] = nnz - d; 2426 } 2427 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2428 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2429 2430 if (v) values = (PetscScalar*)v; 2431 else { 2432 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2433 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2434 } 2435 2436 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2437 for (i=0; i<m; i++) { 2438 ii = i + rstart; 2439 nnz = Ii[i+1]- Ii[i]; 2440 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2441 } 2442 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2444 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2445 2446 if (!v) { 2447 ierr = PetscFree(values);CHKERRQ(ierr); 2448 } 2449 PetscFunctionReturn(0); 2450 } 2451 EXTERN_C_END 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2455 /*@ 2456 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2457 (the default parallel PETSc format). 2458 2459 Collective on MPI_Comm 2460 2461 Input Parameters: 2462 + B - the matrix 2463 . 
i - the indices into j for the start of each local row (starts with zero) 2464 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2465 - v - optional values in the matrix 2466 2467 Level: developer 2468 2469 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2470 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2471 2472 .keywords: matrix, aij, compressed row, sparse, parallel 2473 2474 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2475 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 2476 @*/ 2477 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2478 { 2479 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2480 2481 PetscFunctionBegin; 2482 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2483 if (f) { 2484 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2485 } 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2491 /*@C 2492 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2493 (the default parallel PETSc format). For good matrix assembly performance 2494 the user should preallocate the matrix storage by setting the parameters 2495 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2496 performance can be increased by more than a factor of 50. 2497 2498 Collective on MPI_Comm 2499 2500 Input Parameters: 2501 + A - the matrix 2502 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2503 (same value is used for all local rows) 2504 . d_nnz - array containing the number of nonzeros in the various rows of the 2505 DIAGONAL portion of the local submatrix (possibly different for each row) 2506 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2507 The size of this array is equal to the number of local rows, i.e 'm'. 2508 You must leave room for the diagonal entry even if it is zero. 2509 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2510 submatrix (same value is used for all local rows). 2511 - o_nnz - array containing the number of nonzeros in the various rows of the 2512 OFF-DIAGONAL portion of the local submatrix (possibly different for 2513 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2514 structure. The size of this array is equal to the number 2515 of local rows, i.e 'm'. 2516 2517 If the *_nnz parameter is given then the *_nz parameter is ignored 2518 2519 The AIJ format (also called the Yale sparse matrix format or 2520 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2521 storage. The stored row and column indices begin with zero. See the users manual for details. 2522 2523 The parallel matrix is partitioned such that the first m0 rows belong to 2524 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2525 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
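
   A minimal calling sequence for the example above might be sketched as follows
   (comm and the local sizes m,n are the per-process values discussed above; the
   d_nnz,o_nnz arrays are illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,8,8);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve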
2604 2605 Level: intermediate 2606 2607 .keywords: matrix, aij, compressed row, sparse, parallel 2608 2609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2610 MPIAIJ 2611 @*/ 2612 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2613 { 2614 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2615 2616 PetscFunctionBegin; 2617 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2618 if (f) { 2619 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2620 } 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2626 /*@C 2627 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2628 CSR format the local rows. 2629 2630 Collective on MPI_Comm 2631 2632 Input Parameters: 2633 + comm - MPI communicator 2634 . m - number of local rows (Cannot be PETSC_DECIDE) 2635 . n - This value should be the same as the local size used in creating the 2636 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2637 calculated if N is given) For square matrices n is almost always m. 2638 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2639 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2640 . i - row indices 2641 . j - column indices 2642 - a - matrix values 2643 2644 Output Parameter: 2645 . mat - the matrix 2646 2647 Level: intermediate 2648 2649 Notes: 2650 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2651 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2652 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2653 2654 The i and j indices are 0 based 2655 2656 .keywords: matrix, aij, compressed row, sparse, parallel 2657 2658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2659 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2660 @*/ 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2662 { 2663 PetscErrorCode ierr; 2664 2665 PetscFunctionBegin; 2666 if (i[0]) { 2667 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2668 } 2669 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2670 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2671 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2672 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatCreateMPIAIJ" 2678 /*@C 2679 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2680 (the default parallel PETSc format). For good matrix assembly performance 2681 the user should preallocate the matrix storage by setting the parameters 2682 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2683 performance can be increased by more than a factor of 50. 2684 2685 Collective on MPI_Comm 2686 2687 Input Parameters: 2688 + comm - MPI communicator 2689 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type MATSEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
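
   For the layout above, the creation call on every process might be sketched as
   (d_nz and o_nz taken as the per-process maxima listed above; values are
   illustrative):
.vb
   Mat A;
   MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve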
2827 2828 Level: intermediate 2829 2830 .keywords: matrix, aij, compressed row, sparse, parallel 2831 2832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2833 MPIAIJ, MatCreateMPIAIJWithArrays() 2834 @*/ 2835 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2836 { 2837 PetscErrorCode ierr; 2838 PetscMPIInt size; 2839 2840 PetscFunctionBegin; 2841 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2842 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2843 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2844 if (size > 1) { 2845 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2846 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2847 } else { 2848 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2849 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2856 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2857 { 2858 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2859 2860 PetscFunctionBegin; 2861 *Ad = a->A; 2862 *Ao = a->B; 2863 *colmap = a->garray; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2869 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2870 { 2871 PetscErrorCode ierr; 2872 PetscInt i; 2873 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2874 2875 PetscFunctionBegin; 2876 if (coloring->ctype == IS_COLORING_GLOBAL) { 2877 ISColoringValue *allcolors,*colors; 2878 ISColoring ocoloring; 2879 2880 /* set coloring for diagonal portion */ 2881 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2882 2883 /* set coloring for off-diagonal portion */ 2884 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2885 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2886 for (i=0; i<a->B->cmap.n; i++) { 2887 colors[i] = allcolors[a->garray[i]]; 2888 } 2889 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2890 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2891 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2892 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2893 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2894 ISColoringValue *colors; 2895 PetscInt *larray; 2896 ISColoring ocoloring; 2897 2898 /* set coloring for diagonal portion */ 2899 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2900 for (i=0; i<a->A->cmap.n; i++) { 2901 larray[i] = i + A->cmap.rstart; 2902 } 2903 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2904 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2905 for (i=0; i<a->A->cmap.n; i++) { 2906 colors[i] = coloring->colors[larray[i]]; 2907 } 2908 ierr = PetscFree(larray);CHKERRQ(ierr); 2909 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2910 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2911 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2912 2913 /* set coloring for off-diagonal portion */ 2914 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2915 ierr 
= ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2916 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2917 for (i=0; i<a->B->cmap.n; i++) { 2918 colors[i] = coloring->colors[larray[i]]; 2919 } 2920 ierr = PetscFree(larray);CHKERRQ(ierr); 2921 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2922 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2923 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2924 } else { 2925 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2926 } 2927 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #if defined(PETSC_HAVE_ADIC) 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2934 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2935 { 2936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2941 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2942 PetscFunctionReturn(0); 2943 } 2944 #endif 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2948 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2949 { 2950 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2955 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatMerge" 2961 /*@C 2962 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2963 matrices from each processor 2964 2965 Collective on MPI_Comm 2966 2967 Input Parameters: 2968 + comm - the communicators the parallel matrix will live on 2969 . inmat - the input sequential matrices 2970 . n - number of local columns (or PETSC_DECIDE) 2971 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2972 2973 Output Parameter: 2974 . outmat - the parallel matrix generated 2975 2976 Level: advanced 2977 2978 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
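
           The input matrix is destroyed by this routine once its rows have been copied
           into the parallel matrix (note the MatDestroy(inmat) call at the end of the
           implementation), so the caller must not use or destroy inmat afterwards.

           A minimal calling sequence might be sketched as (names are illustrative):
.vb
   Mat seqmat,mpimat;
   MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,maxnz,PETSC_NULL,&seqmat);
   ... set and assemble the local rows of seqmat ...
   MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve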
2979 2980 @*/ 2981 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2982 { 2983 PetscErrorCode ierr; 2984 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2985 PetscInt *indx; 2986 PetscScalar *values; 2987 2988 PetscFunctionBegin; 2989 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2990 if (scall == MAT_INITIAL_MATRIX){ 2991 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2992 if (n == PETSC_DECIDE){ 2993 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2994 } 2995 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2996 rstart -= m; 2997 2998 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2999 for (i=0;i<m;i++) { 3000 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3001 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3002 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3003 } 3004 /* This routine will ONLY return MPIAIJ type matrix */ 3005 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3006 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3007 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3008 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3009 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3010 3011 } else if (scall == MAT_REUSE_MATRIX){ 3012 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3013 } else { 3014 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3015 } 3016 3017 for (i=0;i<m;i++) { 3018 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3019 Ii = i + rstart; 3020 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3021 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3022 } 3023 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3024 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3026 3027 PetscFunctionReturn(0); 3028 } 3029 3030 #undef __FUNCT__ 3031 #define __FUNCT__ "MatFileSplit" 3032 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3033 { 3034 PetscErrorCode ierr; 3035 PetscMPIInt rank; 3036 PetscInt m,N,i,rstart,nnz; 3037 size_t len; 3038 const PetscInt *indx; 3039 PetscViewer out; 3040 char *name; 3041 Mat B; 3042 const PetscScalar *values; 3043 3044 PetscFunctionBegin; 3045 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3046 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3047 /* Should this be the type of the diagonal block of A? 
*/ 3048 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3049 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3050 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3051 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3052 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3053 for (i=0;i<m;i++) { 3054 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3055 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3056 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3057 } 3058 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 3061 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3062 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3063 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3064 sprintf(name,"%s.%d",outfile,rank); 3065 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3066 ierr = PetscFree(name); 3067 ierr = MatView(B,out);CHKERRQ(ierr); 3068 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3069 ierr = MatDestroy(B);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3076 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3077 { 3078 PetscErrorCode ierr; 3079 Mat_Merge_SeqsToMPI *merge; 3080 PetscObjectContainer container; 3081 3082 PetscFunctionBegin; 3083 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3084 if (container) { 3085 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3086 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3087 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3088 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3089 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3090 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3091 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3092 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3093 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3094 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3095 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3096 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3097 3098 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 3099 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3100 } 3101 ierr = PetscFree(merge);CHKERRQ(ierr); 3102 3103 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3104 PetscFunctionReturn(0); 3105 } 3106 3107 #include "src/mat/utils/freespace.h" 3108 #include "petscbt.h" 3109 static PetscEvent logkey_seqstompinum = 0; 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3112 /*@C 3113 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3114 matrices from each processor 3115 3116 Collective on MPI_Comm 3117 3118 Input Parameters: 3119 + comm - the communicators the parallel matrix will live on 3120 . seqmat - the input sequential matrices 3121 . m - number of local rows (or PETSC_DECIDE) 3122 . n - number of local columns (or PETSC_DECIDE) 3123 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3124 3125 Output Parameter: 3126 . mpimat - the parallel matrix generated 3127 3128 Level: advanced 3129 3130 Notes: 3131 The dimensions of the sequential matrix in each processor MUST be the same. 
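   This numeric routine assumes that MatMerge_SeqsToMPISymbolic() has already
   been called to create mpimat and to attach the "MatMergeSeqsToMPI" container
   holding the communication pattern that is reused here; by itself it takes only
   the (seqmat, mpimat) pair.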
3132 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 3133 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3134 @*/ 3135 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3136 { 3137 PetscErrorCode ierr; 3138 MPI_Comm comm=mpimat->comm; 3139 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3140 PetscMPIInt size,rank,taga,*len_s; 3141 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3142 PetscInt proc,m; 3143 PetscInt **buf_ri,**buf_rj; 3144 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3145 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3146 MPI_Request *s_waits,*r_waits; 3147 MPI_Status *status; 3148 MatScalar *aa=a->a,**abuf_r,*ba_i; 3149 Mat_Merge_SeqsToMPI *merge; 3150 PetscObjectContainer container; 3151 3152 PetscFunctionBegin; 3153 if (!logkey_seqstompinum) { 3154 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3155 } 3156 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3157 3158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3160 3161 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3162 if (container) { 3163 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3164 } else { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; it must be created with MatMerge_SeqsToMPISymbolic()"); } 3165 bi = merge->bi; 3166 bj = merge->bj; 3167 buf_ri = merge->buf_ri; 3168 buf_rj = merge->buf_rj; 3169 3170 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3171 owners = merge->rowmap.range; 3172 len_s = merge->len_s; 3173 3174 /* send and recv matrix values */ 3175 /*-----------------------------*/ 3176 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3177 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3178 3179 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3180 for (proc=0,k=0; proc<size; proc++){ 3181 if (!len_s[proc]) continue; 3182 i = owners[proc]; 3183 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3184 k++; 3185 } 3186 3187 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3188 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3189 ierr = PetscFree(status);CHKERRQ(ierr); 3190 3191 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3192 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3193 3194 /* insert mat values of mpimat */ 3195 /*----------------------------*/ 3196 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3197 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3198 nextrow = buf_ri_k + merge->nrecv; 3199 nextai = nextrow + merge->nrecv; 3200 3201 for (k=0; k<merge->nrecv; k++){ 3202 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3203 nrows = *(buf_ri_k[k]); 3204 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 3205 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3206 } 3207 3208 /* set values of ba */ 3209 m = merge->rowmap.n; 3210 for (i=0; i<m; i++) { 3211 arow = owners[rank] + i; 3212 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3213 bnzi = bi[i+1] - bi[i]; 3214 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3215 3216 /* add local non-zero vals of this proc's seqmat into ba */ 3217 anzi =
ai[arow+1] - ai[arow]; 3218 aj = a->j + ai[arow]; 3219 aa = a->a + ai[arow]; 3220 nextaj = 0; 3221 for (j=0; nextaj<anzi; j++){ 3222 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3223 ba_i[j] += aa[nextaj++]; 3224 } 3225 } 3226 3227 /* add received vals into ba */ 3228 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3229 /* i-th row */ 3230 if (i == *nextrow[k]) { 3231 anzi = *(nextai[k]+1) - *nextai[k]; 3232 aj = buf_rj[k] + *(nextai[k]); 3233 aa = abuf_r[k] + *(nextai[k]); 3234 nextaj = 0; 3235 for (j=0; nextaj<anzi; j++){ 3236 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3237 ba_i[j] += aa[nextaj++]; 3238 } 3239 } 3240 nextrow[k]++; nextai[k]++; 3241 } 3242 } 3243 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3244 } 3245 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3247 3248 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3249 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3250 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3251 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3252 PetscFunctionReturn(0); 3253 } 3254 3255 static PetscEvent logkey_seqstompisym = 0; 3256 #undef __FUNCT__ 3257 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3258 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3259 { 3260 PetscErrorCode ierr; 3261 Mat B_mpi; 3262 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3263 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3264 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3265 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3266 PetscInt len,proc,*dnz,*onz; 3267 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3268 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3269 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3270 MPI_Status *status; 3271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3272 PetscBT lnkbt; 3273 Mat_Merge_SeqsToMPI *merge; 3274 PetscObjectContainer container; 3275 3276 PetscFunctionBegin; 3277 if (!logkey_seqstompisym) { 3278 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3279 } 3280 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3281 3282 /* make sure it is a PETSc comm */ 3283 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3284 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3285 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3286 3287 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3288 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3289 3290 /* determine row ownership */ 3291 /*---------------------------------------------------------*/ 3292 merge->rowmap.n = m; 3293 merge->rowmap.N = M; 3294 merge->rowmap.bs = 1; 3295 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3296 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3297 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3298 3299 m = merge->rowmap.n; 3300 M = merge->rowmap.N; 3301 owners = merge->rowmap.range; 3302 3303 /* determine the number of messages to send, their lengths */ 3304 /*---------------------------------------------------------*/ 3305 len_s = merge->len_s; 3306 3307 len = 0; /* length of buf_si[] */ 3308 merge->nsend = 0; 3309 for (proc=0; proc<size; proc++){ 3310 len_si[proc] = 0; 3311 if (proc == rank){ 3312 
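/* no message is sent to self; the locally owned rows of seqmat are merged directly rather than communicated */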
len_s[proc] = 0; 3313 } else { 3314 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3315 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3316 } 3317 if (len_s[proc]) { 3318 merge->nsend++; 3319 nrows = 0; 3320 for (i=owners[proc]; i<owners[proc+1]; i++){ 3321 if (ai[i+1] > ai[i]) nrows++; 3322 } 3323 len_si[proc] = 2*(nrows+1); 3324 len += len_si[proc]; 3325 } 3326 } 3327 3328 /* determine the number and length of messages to receive for ij-structure */ 3329 /*-------------------------------------------------------------------------*/ 3330 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3331 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3332 3333 /* post the Irecv of j-structure */ 3334 /*-------------------------------*/ 3335 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3336 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3337 3338 /* post the Isend of j-structure */ 3339 /*--------------------------------*/ 3340 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3341 sj_waits = si_waits + merge->nsend; 3342 3343 for (proc=0, k=0; proc<size; proc++){ 3344 if (!len_s[proc]) continue; 3345 i = owners[proc]; 3346 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3347 k++; 3348 } 3349 3350 /* receives and sends of j-structure are complete */ 3351 /*------------------------------------------------*/ 3352 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3353 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3354 3355 /* send and recv i-structure */ 3356 /*---------------------------*/ 3357 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3358 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3359 3360 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3361 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3362 for (proc=0,k=0; proc<size; proc++){ 3363 if (!len_s[proc]) continue; 3364 /* form outgoing message for i-structure: 3365 buf_si[0]: nrows to be sent 3366 [1:nrows]: row index (global) 3367 [nrows+1:2*nrows+1]: i-structure index 3368 */ 3369 /*-------------------------------------------*/ 3370 nrows = len_si[proc]/2 - 1; 3371 buf_si_i = buf_si + nrows+1; 3372 buf_si[0] = nrows; 3373 buf_si_i[0] = 0; 3374 nrows = 0; 3375 for (i=owners[proc]; i<owners[proc+1]; i++){ 3376 anzi = ai[i+1] - ai[i]; 3377 if (anzi) { 3378 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3379 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3380 nrows++; 3381 } 3382 } 3383 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3384 k++; 3385 buf_si += len_si[proc]; 3386 } 3387 3388 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3389 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3390 3391 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3392 for (i=0; i<merge->nrecv; i++){ 3393 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3394 } 3395 3396 ierr = PetscFree(len_si);CHKERRQ(ierr); 3397 ierr = 
PetscFree(len_ri);CHKERRQ(ierr); 3398 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3399 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3400 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3401 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3402 ierr = PetscFree(status);CHKERRQ(ierr); 3403 3404 /* compute a local seq matrix in each processor */ 3405 /*----------------------------------------------*/ 3406 /* allocate bi array and free space for accumulating nonzero column info */ 3407 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3408 bi[0] = 0; 3409 3410 /* create and initialize a linked list */ 3411 nlnk = N+1; 3412 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3413 3414 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3415 len = 0; 3416 len = ai[owners[rank+1]] - ai[owners[rank]]; 3417 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3418 current_space = free_space; 3419 3420 /* determine symbolic info for each local row */ 3421 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3422 nextrow = buf_ri_k + merge->nrecv; 3423 nextai = nextrow + merge->nrecv; 3424 for (k=0; k<merge->nrecv; k++){ 3425 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3426 nrows = *buf_ri_k[k]; 3427 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3428 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3429 } 3430 3431 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3432 len = 0; 3433 for (i=0;i<m;i++) { 3434 bnzi = 0; 3435 /* add local non-zero cols of this proc's seqmat into lnk */ 3436 arow = owners[rank] + i; 3437 anzi = ai[arow+1] - ai[arow]; 3438 aj = a->j + ai[arow]; 3439 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3440 bnzi += nlnk; 3441 /* add received col data into lnk */ 3442 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3443 if (i == *nextrow[k]) { /* i-th row */ 3444 anzi = *(nextai[k]+1) - *nextai[k]; 3445 aj = buf_rj[k] + *nextai[k]; 3446 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3447 bnzi += nlnk; 3448 nextrow[k]++; nextai[k]++; 3449 } 3450 } 3451 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3452 3453 /* if free space is not available, make more free space */ 3454 if (current_space->local_remaining<bnzi) { 3455 ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3456 nspacedouble++; 3457 } 3458 /* copy data into free space, then initialize lnk */ 3459 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3460 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3461 3462 current_space->array += bnzi; 3463 current_space->local_used += bnzi; 3464 current_space->local_remaining -= bnzi; 3465 3466 bi[i+1] = bi[i] + bnzi; 3467 } 3468 3469 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3470 3471 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3472 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3473 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3474 3475 /* create symbolic parallel matrix B_mpi */ 3476 /*---------------------------------------*/ 3477 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3478 if (n==PETSC_DECIDE) { 3479 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3480 } else { 3481 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3482 } 3483 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3484 ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3485 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3486 3487 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3488 B_mpi->assembled = PETSC_FALSE; 3489 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3490 merge->bi = bi; 3491 merge->bj = bj; 3492 merge->buf_ri = buf_ri; 3493 merge->buf_rj = buf_rj; 3494 merge->coi = PETSC_NULL; 3495 merge->coj = PETSC_NULL; 3496 merge->owners_co = PETSC_NULL; 3497 3498 /* attach the supporting struct to B_mpi for reuse */ 3499 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3500 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3501 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3502 *mpimat = B_mpi; 3503 3504 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3505 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3506 PetscFunctionReturn(0); 3507 } 3508 3509 static PetscEvent logkey_seqstompi = 0; 3510 #undef __FUNCT__ 3511 #define __FUNCT__ "MatMerge_SeqsToMPI" 3512 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3513 { 3514 PetscErrorCode ierr; 3515 3516 PetscFunctionBegin; 3517 if (!logkey_seqstompi) { 3518 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3519 } 3520 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3521 if (scall == MAT_INITIAL_MATRIX){ 3522 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3523 } 3524 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3525 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3526 PetscFunctionReturn(0); 3527 } 3528 static PetscEvent logkey_getlocalmat = 0; 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "MatGetLocalMat" 3531 /*@C 3532 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3533 3534 Not Collective 3535 3536 Input Parameters: 3537 + A - the matrix 3538 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3539 3540 Output Parameter: 3541 . 
A_loc - the local sequential matrix generated 3542 3543 Level: developer 3544 3545 @*/ 3546 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3547 { 3548 PetscErrorCode ierr; 3549 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3550 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3551 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3552 PetscScalar *aa=a->a,*ba=b->a,*ca; 3553 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3554 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3555 3556 PetscFunctionBegin; 3557 if (!logkey_getlocalmat) { 3558 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3559 } 3560 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3561 if (scall == MAT_INITIAL_MATRIX){ 3562 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3563 ci[0] = 0; 3564 for (i=0; i<am; i++){ 3565 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3566 } 3567 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3568 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3569 k = 0; 3570 for (i=0; i<am; i++) { 3571 ncols_o = bi[i+1] - bi[i]; 3572 ncols_d = ai[i+1] - ai[i]; 3573 /* off-diagonal portion of A */ 3574 for (jo=0; jo<ncols_o; jo++) { 3575 col = cmap[*bj]; 3576 if (col >= cstart) break; 3577 cj[k] = col; bj++; 3578 ca[k++] = *ba++; 3579 } 3580 /* diagonal portion of A */ 3581 for (j=0; j<ncols_d; j++) { 3582 cj[k] = cstart + *aj++; 3583 ca[k++] = *aa++; 3584 } 3585 /* off-diagonal portion of A */ 3586 for (j=jo; j<ncols_o; j++) { 3587 cj[k] = cmap[*bj++]; 3588 ca[k++] = *ba++; 3589 } 3590 } 3591 /* put together the new matrix */ 3592 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3593 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3594 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3595 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3596 mat->free_a = PETSC_TRUE; 3597 mat->free_ij = PETSC_TRUE; 3598 mat->nonew = 0; 3599 } else if (scall == MAT_REUSE_MATRIX){ 3600 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3601 ci = mat->i; cj = mat->j; ca = mat->a; 3602 for (i=0; i<am; i++) { 3603 /* off-diagonal portion of A */ 3604 ncols_o = bi[i+1] - bi[i]; 3605 for (jo=0; jo<ncols_o; jo++) { 3606 col = cmap[*bj]; 3607 if (col >= cstart) break; 3608 *ca++ = *ba++; bj++; 3609 } 3610 /* diagonal portion of A */ 3611 ncols_d = ai[i+1] - ai[i]; 3612 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3613 /* off-diagonal portion of A */ 3614 for (j=jo; j<ncols_o; j++) { 3615 *ca++ = *ba++; bj++; 3616 } 3617 } 3618 } else { 3619 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3620 } 3621 3622 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3623 PetscFunctionReturn(0); 3624 } 3625 3626 static PetscEvent logkey_getlocalmatcondensed = 0; 3627 #undef __FUNCT__ 3628 #define __FUNCT__ "MatGetLocalMatCondensed" 3629 /*@C 3630 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3631 3632 Not Collective 3633 3634 Input Parameters: 3635 + A - the matrix 3636 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3637 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3638 3639 Output Parameter: 3640 . 
A_loc - the local sequential matrix generated 3641 3642 Level: developer 3643 3644 @*/ 3645 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3646 { 3647 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3648 PetscErrorCode ierr; 3649 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3650 IS isrowa,iscola; 3651 Mat *aloc; 3652 3653 PetscFunctionBegin; 3654 if (!logkey_getlocalmatcondensed) { 3655 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3656 } 3657 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3658 if (!row){ 3659 start = A->rmap.rstart; end = A->rmap.rend; 3660 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3661 } else { 3662 isrowa = *row; 3663 } 3664 if (!col){ 3665 start = A->cmap.rstart; 3666 cmap = a->garray; 3667 nzA = a->A->cmap.n; 3668 nzB = a->B->cmap.n; 3669 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3670 ncols = 0; 3671 for (i=0; i<nzB; i++) { 3672 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3673 else break; 3674 } 3675 imark = i; 3676 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3677 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3678 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3679 ierr = PetscFree(idx);CHKERRQ(ierr); 3680 } else { 3681 iscola = *col; 3682 } 3683 if (scall != MAT_INITIAL_MATRIX){ 3684 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3685 aloc[0] = *A_loc; 3686 } 3687 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3688 *A_loc = aloc[0]; 3689 ierr = PetscFree(aloc);CHKERRQ(ierr); 3690 if (!row){ 3691 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3692 } 3693 if (!col){ 3694 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3695 } 3696 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3697 PetscFunctionReturn(0); 3698 } 3699 3700 static PetscEvent logkey_GetBrowsOfAcols = 0; 3701 #undef __FUNCT__ 3702 #define __FUNCT__ "MatGetBrowsOfAcols" 3703 /*@C 3704 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A 3705 3706 Collective on Mat 3707 3708 Input Parameters: 3709 + A,B - the matrices in mpiaij format 3710 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3711 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3712 3713 Output Parameters: 3714 + rowb, colb - index sets of rows and columns of B to extract 3715 . 
brstart - row index of B_seq at which the B->rmap.n rows corresponding to B's local rows begin 3716 - B_seq - the sequential matrix generated 3717 3718 Level: developer 3719 3720 @*/ 3721 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3722 { 3723 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3724 PetscErrorCode ierr; 3725 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3726 IS isrowb,iscolb; 3727 Mat *bseq; 3728 3729 PetscFunctionBegin; 3730 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3731 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3732 } 3733 if (!logkey_GetBrowsOfAcols) { 3734 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3735 } 3736 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3737 3738 if (scall == MAT_INITIAL_MATRIX){ 3739 start = A->cmap.rstart; 3740 cmap = a->garray; 3741 nzA = a->A->cmap.n; 3742 nzB = a->B->cmap.n; 3743 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3744 ncols = 0; 3745 for (i=0; i<nzB; i++) { /* row < local row index */ 3746 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3747 else break; 3748 } 3749 imark = i; 3750 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3751 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3752 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3753 ierr = PetscFree(idx);CHKERRQ(ierr); 3754 *brstart = imark; 3755 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3756 } else { 3757 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3758 isrowb = *rowb; iscolb = *colb; 3759 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3760 bseq[0] = *B_seq; 3761 } 3762 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3763 *B_seq = bseq[0]; 3764 ierr = PetscFree(bseq);CHKERRQ(ierr); 3765 if (!rowb){ 3766 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3767 } else { 3768 *rowb = isrowb; 3769 } 3770 if (!colb){ 3771 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3772 } else { 3773 *colb = iscolb; 3774 } 3775 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3776 PetscFunctionReturn(0); 3777 } 3778 3779 static PetscEvent logkey_GetBrowsOfAocols = 0; 3780 #undef __FUNCT__ 3781 #define __FUNCT__ "MatGetBrowsOfAoCols" 3782 /*@C 3783 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3784 of the OFF-DIAGONAL portion of the local A 3785 3786 Collective on Mat 3787 3788 Input Parameters: 3789 + A,B - the matrices in mpiaij format 3790 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3791 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3792 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3793 3794 Output Parameter: 3795 + B_oth - the sequential matrix generated 3796 3797 Level: developer 3798 3799 @*/ 3800 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3801 { 3802 VecScatter_MPI_General *gen_to,*gen_from; 3803 PetscErrorCode ierr; 3804 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3805 Mat_SeqAIJ *b_oth; 3806 VecScatter ctx=a->Mvctx; 3807 MPI_Comm comm=ctx->comm; 3808 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3809 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3810 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3811 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3812 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 3813 MPI_Status *sstatus,rstatus; 3814 PetscInt *cols; 3815 PetscScalar *vals; 3816 PetscMPIInt j; 3817 3818 PetscFunctionBegin; 3819 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3820 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3821 } 3822 if (!logkey_GetBrowsOfAocols) { 3823 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3824 } 3825 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3826 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3827 3828 gen_to = (VecScatter_MPI_General*)ctx->todata; 3829 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3830 rvalues = gen_from->values; /* holds the length of sending row */ 3831 svalues = gen_to->values; /* holds the length of receiving row */ 3832 nrecvs = gen_from->n; 3833 nsends = gen_to->n; 3834 3835 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3836 srow = gen_to->indices; /* local row index to be sent */ 3837 rstarts = gen_from->starts; 3838 sstarts = gen_to->starts; 3839 rprocs = gen_from->procs; 3840 sprocs = gen_to->procs; 3841 sstatus = gen_to->sstatus; 3842 3843 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3844 if (scall == MAT_INITIAL_MATRIX){ 3845 /* i-array */ 3846 /*---------*/ 3847 /* post receives */ 3848 for (i=0; i<nrecvs; i++){ 3849 rowlen = (PetscInt*)rvalues + rstarts[i]; 3850 nrows = rstarts[i+1]-rstarts[i]; 3851 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3852 } 3853 3854 /* pack the outgoing message */ 3855 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3856 rstartsj = sstartsj + nsends +1; 3857 sstartsj[0] = 0; rstartsj[0] = 0; 3858 len = 0; /* total length of j or a array to be sent */ 3859 k = 0; 3860 for (i=0; i<nsends; i++){ 3861 rowlen = (PetscInt*)svalues + sstarts[i]; 3862 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3863 for (j=0; j<nrows; j++) { 3864 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3865 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3866 len += rowlen[j]; 3867 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3868 k++; 3869 } 3870 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3871 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3872 } 3873 /* recvs and 
sends of i-array are completed */ 3874 i = nrecvs; 3875 while (i--) { 3876 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3877 } 3878 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3879 /* allocate buffers for sending j and a arrays */ 3880 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3881 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3882 3883 /* create i-array of B_oth */ 3884 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3885 b_othi[0] = 0; 3886 len = 0; /* total length of j or a array to be received */ 3887 k = 0; 3888 for (i=0; i<nrecvs; i++){ 3889 rowlen = (PetscInt*)rvalues + rstarts[i]; 3890 nrows = rstarts[i+1]-rstarts[i]; 3891 for (j=0; j<nrows; j++) { 3892 b_othi[k+1] = b_othi[k] + rowlen[j]; 3893 len += rowlen[j]; k++; 3894 } 3895 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3896 } 3897 3898 /* allocate space for j and a arrays of B_oth */ 3899 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3900 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3901 3902 /* j-array */ 3903 /*---------*/ 3904 /* post receives of j-array */ 3905 for (i=0; i<nrecvs; i++){ 3906 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3907 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3908 } 3909 k = 0; 3910 for (i=0; i<nsends; i++){ 3911 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3912 bufJ = bufj+sstartsj[i]; 3913 for (j=0; j<nrows; j++) { 3914 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3915 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3916 for (l=0; l<ncols; l++){ 3917 *bufJ++ = cols[l]; 3918 } 3919 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3920 } 3921 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3922 } 3923 3924 /* recvs and sends of j-array are completed */ 3925 i = nrecvs; 3926 while (i--) { 3927 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3928 } 3929 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3930 } else if (scall == MAT_REUSE_MATRIX){ 3931 sstartsj = *startsj; 3932 rstartsj = sstartsj + nsends +1; 3933 bufa = *bufa_ptr; 3934 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3935 b_otha = b_oth->a; 3936 } else { 3937 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3938 } 3939 3940 /* a-array */ 3941 /*---------*/ 3942 /* post receives of a-array */ 3943 for (i=0; i<nrecvs; i++){ 3944 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3945 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3946 } 3947 k = 0; 3948 for (i=0; i<nsends; i++){ 3949 nrows = sstarts[i+1]-sstarts[i]; 3950 bufA = bufa+sstartsj[i]; 3951 for (j=0; j<nrows; j++) { 3952 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3953 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3954 for (l=0; l<ncols; l++){ 3955 *bufA++ = vals[l]; 3956 } 3957 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3958 3959 } 3960 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3961 } 3962 /* recvs and sends of a-array are completed */ 3963 i = nrecvs; 3964 while (i--) { 3965 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3966 } 3967 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3968 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3969 3970 if (scall == MAT_INITIAL_MATRIX){ 3971 /* put together the new matrix */ 3972 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3973 3974 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3975 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3976 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3977 b_oth->free_a = PETSC_TRUE; 3978 b_oth->free_ij = PETSC_TRUE; 3979 b_oth->nonew = 0; 3980 3981 ierr = PetscFree(bufj);CHKERRQ(ierr); 3982 if (!startsj || !bufa_ptr){ 3983 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3984 ierr = PetscFree(bufa);CHKERRQ(ierr); 3985 } else { 3986 *startsj = sstartsj; 3987 *bufa_ptr = bufa; 3988 } 3989 } 3990 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3991 3992 PetscFunctionReturn(0); 3993 } 3994 3995 #undef __FUNCT__ 3996 #define __FUNCT__ "MatGetCommunicationStructs" 3997 /*@C 3998 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3999 4000 Not Collective 4001 4002 Input Parameters: 4003 . A - The matrix in mpiaij format 4004 4005 Output Parameters: 4006 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4007 . colmap - A map from global column index to local index into lvec 4008 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4009 4010 Level: developer 4011 4012 @*/ 4013 #if defined (PETSC_USE_CTABLE) 4014 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4015 #else 4016 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4017 #endif 4018 { 4019 Mat_MPIAIJ *a; 4020 4021 PetscFunctionBegin; 4022 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4023 PetscValidPointer(lvec, 2) 4024 PetscValidPointer(colmap, 3) 4025 PetscValidPointer(multScatter, 4) 4026 a = (Mat_MPIAIJ *) A->data; 4027 if (lvec) *lvec = a->lvec; 4028 if (colmap) *colmap = a->colmap; 4029 if (multScatter) *multScatter = a->Mvctx; 4030 PetscFunctionReturn(0); 4031 } 4032 4033 EXTERN_C_BEGIN 4034 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4035 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4036 EXTERN_C_END 4037 4038 /*MC 4039 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4040 4041 Options Database Keys: 4042 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4043 4044 Level: beginner 4045 4046 .seealso: MatCreateMPIAIJ 4047 M*/ 4048 4049 EXTERN_C_BEGIN 4050 #undef __FUNCT__ 4051 #define __FUNCT__ "MatCreate_MPIAIJ" 4052 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4053 { 4054 Mat_MPIAIJ *b; 4055 PetscErrorCode ierr; 4056 PetscMPIInt size; 4057 4058 PetscFunctionBegin; 4059 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4060 4061 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4062 B->data = (void*)b; 4063 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4064 B->factor = 0; 4065 B->rmap.bs = 1; 4066 B->assembled = PETSC_FALSE; 4067 B->mapping = 0; 4068 4069 B->insertmode = NOT_SET_VALUES; 4070 b->size = size; 4071 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4072 4073 /* build cache for off array entries formed */ 4074 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4075 b->donotstash = PETSC_FALSE; 4076 b->colmap = 0; 4077 b->garray = 0; 4078 b->roworiented = PETSC_TRUE; 4079 4080 /* stuff used for matrix vector multiply */ 4081 b->lvec = PETSC_NULL; 4082 b->Mvctx = PETSC_NULL; 4083 4084 /* stuff for MatGetRow() */ 4085 b->rowindices = 0; 4086 b->rowvalues = 0; 4087 b->getrowactive = PETSC_FALSE; 4088 4089 4090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4091 "MatStoreValues_MPIAIJ", 4092 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4094 "MatRetrieveValues_MPIAIJ", 4095 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4097 "MatGetDiagonalBlock_MPIAIJ", 4098 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4100 "MatIsTranspose_MPIAIJ", 4101 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4103 "MatMPIAIJSetPreallocation_MPIAIJ", 4104 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4106 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4107 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4109 "MatDiagonalScaleLocal_MPIAIJ", 4110 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4112 "MatConvert_MPIAIJ_MPICSRPERM", 4113 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4115 "MatConvert_MPIAIJ_MPICRL", 4116 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4117 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120 EXTERN_C_END 4121 4122 #undef __FUNCT__ 4123 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 4124 /*@C 4125 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 4126 and "off-diagonal" part of the matrix in CSR format. 4127 4128 Collective on MPI_Comm 4129 4130 Input Parameters: 4131 + comm - MPI communicator 4132 . m - number of local rows (Cannot be PETSC_DECIDE) 4133 . n - This value should be the same as the local size used in creating the 4134 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4135 calculated if N is given) For square matrices n is almost always m. 4136 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4137 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4138 . i - row indices for "diagonal" portion of matrix 4139 . j - column indices 4140 . a - matrix values 4141 . oi - row indices for "off-diagonal" portion of matrix 4142 . oj - column indices 4143 - oa - matrix values 4144 4145 Output Parameter: 4146 . mat - the matrix 4147 4148 Level: advanced 4149 4150 Notes: 4151 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 4152 4153 The i and j indices are 0 based 4154 4155 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 4156 4157 4158 .keywords: matrix, aij, compressed row, sparse, parallel 4159 4160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4161 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 4162 @*/ 4163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 4164 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 4165 { 4166 PetscErrorCode ierr; 4167 Mat_MPIAIJ *maij; 4168 4169 PetscFunctionBegin; 4170 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4171 if (i[0]) { 4172 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4173 } 4174 if (oi[0]) { 4175 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 4176 } 4177 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4178 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4179 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4180 maij = (Mat_MPIAIJ*) (*mat)->data; 4181 maij->donotstash = PETSC_TRUE; 4182 (*mat)->preallocated = PETSC_TRUE; 4183 4184 (*mat)->rmap.bs = (*mat)->cmap.bs = 1; 4185 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr); 4186 ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr); 4187 4188 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 4189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 4190 4191 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4192 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4193 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4194 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4195 4196 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4197 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4198 PetscFunctionReturn(0); 4199 } 4200 4201 /* 4202 Special version for direct calls from Fortran 4203 */ 4204 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4205 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4206 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4207 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4208 #endif 4209 4210 /* Change these macros so can be used in void function */ 4211 #undef CHKERRQ 4212 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4213 #undef SETERRQ2 4214 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4215 #undef SETERRQ 4216 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4217 4218 EXTERN_C_BEGIN 4219 #undef __FUNCT__ 4220 #define __FUNCT__ "matsetvaluesmpiaij_" 4221 
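/*
   Note: this routine is called directly from Fortran, so it returns void, receives every
   argument by reference, and reports errors through the CHKERRQ/SETERRQ redefinitions above,
   which abort on the matrix communicator instead of returning an error code.
*/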
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4222 { 4223 Mat mat = *mmat; 4224 PetscInt m = *mm, n = *mn; 4225 InsertMode addv = *maddv; 4226 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4227 PetscScalar value; 4228 PetscErrorCode ierr; 4229 4230 MatPreallocated(mat); 4231 if (mat->insertmode == NOT_SET_VALUES) { 4232 mat->insertmode = addv; 4233 } 4234 #if defined(PETSC_USE_DEBUG) 4235 else if (mat->insertmode != addv) { 4236 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4237 } 4238 #endif 4239 { 4240 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4241 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4242 PetscTruth roworiented = aij->roworiented; 4243 4244 /* Some Variables required in the macro */ 4245 Mat A = aij->A; 4246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4247 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4248 PetscScalar *aa = a->a; 4249 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4250 Mat B = aij->B; 4251 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4252 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4253 PetscScalar *ba = b->a; 4254 4255 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4256 PetscInt nonew = a->nonew; 4257 PetscScalar *ap1,*ap2; 4258 4259 PetscFunctionBegin; 4260 for (i=0; i<m; i++) { 4261 if (im[i] < 0) continue; 4262 #if defined(PETSC_USE_DEBUG) 4263 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4264 #endif 4265 if (im[i] >= rstart && im[i] < rend) { 4266 row = im[i] - rstart; 4267 lastcol1 = -1; 4268 rp1 = aj + ai[row]; 4269 ap1 = aa + ai[row]; 4270 rmax1 = aimax[row]; 4271 nrow1 = ailen[row]; 4272 low1 = 0; 4273 high1 = nrow1; 4274 lastcol2 = -1; 4275 rp2 = bj + bi[row]; 4276 ap2 = ba + bi[row]; 4277 rmax2 = bimax[row]; 4278 nrow2 = bilen[row]; 4279 low2 = 0; 4280 high2 = nrow2; 4281 4282 for (j=0; j<n; j++) { 4283 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4284 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4285 if (in[j] >= cstart && in[j] < cend){ 4286 col = in[j] - cstart; 4287 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4288 } else if (in[j] < 0) continue; 4289 #if defined(PETSC_USE_DEBUG) 4290 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4291 #endif 4292 else { 4293 if (mat->was_assembled) { 4294 if (!aij->colmap) { 4295 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4296 } 4297 #if defined (PETSC_USE_CTABLE) 4298 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4299 col--; 4300 #else 4301 col = aij->colmap[in[j]] - 1; 4302 #endif 4303 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4304 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4305 col = in[j]; 4306 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4307 B = aij->B; 4308 b = (Mat_SeqAIJ*)B->data; 4309 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4310 rp2 = bj + bi[row]; 4311 ap2 = ba + bi[row]; 4312 rmax2 = bimax[row]; 4313 nrow2 = bilen[row]; 4314 low2 = 0; 4315 high2 = nrow2; 4316 bm = aij->B->rmap.n; 4317 ba = b->a; 4318 } 4319 } else col = in[j]; 4320 
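/* the entry belongs to the off-diagonal block B: col is the compact local column index when the matrix has already been assembled (taken from colmap above), otherwise it is the global column index */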
MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4321 } 4322 } 4323 } else { 4324 if (!aij->donotstash) { 4325 if (roworiented) { 4326 if (ignorezeroentries && v[i*n] == 0.0) continue; 4327 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4328 } else { 4329 if (ignorezeroentries && v[i] == 0.0) continue; 4330 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4331 } 4332 } 4333 } 4334 }} 4335 PetscFunctionReturnVoid(); 4336 } 4337 EXTERN_C_END 4338 4339
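/*
   Minimal usage sketch for MatGetLocalMat() (illustrative only, not called anywhere in this file;
   assumes A is an assembled MPIAIJ matrix and ierr is a PetscErrorCode):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);  /* local rows of A with global column numbering */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    /* refresh the values (same nonzero pattern) after A changes */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/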