#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable
   (each process stores an integer array of order N) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt
row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
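/*
   Reader note (added; not part of the original source): aij->garray[k] gives the
   global column number of local column k of the off-diagonal block B, and
   aij->colmap, used just above, is its inverse, stored shifted by one so that 0
   can mean "column not present".  A minimal sketch of the invariant maintained by
   CreateColmap_MPIAIJ_Private(), in the non-PETSC_USE_CTABLE case:

     PetscInt k;
     for (k=0; k<aij->B->cmap.n; k++) {
       // aij->colmap[aij->garray[k]] - 1 == k   (global-to-local lookup recovers k)
     }
*/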
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
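/*
   Illustration (added): the MPI_Allreduce with MPI_BOR in MatAssemblyBegin_MPIAIJ
   above works because INSERT_VALUES and ADD_VALUES have distinct bit patterns, so a
   communicator in which some ranks inserted and others added reduces to the value
   ADD_VALUES|INSERT_VALUES, which matches neither mode.  A standalone sketch of the
   same test (assumes a valid communicator comm; the exact enum values are an
   assumption, PETSc only requires that they differ):

     InsertMode mine = INSERT_VALUES, combined;
     MPI_Allreduce(&mine,&combined,1,MPI_INT,MPI_BOR,comm);
     if (combined == (ADD_VALUES|INSERT_VALUES)) {
       // some ranks inserted while others added: raise an error
     }
*/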
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt
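/*
   Usage sketch (added; not part of the original source, error checking omitted):
   the assembly machinery above is normally driven as follows, with M, N, row, col
   and v standing for application-supplied values:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
     MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);   // off-process rows go to the stash
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);        // ships stashed entries
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);          // drains them via MatSetValues_MPIAIJ
*/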
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
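/*
   Pattern note (added): MatMult_MPIAIJ above computes y = A_diag*x + B_off*x_ghost
   and overlaps the ghost-value scatter with the local product.  Schematically, using
   the placeholder names Adiag, Boff, lvec and Mvctx:

     VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);  // start ghost update
     MatMult(Adiag,x,y);                                           // local work meanwhile
     VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);    // finish ghost update
     MatMultAdd(Boff,lvec,y,y);                                    // add off-diagonal part
*/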
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr =
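/*
   Worked example (added) for the three-loop merge used above: with cstart = 10, a
   local row whose off-diagonal part B references global columns {2, 7, 15} and whose
   diagonal part A holds local columns {0, 3} is emitted as 2, 7, 10, 13, 15, i.e.
   first the B entries left of the diagonal block, then the diagonal block shifted by
   cstart, then the remaining B entries, so every row's indices come out in globally
   increasing order.
*/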
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
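/*
   Usage sketch (added; not part of the original source, error checking omitted):
   the viewer dispatch below is typically reached through MatView, e.g.

     MatView(A,PETSC_VIEWER_STDOUT_WORLD);                             // ASCII path

     PetscViewer vb;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&vb);
     MatView(A,vb);                                                    // binary path
     PetscViewerDestroy(vb);
*/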
1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode 
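/*
   Note (added): MatRelax_MPIAIJ above implements processor-local SOR, i.e. block
   Jacobi between processes with SOR inside each diagonal block.  One outer
   iteration, written out:

     x_ghost <- scatter(x)                        (VecScatterBegin/End into lvec)
     bb1     <- bb - B_off * x_ghost              (lvec scaled by -1, then local multadd)
     x       <- SOR(A_diag, bb1, omega, lits)     (local sweep on the diagonal block)

   A truly parallel SOR is not provided, which is why the final branch raises
   PETSC_ERR_SUP.
*/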
ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == 
MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_ROW_ORIENTED: 1220 a->roworiented = PETSC_TRUE; 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROWS_SORTED: 1225 case MAT_ROWS_UNSORTED: 1226 case MAT_YES_NEW_DIAGONALS: 1227 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1228 break; 1229 case MAT_COLUMN_ORIENTED: 1230 a->roworiented = PETSC_FALSE; 1231 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1232 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1233 break; 1234 case MAT_IGNORE_OFF_PROC_ENTRIES: 1235 a->donotstash = PETSC_TRUE; 1236 break; 1237 case MAT_NO_NEW_DIAGONALS: 1238 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1239 case MAT_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1241 break; 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 
1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 
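/*
   Summary (added) of the reductions used in MatNorm_MPIAIJ: the Frobenius norm sums
   |a_ij|^2 over the local A and B blocks and takes sqrt of the MPI_SUM; the 1-norm
   accumulates per-column absolute sums into an array of length cmap.N, MPI_SUMs it,
   and returns the largest entry; the infinity norm takes each process's largest row
   sum and reduces with MPI_MAX.  The 2-norm is not supported.
*/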
1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v = amat->a; jj = amat->j; 1395 for (j=0; j<amat->nz; j++) { 1396 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a; jj = bmat->j; 1399 for (j=0; j<bmat->nz; j++) { 1400 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1401 } 1402 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1403 for (j=0; j<mat->cmap.N; j++) { 1404 if (tmp2[j] > *norm) *norm = tmp2[j]; 1405 } 1406 ierr = PetscFree(tmp);CHKERRQ(ierr); 1407 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1408 } else if (type == NORM_INFINITY) { /* max row norm */ 1409 PetscReal ntemp = 0.0; 1410 for (j=0; j<aij->A->rmap.n; j++) { 1411 v = amat->a + amat->i[j]; 1412 sum = 0.0; 1413 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1414 sum += PetscAbsScalar(*v); v++; 1415 } 1416 v = bmat->a + bmat->i[j]; 1417 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1418 sum += PetscAbsScalar(*v); v++; 1419 } 1420 if (sum > ntemp) ntemp = sum; 1421 } 1422 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1423 } else { 1424 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1425 } 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatTranspose_MPIAIJ" 1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1435 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1436 PetscErrorCode ierr; 1437 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1438 Mat B; 1439 PetscScalar *array; 1440 1441 PetscFunctionBegin; 1442 if (!matout && M != N) { 1443 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1444 } 1445 1446 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1447 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1448 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1449 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1450 1451 /* copy over the A part */ 1452 Aloc = (Mat_SeqAIJ*)a->A->data; 1453 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1454 row = A->rmap.rstart; 1455 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1456 for (i=0; i<m; i++) { 1457 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1458 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1459 } 1460 aj = Aloc->j; 1461 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1462 1463 /* copy over the B part */ 1464 Aloc = (Mat_SeqAIJ*)a->B->data; 1465 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1466 row = A->rmap.rstart; 1467 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1468 ct = cols; 1469 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1470 for (i=0; i<m; i++) { 1471 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1472 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1473 } 1474 ierr = PetscFree(ct);CHKERRQ(ierr); 1475 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 if (matout) { 1478 *matout = B; 1479 } else { 1480 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1481 } 1482 
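/*
   Note (added): MatTranspose_MPIAIJ above builds the transpose by re-inserting each
   locally owned row as a column of the new matrix, MatSetValues(B,ncols,cols,1,&row,...),
   and letting the stash/assembly machinery route entries to their new owners; the
   in-place variant (matout == PETSC_NULL) is restricted to square matrices and
   finishes with MatHeaderCopy.
*/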
PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1488 { 1489 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1490 Mat a = aij->A,b = aij->B; 1491 PetscErrorCode ierr; 1492 PetscInt s1,s2,s3; 1493 1494 PetscFunctionBegin; 1495 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1496 if (rr) { 1497 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1498 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1499 /* Overlap communication with computation. */ 1500 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1501 } 1502 if (ll) { 1503 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1504 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1505 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1506 } 1507 /* scale the diagonal block */ 1508 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1509 1510 if (rr) { 1511 /* Do a scatter end and then right scale the off-diagonal block */ 1512 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1513 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1514 } 1515 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1521 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1522 { 1523 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1528 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1533 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1534 { 1535 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatEqual_MPIAIJ" 1545 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1546 { 1547 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1548 Mat a,b,c,d; 1549 PetscTruth flg; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 a = matA->A; b = matA->B; 1554 c = matB->A; d = matB->B; 1555 1556 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1557 if (flg) { 1558 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1559 } 1560 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatCopy_MPIAIJ" 1566 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1567 { 1568 PetscErrorCode ierr; 1569 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1570 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1571 1572 PetscFunctionBegin; 1573 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1574 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1575 /* because of the column compression in the off-processor part of the matrix a->B, 1576 the number of columns in a->B and b->B may be different, hence we cannot call 1577 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1578 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1579 then copying the submatrices */ 1580 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1581 } else { 1582 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1583 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1591 { 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #include "petscblaslapack.h" 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatAXPY_MPIAIJ" 1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1603 { 1604 PetscErrorCode ierr; 1605 PetscInt i; 1606 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1607 PetscBLASInt bnz,one=1; 1608 Mat_SeqAIJ *x,*y; 1609 1610 PetscFunctionBegin; 1611 if (str == SAME_NONZERO_PATTERN) { 1612 PetscScalar alpha = a; 1613 x = (Mat_SeqAIJ *)xx->A->data; 1614 y = (Mat_SeqAIJ *)yy->A->data; 1615 bnz = (PetscBLASInt)x->nz; 1616 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1617 x = (Mat_SeqAIJ *)xx->B->data; 1618 y = (Mat_SeqAIJ *)yy->B->data; 1619 bnz = (PetscBLASInt)x->nz; 1620 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1621 } else if (str == SUBSET_NONZERO_PATTERN) { 1622 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1623 1624 x = (Mat_SeqAIJ *)xx->B->data; 1625 y = (Mat_SeqAIJ *)yy->B->data; 1626 if (y->xtoy && y->XtoY != xx->B) { 1627 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1628 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1629 } 1630 if (!y->xtoy) { /* get xtoy */ 1631 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1632 y->XtoY = xx->B; 1633 } 1634 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1635 } else { 1636 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatConjugate_MPIAIJ" 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1646 { 1647 #if defined(PETSC_USE_COMPLEX) 1648 PetscErrorCode ierr; 1649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1650 1651 PetscFunctionBegin; 1652 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1653 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1654 #else 1655 PetscFunctionBegin; 1656 #endif 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatRealPart_MPIAIJ" 1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1663 { 1664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1665 PetscErrorCode ierr; 1666 1667 PetscFunctionBegin; 1668 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1669 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1682 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #ifdef PETSC_HAVE_PBGL 1687 #include <boost/parallel/mpi/bsp_process_group.hpp> 1688 typedef boost::parallel::mpi::bsp_process_group 
process_group_type; 1689 1690 #include <boost/graph/distributed/adjacency_list.hpp> 1691 #include <boost/parallel/mpi/bsp_process_group.hpp> 1692 1693 #include <boost/graph/distributed/petsc/interface.hpp> 1694 #include <boost/graph/distributed/ilu_0.hpp> 1695 1696 namespace petsc = boost::distributed::petsc; 1697 using namespace std; 1698 typedef double value_type; 1699 typedef boost::graph::distributed::ilu_elimination_state elimination_state; 1700 typedef boost::adjacency_list<boost::listS, 1701 boost::distributedS<process_group_type, boost::vecS>, 1702 boost::bidirectionalS, 1703 // Vertex properties 1704 boost::no_property, 1705 // Edge properties 1706 boost::property<boost::edge_weight_t, value_type, 1707 boost::property<boost::edge_finished_t, elimination_state> > > graph_type; 1708 1709 typedef boost::graph_traits<graph_type>::vertex_descriptor vertex_type; 1710 typedef boost::graph_traits<graph_type>::edge_descriptor edge_type; 1711 typedef boost::property_map<graph_type, boost::edge_weight_t>::type weight_map_type; 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1715 /* 1716 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1717 */ 1718 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact) 1719 { 1720 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1721 PetscTruth row_identity, col_identity; 1722 PetscObjectContainer c; 1723 PetscInt m, n, M, N; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1728 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1729 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1730 if (!row_identity || !col_identity) { 1731 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1732 } 1733 1734 process_group_type pg; 1735 graph_type* graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1736 graph_type& graph = *graph_p; 1737 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1738 1739 //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph)); 1740 boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph); 1741 BGL_FORALL_EDGES(e, graph, graph_type) 1742 put(finished, e, boost::graph::distributed::unseen); 1743 1744 ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph)); 1745 1746 /* put together the new matrix */ 1747 ierr = MatCreate(A->comm, fact);CHKERRQ(ierr); 1748 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1749 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1750 ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr); 1751 ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr); 1752 (*fact)->factor = FACTOR_LU; 1753 1754 ierr = PetscObjectContainerCreate(A->comm, &c); 1755 ierr = PetscObjectContainerSetPointer(c, graph_p); 1756 ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1762 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B) 1763 { 1764 PetscFunctionBegin; 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatSolve_MPIAIJ" 1770 /* 1771 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 
1772 */ 1773 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1774 { 1775 graph_type* graph_p; 1776 PetscObjectContainer c; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1781 ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr); 1782 ierr = VecCopy(b, x); CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 #endif 1786 1787 /* -------------------------------------------------------------------*/ 1788 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1789 MatGetRow_MPIAIJ, 1790 MatRestoreRow_MPIAIJ, 1791 MatMult_MPIAIJ, 1792 /* 4*/ MatMultAdd_MPIAIJ, 1793 MatMultTranspose_MPIAIJ, 1794 MatMultTransposeAdd_MPIAIJ, 1795 #ifdef PETSC_HAVE_PBGL 1796 MatSolve_MPIAIJ, 1797 #else 1798 0, 1799 #endif 1800 0, 1801 0, 1802 /*10*/ 0, 1803 0, 1804 0, 1805 MatRelax_MPIAIJ, 1806 MatTranspose_MPIAIJ, 1807 /*15*/ MatGetInfo_MPIAIJ, 1808 MatEqual_MPIAIJ, 1809 MatGetDiagonal_MPIAIJ, 1810 MatDiagonalScale_MPIAIJ, 1811 MatNorm_MPIAIJ, 1812 /*20*/ MatAssemblyBegin_MPIAIJ, 1813 MatAssemblyEnd_MPIAIJ, 1814 0, 1815 MatSetOption_MPIAIJ, 1816 MatZeroEntries_MPIAIJ, 1817 /*25*/ MatZeroRows_MPIAIJ, 1818 0, 1819 #ifdef PETSC_HAVE_PBGL 1820 MatLUFactorNumeric_MPIAIJ, 1821 #else 1822 0, 1823 #endif 1824 0, 1825 0, 1826 /*30*/ MatSetUpPreallocation_MPIAIJ, 1827 #ifdef PETSC_HAVE_PBGL 1828 MatILUFactorSymbolic_MPIAIJ, 1829 #else 1830 0, 1831 #endif 1832 0, 1833 0, 1834 0, 1835 /*35*/ MatDuplicate_MPIAIJ, 1836 0, 1837 0, 1838 0, 1839 0, 1840 /*40*/ MatAXPY_MPIAIJ, 1841 MatGetSubMatrices_MPIAIJ, 1842 MatIncreaseOverlap_MPIAIJ, 1843 MatGetValues_MPIAIJ, 1844 MatCopy_MPIAIJ, 1845 /*45*/ 0, 1846 MatScale_MPIAIJ, 1847 0, 1848 0, 1849 0, 1850 /*50*/ MatSetBlockSize_MPIAIJ, 1851 0, 1852 0, 1853 0, 1854 0, 1855 /*55*/ MatFDColoringCreate_MPIAIJ, 1856 0, 1857 MatSetUnfactored_MPIAIJ, 1858 MatPermute_MPIAIJ, 1859 0, 1860 /*60*/ MatGetSubMatrix_MPIAIJ, 1861 MatDestroy_MPIAIJ, 1862 MatView_MPIAIJ, 1863 0, 1864 0, 1865 /*65*/ 0, 1866 0, 1867 0, 1868 0, 1869 0, 1870 /*70*/ 0, 1871 0, 1872 MatSetColoring_MPIAIJ, 1873 #if defined(PETSC_HAVE_ADIC) 1874 MatSetValuesAdic_MPIAIJ, 1875 #else 1876 0, 1877 #endif 1878 MatSetValuesAdifor_MPIAIJ, 1879 /*75*/ 0, 1880 0, 1881 0, 1882 0, 1883 0, 1884 /*80*/ 0, 1885 0, 1886 0, 1887 0, 1888 /*84*/ MatLoad_MPIAIJ, 1889 0, 1890 0, 1891 0, 1892 0, 1893 0, 1894 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1895 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1896 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1897 MatPtAP_Basic, 1898 MatPtAPSymbolic_MPIAIJ, 1899 /*95*/ MatPtAPNumeric_MPIAIJ, 1900 0, 1901 0, 1902 0, 1903 0, 1904 /*100*/0, 1905 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1906 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1907 MatConjugate_MPIAIJ, 1908 0, 1909 /*105*/MatSetValuesRow_MPIAIJ, 1910 MatRealPart_MPIAIJ, 1911 MatImaginaryPart_MPIAIJ}; 1912 1913 /* ----------------------------------------------------------------------------------------*/ 1914 1915 EXTERN_C_BEGIN 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1918 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1919 { 1920 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1925 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 EXTERN_C_END 1929 1930 EXTERN_C_BEGIN 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1933 PetscErrorCode PETSCMAT_DLLEXPORT 
MatRetrieveValues_MPIAIJ(Mat mat) 1934 { 1935 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1940 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 EXTERN_C_END 1944 1945 #include "petscpc.h" 1946 EXTERN_C_BEGIN 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1949 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1950 { 1951 Mat_MPIAIJ *b; 1952 PetscErrorCode ierr; 1953 PetscInt i; 1954 1955 PetscFunctionBegin; 1956 B->preallocated = PETSC_TRUE; 1957 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1958 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1959 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1960 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1961 1962 B->rmap.bs = B->cmap.bs = 1; 1963 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1964 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1965 if (d_nnz) { 1966 for (i=0; i<B->rmap.n; i++) { 1967 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1968 } 1969 } 1970 if (o_nnz) { 1971 for (i=0; i<B->rmap.n; i++) { 1972 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1973 } 1974 } 1975 b = (Mat_MPIAIJ*)B->data; 1976 1977 /* Explicitly create 2 MATSEQAIJ matrices. */ 1978 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1979 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1980 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1981 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1982 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1983 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1984 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1985 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1986 1987 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1988 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1989 1990 PetscFunctionReturn(0); 1991 } 1992 EXTERN_C_END 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1996 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1997 { 1998 Mat mat; 1999 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 *newmat = 0; 2004 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2005 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2006 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2007 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2008 a = (Mat_MPIAIJ*)mat->data; 2009 2010 mat->factor = matin->factor; 2011 mat->rmap.bs = matin->rmap.bs; 2012 mat->assembled = PETSC_TRUE; 2013 mat->insertmode = NOT_SET_VALUES; 2014 mat->preallocated = PETSC_TRUE; 2015 2016 a->size = oldmat->size; 2017 a->rank = oldmat->rank; 2018 a->donotstash = oldmat->donotstash; 2019 a->roworiented = oldmat->roworiented; 2020 a->rowindices = 0; 2021 a->rowvalues = 0; 2022 a->getrowactive = PETSC_FALSE; 2023 2024 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2025 ierr = 
PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2026 2027 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2028 if (oldmat->colmap) { 2029 #if defined (PETSC_USE_CTABLE) 2030 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2031 #else 2032 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2033 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2034 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 2035 #endif 2036 } else a->colmap = 0; 2037 if (oldmat->garray) { 2038 PetscInt len; 2039 len = oldmat->B->cmap.n; 2040 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2041 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2042 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2043 } else a->garray = 0; 2044 2045 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2046 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2047 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2048 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2049 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2050 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2051 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2052 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2053 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2054 *newmat = mat; 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #include "petscsys.h" 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatLoad_MPIAIJ" 2062 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2063 { 2064 Mat A; 2065 PetscScalar *vals,*svals; 2066 MPI_Comm comm = ((PetscObject)viewer)->comm; 2067 MPI_Status status; 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 2070 PetscInt i,nz,j,rstart,rend,mmax; 2071 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2072 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2073 PetscInt cend,cstart,n,*rowners; 2074 int fd; 2075 2076 PetscFunctionBegin; 2077 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2079 if (!rank) { 2080 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2081 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2082 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2083 } 2084 2085 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2086 M = header[1]; N = header[2]; 2087 /* determine ownership of all rows */ 2088 m = M/size + ((M % size) > rank); 2089 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2090 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2091 2092 /* First process needs enough room for process with most rows */ 2093 if (!rank) { 2094 mmax = rowners[1]; 2095 for (i=2; i<size; i++) { 2096 mmax = PetscMax(mmax,rowners[i]); 2097 } 2098 } else mmax = m; 2099 2100 rowners[0] = 0; 2101 for (i=2; i<=size; i++) { 2102 mmax = PetscMax(mmax,rowners[i]); 2103 rowners[i] += rowners[i-1]; 2104 } 2105 rstart = rowners[rank]; 2106 rend = rowners[rank+1]; 2107 2108 /* distribute row lengths to all processors */ 2109 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2110 if (!rank) { 2111 ierr = 
PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2112 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2113 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2114 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2115 for (j=0; j<m; j++) { 2116 procsnz[0] += ourlens[j]; 2117 } 2118 for (i=1; i<size; i++) { 2119 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2120 /* calculate the number of nonzeros on each processor */ 2121 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2122 procsnz[i] += rowlengths[j]; 2123 } 2124 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2127 } else { 2128 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2129 } 2130 2131 if (!rank) { 2132 /* determine max buffer needed and allocate it */ 2133 maxnz = 0; 2134 for (i=0; i<size; i++) { 2135 maxnz = PetscMax(maxnz,procsnz[i]); 2136 } 2137 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2138 2139 /* read in my part of the matrix column indices */ 2140 nz = procsnz[0]; 2141 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2142 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2143 2144 /* read in every one elses and ship off */ 2145 for (i=1; i<size; i++) { 2146 nz = procsnz[i]; 2147 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2148 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2149 } 2150 ierr = PetscFree(cols);CHKERRQ(ierr); 2151 } else { 2152 /* determine buffer space needed for message */ 2153 nz = 0; 2154 for (i=0; i<m; i++) { 2155 nz += ourlens[i]; 2156 } 2157 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2158 2159 /* receive message of column indices*/ 2160 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2161 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2162 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2163 } 2164 2165 /* determine column ownership if matrix is not square */ 2166 if (N != M) { 2167 n = N/size + ((N % size) > rank); 2168 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2169 cstart = cend - n; 2170 } else { 2171 cstart = rstart; 2172 cend = rend; 2173 n = cend - cstart; 2174 } 2175 2176 /* loop over local rows, determining number of off diagonal entries */ 2177 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2178 jj = 0; 2179 for (i=0; i<m; i++) { 2180 for (j=0; j<ourlens[i]; j++) { 2181 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2182 jj++; 2183 } 2184 } 2185 2186 /* create our matrix */ 2187 for (i=0; i<m; i++) { 2188 ourlens[i] -= offlens[i]; 2189 } 2190 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2191 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2192 ierr = MatSetType(A,type);CHKERRQ(ierr); 2193 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2194 2195 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2196 for (i=0; i<m; i++) { 2197 ourlens[i] += offlens[i]; 2198 } 2199 2200 if (!rank) { 2201 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2202 2203 /* read in my part of the matrix numerical values */ 2204 nz = procsnz[0]; 2205 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2206 2207 /* insert into matrix */ 2208 jj = rstart; 2209 smycols = mycols; 2210 svals = vals; 2211 for (i=0; i<m; i++) { 2212 ierr = 
MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2213 smycols += ourlens[i]; 2214 svals += ourlens[i]; 2215 jj++; 2216 } 2217 2218 /* read in other processors and ship out */ 2219 for (i=1; i<size; i++) { 2220 nz = procsnz[i]; 2221 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2222 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2223 } 2224 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2225 } else { 2226 /* receive numeric values */ 2227 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2228 2229 /* receive message of values*/ 2230 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2231 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2232 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2233 2234 /* insert into matrix */ 2235 jj = rstart; 2236 smycols = mycols; 2237 svals = vals; 2238 for (i=0; i<m; i++) { 2239 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2240 smycols += ourlens[i]; 2241 svals += ourlens[i]; 2242 jj++; 2243 } 2244 } 2245 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2246 ierr = PetscFree(vals);CHKERRQ(ierr); 2247 ierr = PetscFree(mycols);CHKERRQ(ierr); 2248 ierr = PetscFree(rowners);CHKERRQ(ierr); 2249 2250 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 *newmat = A; 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2258 /* 2259 Not great since it makes two copies of the submatrix, first an SeqAIJ 2260 in local and then by concatenating the local matrices the end result. 2261 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2262 */ 2263 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2264 { 2265 PetscErrorCode ierr; 2266 PetscMPIInt rank,size; 2267 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2268 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2269 Mat *local,M,Mreuse; 2270 PetscScalar *vwork,*aa; 2271 MPI_Comm comm = mat->comm; 2272 Mat_SeqAIJ *aij; 2273 2274 2275 PetscFunctionBegin; 2276 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2277 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2278 2279 if (call == MAT_REUSE_MATRIX) { 2280 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2281 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2282 local = &Mreuse; 2283 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2284 } else { 2285 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2286 Mreuse = *local; 2287 ierr = PetscFree(local);CHKERRQ(ierr); 2288 } 2289 2290 /* 2291 m - number of local rows 2292 n - number of columns (same on all processors) 2293 rstart - first row in new global matrix generated 2294 */ 2295 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2296 if (call == MAT_INITIAL_MATRIX) { 2297 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2298 ii = aij->i; 2299 jj = aij->j; 2300 2301 /* 2302 Determine the number of non-zeros in the diagonal and off-diagonal 2303 portions of the matrix in order to do correct preallocation 2304 */ 2305 2306 /* first get start and end of "diagonal" columns */ 2307 if (csize == PETSC_DECIDE) { 2308 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 
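/* csize was left as PETSC_DECIDE: for a square submatrix reuse the local row count as the
   local column count, otherwise split the n columns as evenly as possible across processes */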
2309 if (mglobal == n) { /* square matrix */ 2310 nlocal = m; 2311 } else { 2312 nlocal = n/size + ((n % size) > rank); 2313 } 2314 } else { 2315 nlocal = csize; 2316 } 2317 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2318 rstart = rend - nlocal; 2319 if (rank == size - 1 && rend != n) { 2320 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2321 } 2322 2323 /* next, compute all the lengths */ 2324 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2325 olens = dlens + m; 2326 for (i=0; i<m; i++) { 2327 jend = ii[i+1] - ii[i]; 2328 olen = 0; 2329 dlen = 0; 2330 for (j=0; j<jend; j++) { 2331 if (*jj < rstart || *jj >= rend) olen++; 2332 else dlen++; 2333 jj++; 2334 } 2335 olens[i] = olen; 2336 dlens[i] = dlen; 2337 } 2338 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2339 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2340 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2341 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2342 ierr = PetscFree(dlens);CHKERRQ(ierr); 2343 } else { 2344 PetscInt ml,nl; 2345 2346 M = *newmat; 2347 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2348 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2349 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2350 /* 2351 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2352 rather than the slower MatSetValues(). 2353 */ 2354 M->was_assembled = PETSC_TRUE; 2355 M->assembled = PETSC_FALSE; 2356 } 2357 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2358 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2359 ii = aij->i; 2360 jj = aij->j; 2361 aa = aij->a; 2362 for (i=0; i<m; i++) { 2363 row = rstart + i; 2364 nz = ii[i+1] - ii[i]; 2365 cwork = jj; jj += nz; 2366 vwork = aa; aa += nz; 2367 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2368 } 2369 2370 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 *newmat = M; 2373 2374 /* save submatrix used in processor for next request */ 2375 if (call == MAT_INITIAL_MATRIX) { 2376 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2377 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2378 } 2379 2380 PetscFunctionReturn(0); 2381 } 2382 2383 EXTERN_C_BEGIN 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2386 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2387 { 2388 PetscInt m,cstart, cend,j,nnz,i,d; 2389 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2390 const PetscInt *JJ; 2391 PetscScalar *values; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2396 2397 B->rmap.bs = B->cmap.bs = 1; 2398 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2399 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2400 m = B->rmap.n; 2401 cstart = B->cmap.rstart; 2402 cend = B->cmap.rend; 2403 rstart = B->rmap.rstart; 2404 2405 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2406 o_nnz = d_nnz + m; 2407 2408 for (i=0; i<m; i++) { 2409 nnz = Ii[i+1]- Ii[i]; 2410 JJ = J + Ii[i]; 2411 nnz_max = PetscMax(nnz_max,nnz); 2412 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative 
%D number of columns",i,nnz); 2413 for (j=0; j<nnz; j++) { 2414 if (*JJ >= cstart) break; 2415 JJ++; 2416 } 2417 d = 0; 2418 for (; j<nnz; j++) { 2419 if (*JJ++ >= cend) break; 2420 d++; 2421 } 2422 d_nnz[i] = d; 2423 o_nnz[i] = nnz - d; 2424 } 2425 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2426 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2427 2428 if (v) values = (PetscScalar*)v; 2429 else { 2430 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2431 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2432 } 2433 2434 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2435 for (i=0; i<m; i++) { 2436 ii = i + rstart; 2437 nnz = Ii[i+1]- Ii[i]; 2438 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2439 } 2440 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2441 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2442 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2443 2444 if (!v) { 2445 ierr = PetscFree(values);CHKERRQ(ierr); 2446 } 2447 PetscFunctionReturn(0); 2448 } 2449 EXTERN_C_END 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2453 /*@ 2454 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2455 (the default parallel PETSc format). 2456 2457 Collective on MPI_Comm 2458 2459 Input Parameters: 2460 + B - the matrix 2461 . i - the indices into j for the start of each local row (starts with zero) 2462 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2463 - v - optional values in the matrix 2464 2465 Level: developer 2466 2467 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2468 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2469 2470 .keywords: matrix, aij, compressed row, sparse, parallel 2471 2472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2473 MatCreateSeqAIJWithArrays() 2474 @*/ 2475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2476 { 2477 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2478 2479 PetscFunctionBegin; 2480 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2481 if (f) { 2482 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2483 } 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2489 /*@C 2490 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2491 (the default parallel PETSc format). For good matrix assembly performance 2492 the user should preallocate the matrix storage by setting the parameters 2493 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2494 performance can be increased by more than a factor of 50. 2495 2496 Collective on MPI_Comm 2497 2498 Input Parameters: 2499 + A - the matrix 2500 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2501 (same value is used for all local rows) 2502 . 
d_nnz - array containing the number of nonzeros in the various rows of the 2503 DIAGONAL portion of the local submatrix (possibly different for each row) 2504 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2505 The size of this array is equal to the number of local rows, i.e., 'm'. 2506 You must leave room for the diagonal entry even if it is zero. 2507 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2508 submatrix (same value is used for all local rows). 2509 - o_nnz - array containing the number of nonzeros in the various rows of the 2510 OFF-DIAGONAL portion of the local submatrix (possibly different for 2511 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2512 structure. The size of this array is equal to the number 2513 of local rows, i.e., 'm'. 2514 2515 If the *_nnz parameter is given then the *_nz parameter is ignored. 2516 2517 The AIJ format (also called the Yale sparse matrix format or 2518 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2519 storage. The stored row and column indices begin with zero. See the users manual for details. 2520 2521 The parallel matrix is partitioned such that the first m0 rows belong to 2522 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2523 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2524 2525 The DIAGONAL portion of the local submatrix of a processor can be defined 2526 as the submatrix which is obtained by extracting the part corresponding 2527 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2528 first row that belongs to the processor, and r2 is the last row belonging 2529 to this processor. This is a square mxm matrix. The remaining portion 2530 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2531 2532 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2533 2534 Example usage: 2535 2536 Consider the following 8x8 matrix with 34 non-zero values, that is 2537 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2538 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2539 as follows: 2540 2541 .vb 2542 1 2 0 | 0 3 0 | 0 4 2543 Proc0 0 5 6 | 7 0 0 | 8 0 2544 9 0 10 | 11 0 0 | 12 0 2545 ------------------------------------- 2546 13 0 14 | 15 16 17 | 0 0 2547 Proc1 0 18 0 | 19 20 21 | 0 0 2548 0 0 0 | 22 23 0 | 24 0 2549 ------------------------------------- 2550 Proc2 25 26 27 | 0 0 28 | 29 0 2551 30 0 0 | 31 32 33 | 0 34 2552 .ve 2553 2554 This can be represented as a collection of submatrices as: 2555 2556 .vb 2557 A B C 2558 D E F 2559 G H I 2560 .ve 2561 2562 Where the submatrices A,B,C are owned by proc0, D,E,F are 2563 owned by proc1, and G,H,I are owned by proc2. 2564 2565 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2566 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2567 The 'M','N' parameters are 8,8, and have the same values on all procs. 2568 2569 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2570 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2571 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2572 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2573 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2574 matrix, and [DF] as another SeqAIJ matrix.
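   As a minimal sketch (assuming B has already been created with its local/global sizes set
   and its type set to MATMPIAIJ), proc1 in the example above could preallocate its rows with
   the per-row counts worked out below:
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve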
2575 2576 When d_nz, o_nz parameters are specified, d_nz storage elements are 2577 allocated for every row of the local diagonal submatrix, and o_nz 2578 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2579 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2580 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2581 In this case, the values of d_nz,o_nz are: 2582 .vb 2583 proc0 : d_nz = 2, o_nz = 2 2584 proc1 : d_nz = 3, o_nz = 2 2585 proc2 : d_nz = 1, o_nz = 4 2586 .ve 2587 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2588 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2589 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2590 34 values. 2591 2592 When d_nnz, o_nnz parameters are specified, the storage is specified 2593 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2594 In the above case the values for d_nnz,o_nnz are: 2595 .vb 2596 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2597 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2598 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2599 .ve 2600 Here the space allocated is the sum of all the above values, i.e., 34, and 2601 hence the pre-allocation is perfect. 2602 2603 Level: intermediate 2604 2605 .keywords: matrix, aij, compressed row, sparse, parallel 2606 2607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2608 MPIAIJ 2609 @*/ 2610 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2611 { 2612 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2613 2614 PetscFunctionBegin; 2615 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2616 if (f) { 2617 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2618 } 2619 PetscFunctionReturn(0); 2620 } 2621 2622 #undef __FUNCT__ 2623 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2624 /*@C 2625 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the 2626 local rows in standard CSR format. 2627 2628 Collective on MPI_Comm 2629 2630 Input Parameters: 2631 + comm - MPI communicator 2632 . m - number of local rows (cannot be PETSC_DECIDE) 2633 . n - This value should be the same as the local size used in creating the 2634 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2635 calculated if N is given) For square matrices n is almost always m. 2636 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2637 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2638 . i - row indices 2639 . j - column indices 2640 - a - matrix values 2641 2642 Output Parameter: 2643 . mat - the matrix 2644 Level: intermediate 2645 2646 Notes: 2647 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2648 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2649 called this routine.
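   A minimal sketch of a call, with two purely illustrative local rows (mat and ierr are
   assumed to be declared elsewhere; j holds global column indices, sorted within each row):
.vb
     PetscInt    i[] = {0,2,3};          /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,1,1};          /* global column indices */
     PetscScalar a[] = {1.0,2.0,3.0};    /* the corresponding values */
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&mat);CHKERRQ(ierr);
.ve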
2650 2651 The i and j indices are 0-based. 2652 2653 .keywords: matrix, aij, compressed row, sparse, parallel 2654 2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2656 MPIAIJ, MatCreateMPIAIJ() 2657 @*/ 2658 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2659 { 2660 PetscErrorCode ierr; 2661 2662 PetscFunctionBegin; 2663 if (i[0]) { 2664 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2665 } 2666 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2667 ierr = MatCreate(comm,mat);CHKERRQ(ierr); ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2668 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2669 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatCreateMPIAIJ" 2675 /*@C 2676 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2677 (the default parallel PETSc format). For good matrix assembly performance 2678 the user should preallocate the matrix storage by setting the parameters 2679 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2680 performance can be increased by more than a factor of 50. 2681 2682 Collective on MPI_Comm 2683 2684 Input Parameters: 2685 + comm - MPI communicator 2686 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2687 This value should be the same as the local size used in creating the 2688 y vector for the matrix-vector product y = Ax. 2689 . n - This value should be the same as the local size used in creating the 2690 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2691 calculated if N is given) For square matrices n is almost always m. 2692 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2693 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2694 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2695 (same value is used for all local rows) 2696 . d_nnz - array containing the number of nonzeros in the various rows of the 2697 DIAGONAL portion of the local submatrix (possibly different for each row) 2698 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2699 The size of this array is equal to the number of local rows, i.e., 'm'. 2700 You must leave room for the diagonal entry even if it is zero. 2701 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2702 submatrix (same value is used for all local rows). 2703 - o_nnz - array containing the number of nonzeros in the various rows of the 2704 OFF-DIAGONAL portion of the local submatrix (possibly different for 2705 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2706 structure. The size of this array is equal to the number 2707 of local rows, i.e., 'm'. 2708 2709 Output Parameter: 2710 . A - the matrix 2711 2712 Notes: 2713 If the *_nnz parameter is given then the *_nz parameter is ignored. 2714 2715 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2716 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2717 storage requirements for this matrix.
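   For example, a rough sketch that lets PETSc choose the row and column partitioning and
   uses uniform per-row estimates (M and N are the global sizes; 5 and 2 match the defaults
   used when no preallocation information is supplied):
.vb
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve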
2718 2719 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2720 processor then it must be used on all processors that share the object for 2721 that argument. 2722 2723 The user MUST specify either the local or global matrix dimensions 2724 (possibly both). 2725 2726 The parallel matrix is partitioned such that the first m0 rows belong to 2727 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2728 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2729 2730 The DIAGONAL portion of the local submatrix of a processor can be defined 2731 as the submatrix which is obtained by extracting the part corresponding 2732 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2733 first row that belongs to the processor, and r2 is the last row belonging 2734 to this processor. This is a square mxm matrix. The remaining portion 2735 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2736 2737 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2738 2739 When calling this routine with a single process communicator, a matrix of 2740 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2741 type of communicator, use the construction mechanism: 2742 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2743 2744 By default, this format uses inodes (identical nodes) when possible. 2745 We search for consecutive rows with the same nonzero structure, thereby 2746 reusing matrix information to achieve increased efficiency. 2747 2748 Options Database Keys: 2749 + -mat_no_inode - Do not use inodes 2750 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2751 - -mat_aij_oneindex - Internally use indexing starting at 1 2752 rather than 0. Note that when calling MatSetValues(), 2753 the user still MUST index entries starting at 0! 2754 2755 2756 Example usage: 2757 2758 Consider the following 8x8 matrix with 34 non-zero values, that is 2759 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2760 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2761 as follows: 2762 2763 .vb 2764 1 2 0 | 0 3 0 | 0 4 2765 Proc0 0 5 6 | 7 0 0 | 8 0 2766 9 0 10 | 11 0 0 | 12 0 2767 ------------------------------------- 2768 13 0 14 | 15 16 17 | 0 0 2769 Proc1 0 18 0 | 19 20 21 | 0 0 2770 0 0 0 | 22 23 0 | 24 0 2771 ------------------------------------- 2772 Proc2 25 26 27 | 0 0 28 | 29 0 2773 30 0 0 | 31 32 33 | 0 34 2774 .ve 2775 2776 This can be represented as a collection of submatrices as: 2777 2778 .vb 2779 A B C 2780 D E F 2781 G H I 2782 .ve 2783 2784 Where the submatrices A,B,C are owned by proc0, D,E,F are 2785 owned by proc1, and G,H,I are owned by proc2. 2786 2787 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2788 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2789 The 'M','N' parameters are 8,8, and have the same values on all procs. 2790 2791 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2792 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2793 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2794 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2795 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2796 matrix, and [DF] as another SeqAIJ matrix.
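   To make this concrete, a sketch of the call proc1 could issue for the 8x8 example above
   (every process must make the call, each with its own local sizes and per-row counts,
   which are listed below):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve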
2797 2798 When d_nz, o_nz parameters are specified, d_nz storage elements are 2799 allocated for every row of the local diagonal submatrix, and o_nz 2800 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2801 One way to choose d_nz and o_nz is to use the max nonzerors per local 2802 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2803 In this case, the values of d_nz,o_nz are: 2804 .vb 2805 proc0 : dnz = 2, o_nz = 2 2806 proc1 : dnz = 3, o_nz = 2 2807 proc2 : dnz = 1, o_nz = 4 2808 .ve 2809 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2810 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2811 for proc3. i.e we are using 12+15+10=37 storage locations to store 2812 34 values. 2813 2814 When d_nnz, o_nnz parameters are specified, the storage is specified 2815 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2816 In the above case the values for d_nnz,o_nnz are: 2817 .vb 2818 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2819 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2820 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2821 .ve 2822 Here the space allocated is sum of all the above values i.e 34, and 2823 hence pre-allocation is perfect. 2824 2825 Level: intermediate 2826 2827 .keywords: matrix, aij, compressed row, sparse, parallel 2828 2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2830 MPIAIJ, MatCreateMPIAIJWithArrays() 2831 @*/ 2832 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2833 { 2834 PetscErrorCode ierr; 2835 PetscMPIInt size; 2836 2837 PetscFunctionBegin; 2838 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2839 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2840 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2841 if (size > 1) { 2842 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2843 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2844 } else { 2845 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2846 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2847 } 2848 PetscFunctionReturn(0); 2849 } 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2854 { 2855 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2856 2857 PetscFunctionBegin; 2858 *Ad = a->A; 2859 *Ao = a->B; 2860 *colmap = a->garray; 2861 PetscFunctionReturn(0); 2862 } 2863 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2866 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2867 { 2868 PetscErrorCode ierr; 2869 PetscInt i; 2870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2871 2872 PetscFunctionBegin; 2873 if (coloring->ctype == IS_COLORING_LOCAL) { 2874 ISColoringValue *allcolors,*colors; 2875 ISColoring ocoloring; 2876 2877 /* set coloring for diagonal portion */ 2878 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2879 2880 /* set coloring for off-diagonal portion */ 2881 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2882 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2883 for (i=0; i<a->B->cmap.n; i++) { 2884 colors[i] = allcolors[a->garray[i]]; 2885 } 2886 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2887 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2888 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2889 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2890 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2891 ISColoringValue *colors; 2892 PetscInt *larray; 2893 ISColoring ocoloring; 2894 2895 /* set coloring for diagonal portion */ 2896 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2897 for (i=0; i<a->A->cmap.n; i++) { 2898 larray[i] = i + A->cmap.rstart; 2899 } 2900 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2901 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2902 for (i=0; i<a->A->cmap.n; i++) { 2903 colors[i] = coloring->colors[larray[i]]; 2904 } 2905 ierr = PetscFree(larray);CHKERRQ(ierr); 2906 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2907 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2908 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2909 2910 /* set coloring for off-diagonal portion */ 2911 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2912 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2913 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2914 for (i=0; i<a->B->cmap.n; i++) { 2915 colors[i] = coloring->colors[larray[i]]; 2916 } 2917 ierr = PetscFree(larray);CHKERRQ(ierr); 2918 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2919 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2920 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2921 } else { 2922 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2923 } 2924 2925 PetscFunctionReturn(0); 2926 } 2927 2928 #if defined(PETSC_HAVE_ADIC) 2929 #undef __FUNCT__ 2930 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2931 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2932 { 2933 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2934 PetscErrorCode ierr; 2935 2936 PetscFunctionBegin; 2937 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2938 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2939 PetscFunctionReturn(0); 2940 } 2941 #endif 2942 2943 #undef __FUNCT__ 2944 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2945 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2946 { 2947 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2948 PetscErrorCode ierr; 2949 2950 PetscFunctionBegin; 2951 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2952 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatMerge" 2958 /*@C 2959 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2960 matrices from each processor 2961 2962 Collective on MPI_Comm 2963 2964 Input Parameters: 2965 + comm - the communicators the parallel matrix will live on 2966 . inmat - the input sequential matrices 2967 . n - number of local columns (or PETSC_DECIDE) 2968 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2969 2970 Output Parameter: 2971 . outmat - the parallel matrix generated 2972 2973 Level: advanced 2974 2975 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
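   A rough usage sketch (seqmat and parmat are placeholder names; note from the body below
   that MatMerge() destroys inmat once the parallel matrix has been assembled):
.vb
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
.ve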
2976 2977 @*/ 2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2979 { 2980 PetscErrorCode ierr; 2981 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 2982 PetscInt *indx; 2983 PetscScalar *values; 2984 2985 PetscFunctionBegin; 2986 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2987 if (scall == MAT_INITIAL_MATRIX){ 2988 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2989 if (n == PETSC_DECIDE){ 2990 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2991 } 2992 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2993 rstart -= m; 2994 2995 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2996 for (i=0;i<m;i++) { 2997 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2998 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2999 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3000 } 3001 /* This routine will ONLY return MPIAIJ type matrix */ 3002 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3003 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3004 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3005 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3006 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3007 3008 } else if (scall == MAT_REUSE_MATRIX){ 3009 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3010 } else { 3011 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3012 } 3013 3014 for (i=0;i<m;i++) { 3015 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3016 Ii = i + rstart; 3017 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3018 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3019 } 3020 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3021 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 3024 PetscFunctionReturn(0); 3025 } 3026 3027 #undef __FUNCT__ 3028 #define __FUNCT__ "MatFileSplit" 3029 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3030 { 3031 PetscErrorCode ierr; 3032 PetscMPIInt rank; 3033 PetscInt m,N,i,rstart,nnz; 3034 size_t len; 3035 const PetscInt *indx; 3036 PetscViewer out; 3037 char *name; 3038 Mat B; 3039 const PetscScalar *values; 3040 3041 PetscFunctionBegin; 3042 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3043 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3044 /* Should this be the type of the diagonal block of A? 
*/ 3045 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3046 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3047 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3048 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3049 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3050 for (i=0;i<m;i++) { 3051 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3052 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3053 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3054 } 3055 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 3058 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 3059 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3060 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3061 sprintf(name,"%s.%d",outfile,rank); 3062 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3063 ierr = PetscFree(name); 3064 ierr = MatView(B,out);CHKERRQ(ierr); 3065 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3066 ierr = MatDestroy(B);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3073 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3074 { 3075 PetscErrorCode ierr; 3076 Mat_Merge_SeqsToMPI *merge; 3077 PetscObjectContainer container; 3078 3079 PetscFunctionBegin; 3080 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3081 if (container) { 3082 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3083 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3084 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3085 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3086 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3087 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3088 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3089 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3090 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3091 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3092 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3093 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 3094 3095 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 3096 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3097 } 3098 ierr = PetscFree(merge);CHKERRQ(ierr); 3099 3100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 #include "src/mat/utils/freespace.h" 3105 #include "petscbt.h" 3106 static PetscEvent logkey_seqstompinum = 0; 3107 #undef __FUNCT__ 3108 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3109 /*@C 3110 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3111 matrices from each processor 3112 3113 Collective on MPI_Comm 3114 3115 Input Parameters: 3116 + comm - the communicators the parallel matrix will live on 3117 . seqmat - the input sequential matrices 3118 . m - number of local rows (or PETSC_DECIDE) 3119 . n - number of local columns (or PETSC_DECIDE) 3120 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3121 3122 Output Parameter: 3123 . mpimat - the parallel matrix generated 3124 3125 Level: advanced 3126 3127 Notes: 3128 The dimensions of the sequential matrix in each processor MUST be the same. 
3129 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3130 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3131 @*/ 3132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3133 { 3134 PetscErrorCode ierr; 3135 MPI_Comm comm=mpimat->comm; 3136 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3137 PetscMPIInt size,rank,taga,*len_s; 3138 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3139 PetscInt proc,m; 3140 PetscInt **buf_ri,**buf_rj; 3141 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3142 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3143 MPI_Request *s_waits,*r_waits; 3144 MPI_Status *status; 3145 MatScalar *aa=a->a,**abuf_r,*ba_i; 3146 Mat_Merge_SeqsToMPI *merge; 3147 PetscObjectContainer container; 3148 3149 PetscFunctionBegin; 3150 if (!logkey_seqstompinum) { 3151 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3152 } 3153 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3154 3155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3157 3158 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3159 if (container) { 3160 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3161 } 3162 bi = merge->bi; 3163 bj = merge->bj; 3164 buf_ri = merge->buf_ri; 3165 buf_rj = merge->buf_rj; 3166 3167 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3168 owners = merge->rowmap.range; 3169 len_s = merge->len_s; 3170 3171 /* send and recv matrix values */ 3172 /*-----------------------------*/ 3173 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3174 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3175 3176 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3177 for (proc=0,k=0; proc<size; proc++){ 3178 if (!len_s[proc]) continue; 3179 i = owners[proc]; 3180 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3181 k++; 3182 } 3183 3184 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3185 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3186 ierr = PetscFree(status);CHKERRQ(ierr); 3187 3188 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3189 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3190 3191 /* insert mat values of mpimat */ 3192 /*----------------------------*/ 3193 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3194 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3195 nextrow = buf_ri_k + merge->nrecv; 3196 nextai = nextrow + merge->nrecv; 3197 3198 for (k=0; k<merge->nrecv; k++){ 3199 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3200 nrows = *(buf_ri_k[k]); 3201 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3202 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3203 } 3204 3205 /* set values of ba */ 3206 m = merge->rowmap.n; 3207 for (i=0; i<m; i++) { 3208 arow = owners[rank] + i; 3209 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3210 bnzi = bi[i+1] - bi[i]; 3211 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3212 3213 /* add local non-zero vals of this proc's seqmat into ba */ 3214 anzi = 
ai[arow+1] - ai[arow]; 3215 aj = a->j + ai[arow]; 3216 aa = a->a + ai[arow]; 3217 nextaj = 0; 3218 for (j=0; nextaj<anzi; j++){ 3219 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3220 ba_i[j] += aa[nextaj++]; 3221 } 3222 } 3223 3224 /* add received vals into ba */ 3225 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3226 /* i-th row */ 3227 if (i == *nextrow[k]) { 3228 anzi = *(nextai[k]+1) - *nextai[k]; 3229 aj = buf_rj[k] + *(nextai[k]); 3230 aa = abuf_r[k] + *(nextai[k]); 3231 nextaj = 0; 3232 for (j=0; nextaj<anzi; j++){ 3233 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3234 ba_i[j] += aa[nextaj++]; 3235 } 3236 } 3237 nextrow[k]++; nextai[k]++; 3238 } 3239 } 3240 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3241 } 3242 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3243 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3244 3245 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3246 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3247 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3248 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3249 PetscFunctionReturn(0); 3250 } 3251 3252 static PetscEvent logkey_seqstompisym = 0; 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3255 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3256 { 3257 PetscErrorCode ierr; 3258 Mat B_mpi; 3259 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3260 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3261 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3262 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3263 PetscInt len,proc,*dnz,*onz; 3264 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3265 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3266 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3267 MPI_Status *status; 3268 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3269 PetscBT lnkbt; 3270 Mat_Merge_SeqsToMPI *merge; 3271 PetscObjectContainer container; 3272 3273 PetscFunctionBegin; 3274 if (!logkey_seqstompisym) { 3275 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3276 } 3277 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3278 3279 /* make sure it is a PETSc comm */ 3280 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3283 3284 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3285 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3286 3287 /* determine row ownership */ 3288 /*---------------------------------------------------------*/ 3289 merge->rowmap.n = m; 3290 merge->rowmap.N = M; 3291 merge->rowmap.bs = 1; 3292 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3293 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3294 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3295 3296 m = merge->rowmap.n; 3297 M = merge->rowmap.N; 3298 owners = merge->rowmap.range; 3299 3300 /* determine the number of messages to send, their lengths */ 3301 /*---------------------------------------------------------*/ 3302 len_s = merge->len_s; 3303 3304 len = 0; /* length of buf_si[] */ 3305 merge->nsend = 0; 3306 for (proc=0; proc<size; proc++){ 3307 len_si[proc] = 0; 3308 if (proc == rank){ 3309 
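/* nothing is sent to this process itself, so its outgoing message length stays zero */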
len_s[proc] = 0; 3310 } else { 3311 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3312 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3313 } 3314 if (len_s[proc]) { 3315 merge->nsend++; 3316 nrows = 0; 3317 for (i=owners[proc]; i<owners[proc+1]; i++){ 3318 if (ai[i+1] > ai[i]) nrows++; 3319 } 3320 len_si[proc] = 2*(nrows+1); 3321 len += len_si[proc]; 3322 } 3323 } 3324 3325 /* determine the number and length of messages to receive for ij-structure */ 3326 /*-------------------------------------------------------------------------*/ 3327 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3328 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3329 3330 /* post the Irecv of j-structure */ 3331 /*-------------------------------*/ 3332 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3333 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3334 3335 /* post the Isend of j-structure */ 3336 /*--------------------------------*/ 3337 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3338 sj_waits = si_waits + merge->nsend; 3339 3340 for (proc=0, k=0; proc<size; proc++){ 3341 if (!len_s[proc]) continue; 3342 i = owners[proc]; 3343 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3344 k++; 3345 } 3346 3347 /* receives and sends of j-structure are complete */ 3348 /*------------------------------------------------*/ 3349 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3350 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3351 3352 /* send and recv i-structure */ 3353 /*---------------------------*/ 3354 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3355 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3356 3357 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3358 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3359 for (proc=0,k=0; proc<size; proc++){ 3360 if (!len_s[proc]) continue; 3361 /* form outgoing message for i-structure: 3362 buf_si[0]: nrows to be sent 3363 [1:nrows]: row index (global) 3364 [nrows+1:2*nrows+1]: i-structure index 3365 */ 3366 /*-------------------------------------------*/ 3367 nrows = len_si[proc]/2 - 1; 3368 buf_si_i = buf_si + nrows+1; 3369 buf_si[0] = nrows; 3370 buf_si_i[0] = 0; 3371 nrows = 0; 3372 for (i=owners[proc]; i<owners[proc+1]; i++){ 3373 anzi = ai[i+1] - ai[i]; 3374 if (anzi) { 3375 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3376 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3377 nrows++; 3378 } 3379 } 3380 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3381 k++; 3382 buf_si += len_si[proc]; 3383 } 3384 3385 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3386 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3387 3388 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3389 for (i=0; i<merge->nrecv; i++){ 3390 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3391 } 3392 3393 ierr = PetscFree(len_si);CHKERRQ(ierr); 3394 ierr = 
PetscFree(len_ri);CHKERRQ(ierr); 3395 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3396 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3397 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3398 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3399 ierr = PetscFree(status);CHKERRQ(ierr); 3400 3401 /* compute a local seq matrix in each processor */ 3402 /*----------------------------------------------*/ 3403 /* allocate bi array and free space for accumulating nonzero column info */ 3404 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3405 bi[0] = 0; 3406 3407 /* create and initialize a linked list */ 3408 nlnk = N+1; 3409 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3410 3411 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3412 len = 0; 3413 len = ai[owners[rank+1]] - ai[owners[rank]]; 3414 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3415 current_space = free_space; 3416 3417 /* determine symbolic info for each local row */ 3418 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3419 nextrow = buf_ri_k + merge->nrecv; 3420 nextai = nextrow + merge->nrecv; 3421 for (k=0; k<merge->nrecv; k++){ 3422 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3423 nrows = *buf_ri_k[k]; 3424 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3425 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3426 } 3427 3428 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3429 len = 0; 3430 for (i=0;i<m;i++) { 3431 bnzi = 0; 3432 /* add local non-zero cols of this proc's seqmat into lnk */ 3433 arow = owners[rank] + i; 3434 anzi = ai[arow+1] - ai[arow]; 3435 aj = a->j + ai[arow]; 3436 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3437 bnzi += nlnk; 3438 /* add received col data into lnk */ 3439 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3440 if (i == *nextrow[k]) { /* i-th row */ 3441 anzi = *(nextai[k]+1) - *nextai[k]; 3442 aj = buf_rj[k] + *nextai[k]; 3443 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3444 bnzi += nlnk; 3445 nextrow[k]++; nextai[k]++; 3446 } 3447 } 3448 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3449 3450 /* if free space is not available, make more free space */ 3451 if (current_space->local_remaining<bnzi) { 3452 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3453 nspacedouble++; 3454 } 3455 /* copy data into free space, then initialize lnk */ 3456 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3457 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3458 3459 current_space->array += bnzi; 3460 current_space->local_used += bnzi; 3461 current_space->local_remaining -= bnzi; 3462 3463 bi[i+1] = bi[i] + bnzi; 3464 } 3465 3466 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3467 3468 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3469 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3470 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3471 3472 /* create symbolic parallel matrix B_mpi */ 3473 /*---------------------------------------*/ 3474 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3475 if (n==PETSC_DECIDE) { 3476 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3477 } else { 3478 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3479 } 3480 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3481 ierr = 
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3482 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3483 3484 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3485 B_mpi->assembled = PETSC_FALSE; 3486 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3487 merge->bi = bi; 3488 merge->bj = bj; 3489 merge->buf_ri = buf_ri; 3490 merge->buf_rj = buf_rj; 3491 merge->coi = PETSC_NULL; 3492 merge->coj = PETSC_NULL; 3493 merge->owners_co = PETSC_NULL; 3494 3495 /* attach the supporting struct to B_mpi for reuse */ 3496 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3497 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3498 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3499 *mpimat = B_mpi; 3500 3501 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3502 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 static PetscEvent logkey_seqstompi = 0; 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatMerge_SeqsToMPI" 3509 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3510 { 3511 PetscErrorCode ierr; 3512 3513 PetscFunctionBegin; 3514 if (!logkey_seqstompi) { 3515 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3516 } 3517 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3518 if (scall == MAT_INITIAL_MATRIX){ 3519 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3520 } 3521 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3522 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3523 PetscFunctionReturn(0); 3524 } 3525 static PetscEvent logkey_getlocalmat = 0; 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatGetLocalMat" 3528 /*@C 3529 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3530 3531 Not Collective 3532 3533 Input Parameters: 3534 + A - the matrix 3535 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3536 3537 Output Parameter: 3538 . 
A_loc - the local sequential matrix generated 3539 3540 Level: developer 3541 3542 @*/ 3543 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3544 { 3545 PetscErrorCode ierr; 3546 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3547 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3548 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3549 PetscScalar *aa=a->a,*ba=b->a,*ca; 3550 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3551 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3552 3553 PetscFunctionBegin; 3554 if (!logkey_getlocalmat) { 3555 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3556 } 3557 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3558 if (scall == MAT_INITIAL_MATRIX){ 3559 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3560 ci[0] = 0; 3561 for (i=0; i<am; i++){ 3562 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3563 } 3564 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3565 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3566 k = 0; 3567 for (i=0; i<am; i++) { 3568 ncols_o = bi[i+1] - bi[i]; 3569 ncols_d = ai[i+1] - ai[i]; 3570 /* off-diagonal portion of A */ 3571 for (jo=0; jo<ncols_o; jo++) { 3572 col = cmap[*bj]; 3573 if (col >= cstart) break; 3574 cj[k] = col; bj++; 3575 ca[k++] = *ba++; 3576 } 3577 /* diagonal portion of A */ 3578 for (j=0; j<ncols_d; j++) { 3579 cj[k] = cstart + *aj++; 3580 ca[k++] = *aa++; 3581 } 3582 /* off-diagonal portion of A */ 3583 for (j=jo; j<ncols_o; j++) { 3584 cj[k] = cmap[*bj++]; 3585 ca[k++] = *ba++; 3586 } 3587 } 3588 /* put together the new matrix */ 3589 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3590 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3591 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3592 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3593 mat->free_a = PETSC_TRUE; 3594 mat->free_ij = PETSC_TRUE; 3595 mat->nonew = 0; 3596 } else if (scall == MAT_REUSE_MATRIX){ 3597 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3598 ci = mat->i; cj = mat->j; ca = mat->a; 3599 for (i=0; i<am; i++) { 3600 /* off-diagonal portion of A */ 3601 ncols_o = bi[i+1] - bi[i]; 3602 for (jo=0; jo<ncols_o; jo++) { 3603 col = cmap[*bj]; 3604 if (col >= cstart) break; 3605 *ca++ = *ba++; bj++; 3606 } 3607 /* diagonal portion of A */ 3608 ncols_d = ai[i+1] - ai[i]; 3609 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3610 /* off-diagonal portion of A */ 3611 for (j=jo; j<ncols_o; j++) { 3612 *ca++ = *ba++; bj++; 3613 } 3614 } 3615 } else { 3616 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3617 } 3618 3619 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3620 PetscFunctionReturn(0); 3621 } 3622 3623 static PetscEvent logkey_getlocalmatcondensed = 0; 3624 #undef __FUNCT__ 3625 #define __FUNCT__ "MatGetLocalMatCondensed" 3626 /*@C 3627 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3628 3629 Not Collective 3630 3631 Input Parameters: 3632 + A - the matrix 3633 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3634 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3635 3636 Output Parameter: 3637 . 
A_loc - the local sequential matrix generated 3638 3639 Level: developer 3640 3641 @*/ 3642 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3643 { 3644 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3645 PetscErrorCode ierr; 3646 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3647 IS isrowa,iscola; 3648 Mat *aloc; 3649 3650 PetscFunctionBegin; 3651 if (!logkey_getlocalmatcondensed) { 3652 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3653 } 3654 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3655 if (!row){ 3656 start = A->rmap.rstart; end = A->rmap.rend; 3657 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3658 } else { 3659 isrowa = *row; 3660 } 3661 if (!col){ 3662 start = A->cmap.rstart; 3663 cmap = a->garray; 3664 nzA = a->A->cmap.n; 3665 nzB = a->B->cmap.n; 3666 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3667 ncols = 0; 3668 for (i=0; i<nzB; i++) { 3669 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3670 else break; 3671 } 3672 imark = i; 3673 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3674 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3675 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3676 ierr = PetscFree(idx);CHKERRQ(ierr); 3677 } else { 3678 iscola = *col; 3679 } 3680 if (scall != MAT_INITIAL_MATRIX){ 3681 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3682 aloc[0] = *A_loc; 3683 } 3684 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3685 *A_loc = aloc[0]; 3686 ierr = PetscFree(aloc);CHKERRQ(ierr); 3687 if (!row){ 3688 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3689 } 3690 if (!col){ 3691 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3692 } 3693 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3694 PetscFunctionReturn(0); 3695 } 3696 3697 static PetscEvent logkey_GetBrowsOfAcols = 0; 3698 #undef __FUNCT__ 3699 #define __FUNCT__ "MatGetBrowsOfAcols" 3700 /*@C 3701 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3702 3703 Collective on Mat 3704 3705 Input Parameters: 3706 + A,B - the matrices in mpiaij format 3707 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3708 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3709 3710 Output Parameter: 3711 + rowb, colb - index sets of rows and columns of B to extract 3712 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3713 - B_seq - the sequential matrix generated 3714 3715 Level: developer 3716 3717 @*/ 3718 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3719 { 3720 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3721 PetscErrorCode ierr; 3722 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3723 IS isrowb,iscolb; 3724 Mat *bseq; 3725 3726 PetscFunctionBegin; 3727 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3728 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3729 } 3730 if (!logkey_GetBrowsOfAcols) { 3731 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3732 } 3733 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3734 3735 if (scall == MAT_INITIAL_MATRIX){ 3736 start = A->cmap.rstart; 3737 cmap = a->garray; 3738 nzA = a->A->cmap.n; 3739 nzB = a->B->cmap.n; 3740 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3741 ncols = 0; 3742 for (i=0; i<nzB; i++) { /* row < local row index */ 3743 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3744 else break; 3745 } 3746 imark = i; 3747 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3748 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3749 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3750 ierr = PetscFree(idx);CHKERRQ(ierr); 3751 *brstart = imark; 3752 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3753 } else { 3754 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3755 isrowb = *rowb; iscolb = *colb; 3756 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3757 bseq[0] = *B_seq; 3758 } 3759 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3760 *B_seq = bseq[0]; 3761 ierr = PetscFree(bseq);CHKERRQ(ierr); 3762 if (!rowb){ 3763 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3764 } else { 3765 *rowb = isrowb; 3766 } 3767 if (!colb){ 3768 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3769 } else { 3770 *colb = iscolb; 3771 } 3772 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3773 PetscFunctionReturn(0); 3774 } 3775 3776 static PetscEvent logkey_GetBrowsOfAocols = 0; 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatGetBrowsOfAoCols" 3779 /*@C 3780 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3781 of the OFF-DIAGONAL portion of local A 3782 3783 Collective on Mat 3784 3785 Input Parameters: 3786 + A,B - the matrices in mpiaij format 3787 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3788 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3789 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3790 3791 Output Parameter: 3792 + B_oth - the sequential matrix generated 3793 3794 Level: developer 3795 3796 @*/ 3797 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3798 { 3799 VecScatter_MPI_General *gen_to,*gen_from; 3800 PetscErrorCode ierr; 3801 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3802 Mat_SeqAIJ *b_oth; 3803 VecScatter ctx=a->Mvctx; 3804 MPI_Comm comm=ctx->comm; 3805 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3806 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3807 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3808 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3809 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 3810 MPI_Status *sstatus,rstatus; 3811 PetscInt *cols; 3812 PetscScalar *vals; 3813 PetscMPIInt j; 3814 3815 PetscFunctionBegin; 3816 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3817 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3818 } 3819 if (!logkey_GetBrowsOfAocols) { 3820 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3821 } 3822 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3824 3825 gen_to = (VecScatter_MPI_General*)ctx->todata; 3826 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3827 rvalues = gen_from->values; /* holds the length of sending row */ 3828 svalues = gen_to->values; /* holds the length of receiving row */ 3829 nrecvs = gen_from->n; 3830 nsends = gen_to->n; 3831 3832 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3833 srow = gen_to->indices; /* local row index to be sent */ 3834 rstarts = gen_from->starts; 3835 sstarts = gen_to->starts; 3836 rprocs = gen_from->procs; 3837 sprocs = gen_to->procs; 3838 sstatus = gen_to->sstatus; 3839 3840 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3841 if (scall == MAT_INITIAL_MATRIX){ 3842 /* i-array */ 3843 /*---------*/ 3844 /* post receives */ 3845 for (i=0; i<nrecvs; i++){ 3846 rowlen = (PetscInt*)rvalues + rstarts[i]; 3847 nrows = rstarts[i+1]-rstarts[i]; 3848 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3849 } 3850 3851 /* pack the outgoing message */ 3852 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3853 rstartsj = sstartsj + nsends +1; 3854 sstartsj[0] = 0; rstartsj[0] = 0; 3855 len = 0; /* total length of j or a array to be sent */ 3856 k = 0; 3857 for (i=0; i<nsends; i++){ 3858 rowlen = (PetscInt*)svalues + sstarts[i]; 3859 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3860 for (j=0; j<nrows; j++) { 3861 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3862 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3863 len += rowlen[j]; 3864 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3865 k++; 3866 } 3867 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3868 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3869 } 3870 /* recvs and 
sends of i-array are completed */
3871 i = nrecvs;
3872 while (i--) {
3873 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3874 }
3875 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3876 /* allocate buffers for sending j and a arrays */
3877 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3878 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3879
3880 /* create i-array of B_oth */
3881 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3882 b_othi[0] = 0;
3883 len = 0; /* total length of j or a array to be received */
3884 k = 0;
3885 for (i=0; i<nrecvs; i++){
3886 rowlen = (PetscInt*)rvalues + rstarts[i];
3887 nrows = rstarts[i+1]-rstarts[i];
3888 for (j=0; j<nrows; j++) {
3889 b_othi[k+1] = b_othi[k] + rowlen[j];
3890 len += rowlen[j]; k++;
3891 }
3892 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3893 }
3894
3895 /* allocate space for the j and a arrays of B_oth */
3896 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3897 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3898
3899 /* j-array */
3900 /*---------*/
3901 /* post receives of j-array */
3902 for (i=0; i<nrecvs; i++){
3903 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3904 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3905 }
3906 k = 0;
3907 for (i=0; i<nsends; i++){
3908 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3909 bufJ = bufj+sstartsj[i];
3910 for (j=0; j<nrows; j++) {
3911 row = srow[k++] + B->rmap.range[rank]; /* global row idx */
3912 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3913 for (l=0; l<ncols; l++){
3914 *bufJ++ = cols[l];
3915 }
3916 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3917 }
3918 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3919 }
3920
3921 /* recvs and sends of j-array are completed */
3922 i = nrecvs;
3923 while (i--) {
3924 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3925 }
3926 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3927 } else if (scall == MAT_REUSE_MATRIX){
3928 sstartsj = *startsj;
3929 rstartsj = sstartsj + nsends +1;
3930 bufa = *bufa_ptr;
3931 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3932 b_otha = b_oth->a;
3933 } else {
3934 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3935 }
3936
3937 /* a-array */
3938 /*---------*/
3939 /* post receives of a-array */
3940 for (i=0; i<nrecvs; i++){
3941 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3942 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3943 }
3944 k = 0;
3945 for (i=0; i<nsends; i++){
3946 nrows = sstarts[i+1]-sstarts[i];
3947 bufA = bufa+sstartsj[i];
3948 for (j=0; j<nrows; j++) {
3949 row = srow[k++] + B->rmap.range[rank]; /* global row idx */
3950 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3951 for (l=0; l<ncols; l++){
3952 *bufA++ = vals[l];
3953 }
3954 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3955
3956 }
3957 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3958 }
3959 /* recvs and sends of a-array are completed */
3960 i = nrecvs;
3961 while (i--) {
3962 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3963 } 3964 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3965 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3966 3967 if (scall == MAT_INITIAL_MATRIX){ 3968 /* put together the new matrix */ 3969 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3970 3971 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3972 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3973 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3974 b_oth->free_a = PETSC_TRUE; 3975 b_oth->free_ij = PETSC_TRUE; 3976 b_oth->nonew = 0; 3977 3978 ierr = PetscFree(bufj);CHKERRQ(ierr); 3979 if (!startsj || !bufa_ptr){ 3980 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3981 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3982 } else { 3983 *startsj = sstartsj; 3984 *bufa_ptr = bufa; 3985 } 3986 } 3987 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3988 3989 PetscFunctionReturn(0); 3990 } 3991 3992 #undef __FUNCT__ 3993 #define __FUNCT__ "MatGetCommunicationStructs" 3994 /*@C 3995 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3996 3997 Not Collective 3998 3999 Input Parameters: 4000 . A - The matrix in mpiaij format 4001 4002 Output Parameter: 4003 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4004 . colmap - A map from global column index to local index into lvec 4005 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4006 4007 Level: developer 4008 4009 @*/ 4010 #if defined (PETSC_USE_CTABLE) 4011 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4012 #else 4013 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4014 #endif 4015 { 4016 Mat_MPIAIJ *a; 4017 4018 PetscFunctionBegin; 4019 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4020 PetscValidPointer(lvec, 2) 4021 PetscValidPointer(colmap, 3) 4022 PetscValidPointer(multScatter, 4) 4023 a = (Mat_MPIAIJ *) A->data; 4024 if (lvec) *lvec = a->lvec; 4025 if (colmap) *colmap = a->colmap; 4026 if (multScatter) *multScatter = a->Mvctx; 4027 PetscFunctionReturn(0); 4028 } 4029 4030 EXTERN_C_BEGIN 4031 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*); 4032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*); 4033 EXTERN_C_END 4034 4035 /*MC 4036 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4037 4038 Options Database Keys: 4039 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4040 4041 Level: beginner 4042 4043 .seealso: MatCreateMPIAIJ 4044 M*/ 4045 4046 EXTERN_C_BEGIN 4047 #undef __FUNCT__ 4048 #define __FUNCT__ "MatCreate_MPIAIJ" 4049 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 4050 { 4051 Mat_MPIAIJ *b; 4052 PetscErrorCode ierr; 4053 PetscMPIInt size; 4054 4055 PetscFunctionBegin; 4056 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 4057 4058 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 4059 B->data = (void*)b; 4060 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4061 B->factor = 0; 4062 B->rmap.bs = 1; 4063 B->assembled = PETSC_FALSE; 4064 B->mapping = 0; 4065 4066 B->insertmode = NOT_SET_VALUES; 4067 b->size = size; 4068 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 4069 4070 /* build cache for off array entries formed */ 4071 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 4072 b->donotstash = PETSC_FALSE; 4073 b->colmap = 0; 4074 b->garray = 0; 4075 b->roworiented = PETSC_TRUE; 4076 4077 /* stuff used for matrix vector multiply */ 4078 b->lvec = PETSC_NULL; 4079 b->Mvctx = PETSC_NULL; 4080 4081 /* stuff for MatGetRow() */ 4082 b->rowindices = 0; 4083 b->rowvalues = 0; 4084 b->getrowactive = PETSC_FALSE; 4085 4086 4087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 4088 "MatStoreValues_MPIAIJ", 4089 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 4091 "MatRetrieveValues_MPIAIJ", 4092 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 4093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 4094 "MatGetDiagonalBlock_MPIAIJ", 4095 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 4096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 4097 "MatIsTranspose_MPIAIJ", 4098 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 4099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 4100 "MatMPIAIJSetPreallocation_MPIAIJ", 4101 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 4102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 4103 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4104 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4106 "MatDiagonalScaleLocal_MPIAIJ", 4107 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 4109 "MatConvert_MPIAIJ_MPICSRPERM", 4110 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 4111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 4112 "MatConvert_MPIAIJ_MPICRL", 4113 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 4114 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 4115 PetscFunctionReturn(0); 4116 } 4117 EXTERN_C_END 4118 4119 /* 4120 Special version for direct calls from Fortran 4121 */ 4122 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4123 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4125 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4126 #endif 4127 4128 /* Change these macros so can be used in void function */ 4129 #undef CHKERRQ 4130 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4131 #undef SETERRQ2 4132 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4133 #undef SETERRQ 4134 
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4135 4136 EXTERN_C_BEGIN 4137 #undef __FUNCT__ 4138 #define __FUNCT__ "matsetvaluesmpiaij_" 4139 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 4140 { 4141 Mat mat = *mmat; 4142 PetscInt m = *mm, n = *mn; 4143 InsertMode addv = *maddv; 4144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4145 PetscScalar value; 4146 PetscErrorCode ierr; 4147 4148 MatPreallocated(mat); 4149 if (mat->insertmode == NOT_SET_VALUES) { 4150 mat->insertmode = addv; 4151 } 4152 #if defined(PETSC_USE_DEBUG) 4153 else if (mat->insertmode != addv) { 4154 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4155 } 4156 #endif 4157 { 4158 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4159 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4160 PetscTruth roworiented = aij->roworiented; 4161 4162 /* Some Variables required in the macro */ 4163 Mat A = aij->A; 4164 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4165 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4166 PetscScalar *aa = a->a; 4167 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4168 Mat B = aij->B; 4169 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4170 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4171 PetscScalar *ba = b->a; 4172 4173 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4174 PetscInt nonew = a->nonew; 4175 PetscScalar *ap1,*ap2; 4176 4177 PetscFunctionBegin; 4178 for (i=0; i<m; i++) { 4179 if (im[i] < 0) continue; 4180 #if defined(PETSC_USE_DEBUG) 4181 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4182 #endif 4183 if (im[i] >= rstart && im[i] < rend) { 4184 row = im[i] - rstart; 4185 lastcol1 = -1; 4186 rp1 = aj + ai[row]; 4187 ap1 = aa + ai[row]; 4188 rmax1 = aimax[row]; 4189 nrow1 = ailen[row]; 4190 low1 = 0; 4191 high1 = nrow1; 4192 lastcol2 = -1; 4193 rp2 = bj + bi[row]; 4194 ap2 = ba + bi[row]; 4195 rmax2 = bimax[row]; 4196 nrow2 = bilen[row]; 4197 low2 = 0; 4198 high2 = nrow2; 4199 4200 for (j=0; j<n; j++) { 4201 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4202 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4203 if (in[j] >= cstart && in[j] < cend){ 4204 col = in[j] - cstart; 4205 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4206 } else if (in[j] < 0) continue; 4207 #if defined(PETSC_USE_DEBUG) 4208 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4209 #endif 4210 else { 4211 if (mat->was_assembled) { 4212 if (!aij->colmap) { 4213 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4214 } 4215 #if defined (PETSC_USE_CTABLE) 4216 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4217 col--; 4218 #else 4219 col = aij->colmap[in[j]] - 1; 4220 #endif 4221 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4222 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4223 col = in[j]; 4224 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4225 B = aij->B; 4226 b = (Mat_SeqAIJ*)B->data; 4227 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4228 rp2 = bj + bi[row]; 4229 ap2 = ba + bi[row]; 4230 rmax2 = bimax[row]; 4231 nrow2 = 
bilen[row]; 4232 low2 = 0; 4233 high2 = nrow2; 4234 bm = aij->B->rmap.n; 4235 ba = b->a; 4236 } 4237 } else col = in[j]; 4238 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4239 } 4240 } 4241 } else { 4242 if (!aij->donotstash) { 4243 if (roworiented) { 4244 if (ignorezeroentries && v[i*n] == 0.0) continue; 4245 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4246 } else { 4247 if (ignorezeroentries && v[i] == 0.0) continue; 4248 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4249 } 4250 } 4251 } 4252 }} 4253 PetscFunctionReturnVoid(); 4254 } 4255 EXTERN_C_END 4256
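/*
   Illustrative sketch (not part of the library source): how an application typically creates and
   assembles a MATMPIAIJ matrix through the interfaces implemented in this file. The names comm, m, n,
   d_nnz, o_nnz, row, ncols, cols and vals are placeholders supplied by the application.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/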